diff options
Diffstat (limited to 'arch/s390')
52 files changed, 2068 insertions, 495 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ff19efdf6fe..2508a6f3158 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -85,6 +85,7 @@ config S390 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX select ARCH_INLINE_SPIN_TRYLOCK @@ -341,26 +342,16 @@ config STACK_GUARD The minimum size for the stack guard should be 256 for 31 bit and 512 for 64 bit. -config WARN_STACK +config WARN_DYNAMIC_STACK def_bool n - prompt "Emit compiler warnings for function with broken stack usage" + prompt "Emit compiler warnings for function with dynamic stack usage" help - This option enables the compiler options -mwarn-framesize and - -mwarn-dynamicstack. If the compiler supports these options it - will generate warnings for function which either use alloca or - create a stack frame bigger than CONFIG_WARN_STACK_SIZE. + This option enables the compiler option -mwarn-dynamicstack. If the + compiler supports this options generates warnings for functions + that dynamically allocate stack space using alloca. Say N if you are unsure. -config WARN_STACK_SIZE - int "Maximum frame size considered safe (128-2048)" - range 128 2048 - depends on WARN_STACK - default "2048" - help - This allows you to specify the maximum frame size a function may - have without the compiler complaining about it. - config ARCH_POPULATES_NODE_MAP def_bool y @@ -406,7 +397,7 @@ config QDIO If unsure, say Y. config CHSC_SCH - def_tristate y + def_tristate m prompt "Support for CHSC subchannels" help This driver allows usage of CHSC subchannels. A CHSC subchannel diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 2b380df9560..d76cef3fef3 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -31,4 +31,7 @@ config DEBUG_STRICT_USER_COPY_CHECKS If unsure, or if you run an older (pre 4.4) gcc, say N. 
+config DEBUG_SET_MODULE_RONX + def_bool y + depends on MODULES endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index d5b8a6ade52..27a0b5df5ea 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -80,8 +80,7 @@ endif endif ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_STACK) += -mwarn-dynamicstack -cflags-$(CONFIG_WARN_STACK) += -mwarn-framesize=$(CONFIG_WARN_STACK_SIZE) +cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 8800cf09069..635d677d328 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -6,7 +6,7 @@ COMPILE_VERSION := __linux_compile_version_id__`hostname | \ tr -c '[0-9A-Za-z]' '_'`__`date | \ tr -c '[0-9A-Za-z]' '_'`_t -EXTRA_CFLAGS := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. +ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. targets := image targets += bzImage diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1c999f726a5..10e22c4ec4a 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -7,7 +7,8 @@ BITS := $(if $(CONFIG_64BIT),64,31) targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ - vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o sizes.h head$(BITS).o + vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \ + sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += $(cflags-y) @@ -48,6 +49,7 @@ suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_XZ) := xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) @@ -57,6 +59,8 @@ $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) $(call if_changed,lzma) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) $(call 
if_changed,lzo) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) + $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 0851eb1e919..028f23ea81d 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -19,6 +19,7 @@ #undef memset #undef memcpy #undef memmove +#define memmove memmove #define memzero(s, n) memset((s), 0, (n)) /* Symbols defined by linker scripts */ @@ -54,6 +55,10 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_unlzo.c" #endif +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + extern _sclp_print_early(const char *); int puts(const char *s) @@ -133,11 +138,12 @@ unsigned long decompress_kernel(void) unsigned long output_addr; unsigned char *output; - check_ipl_parmblock((void *) 0, (unsigned long) output + SZ__bss_start); + output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; + check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); memset(&_bss, 0, &_ebss - &_bss); free_mem_ptr = (unsigned long)&_end; free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) ((free_mem_end_ptr + 4095UL) & -4096UL); + output = (unsigned char *) output_addr; #ifdef CONFIG_BLK_DEV_INITRD /* diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index f42dbabc0d3..48884f89ab9 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -38,6 +38,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) BUG_ON(ret != bsize); data += bsize - index; len -= bsize - index; + index = 0; } /* process as many blocks as possible */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 76daea11718..d9db13810d1 100644 --- a/arch/s390/include/asm/atomic.h +++ 
b/arch/s390/include/asm/atomic.h @@ -9,7 +9,7 @@ * * Atomic operations that C can't guarantee us. * Useful for resource counting etc. - * s390 uses 'Compare And Swap' for atomicity in SMP enviroment. + * s390 uses 'Compare And Swap' for atomicity in SMP environment. * */ @@ -36,14 +36,19 @@ static inline int atomic_read(const atomic_t *v) { - barrier(); - return v->counter; + int c; + + asm volatile( + " l %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; } static inline void atomic_set(atomic_t *v, int i) { - v->counter = i; - barrier(); + asm volatile( + " st %1,%0\n" + : "=Q" (v->counter) : "d" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -128,14 +133,19 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) static inline long long atomic64_read(const atomic64_t *v) { - barrier(); - return v->counter; + long long c; + + asm volatile( + " lg %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; } static inline void atomic64_set(atomic64_t *v, long long i) { - v->counter = i; - barrier(); + asm volatile( + " stg %1,%0\n" + : "=Q" (v->counter) : "d" (i)); } static inline long long atomic64_add_return(long long i, atomic64_t *v) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 2e05972c508..e1c8f3a4988 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -742,18 +742,42 @@ static inline int sched_find_first_bit(unsigned long *b) * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 */ -#define ext2_set_bit(nr, addr) \ - __test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_set_bit_atomic(lock, nr, addr) \ - test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_clear_bit(nr, addr) \ - __test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_clear_bit_atomic(lock, nr, addr) \ - test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_test_bit(nr, 
addr) \ - test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) - -static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size) +static inline void __set_bit_le(unsigned long nr, void *addr) +{ + __set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline void __clear_bit_le(unsigned long nr, void *addr) +{ + __clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int __test_and_set_bit_le(unsigned long nr, void *addr) +{ + return __test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int test_and_set_bit_le(unsigned long nr, void *addr) +{ + return test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int __test_and_clear_bit_le(unsigned long nr, void *addr) +{ + return __test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int test_and_clear_bit_le(unsigned long nr, void *addr) +{ + return test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int test_bit_le(unsigned long nr, const void *addr) +{ + return test_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr); +} + +static inline int find_first_zero_bit_le(void *vaddr, unsigned int size) { unsigned long bytes, bits; @@ -764,7 +788,7 @@ static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size) return (bits < size) ? 
bits : size; } -static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size, +static inline int find_next_zero_bit_le(void *vaddr, unsigned long size, unsigned long offset) { unsigned long *addr = vaddr, *p; @@ -790,11 +814,10 @@ static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size, size -= __BITOPS_WORDSIZE; p++; } - return offset + ext2_find_first_zero_bit(p, size); + return offset + find_first_zero_bit_le(p, size); } -static inline unsigned long ext2_find_first_bit(void *vaddr, - unsigned long size) +static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size) { unsigned long bytes, bits; @@ -805,7 +828,7 @@ static inline unsigned long ext2_find_first_bit(void *vaddr, return (bits < size) ? bits : size; } -static inline int ext2_find_next_bit(void *vaddr, unsigned long size, +static inline int find_next_bit_le(void *vaddr, unsigned long size, unsigned long offset) { unsigned long *addr = vaddr, *p; @@ -831,10 +854,14 @@ static inline int ext2_find_next_bit(void *vaddr, unsigned long size, size -= __BITOPS_WORDSIZE; p++; } - return offset + ext2_find_first_bit(p, size); + return offset + find_first_bit_le(p, size); } -#include <asm-generic/bitops/minix.h> +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_bit_le(nr, addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_bit_le(nr, addr) + #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 24aafa68b64..2a30d5ac066 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -13,6 +13,7 @@ #define L1_CACHE_BYTES 256 #define L1_CACHE_SHIFT 8 +#define NET_SKB_PAD 32 #define __read_mostly __attribute__((__section__(".data..read_mostly"))) diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 405cc97c624..43a5c78046d 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -1,32 +1,15 @@ #ifndef 
_S390_CACHEFLUSH_H #define _S390_CACHEFLUSH_H -/* Keep includes the same across arches. */ -#include <linux/mm.h> - /* Caches aren't brain-dead on the s390. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define flush_dcache_page(page) do { } while (0) -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) -#define flush_icache_page(vma,pg) do { } while (0) -#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) -#define flush_cache_vmap(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) do { } while (0) - -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ - memcpy(dst, src, len) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - memcpy(dst, src, len) +#include <asm-generic/cacheflush.h> #ifdef CONFIG_DEBUG_PAGEALLOC void kernel_map_pages(struct page *page, int numpages, int enable); #endif +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); + #endif /* _S390_CACHEFLUSH_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index ff6f62e0ec3..623f2fb7177 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -112,7 +112,6 @@ enum uc_todo { /** * struct ccw driver - device driver for channel attached devices - * @owner: owning module * @ids: ids supported by this driver * @probe: function called on probe * @remove: function called on remove @@ -128,10 +127,8 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * 
@driver: embedded device driver structure - * @name: device driver name */ struct ccw_driver { - struct module *owner; struct ccw_device_id *ids; int (*probe) (struct ccw_device *); void (*remove) (struct ccw_device *); @@ -147,7 +144,6 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; - char *name; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index c79c1e787b8..f2ea2c56a7e 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -29,8 +29,6 @@ struct ccwgroup_device { /** * struct ccwgroup_driver - driver for ccw group devices - * @owner: driver owner - * @name: driver name * @max_slaves: maximum number of slave devices * @driver_id: unique id * @probe: function called on probe @@ -46,8 +44,6 @@ struct ccwgroup_device { * @driver: embedded driver structure */ struct ccwgroup_driver { - struct module *owner; - char *name; int max_slaves; unsigned long driver_id; diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index e34347d567a..fc50a3342da 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -183,7 +183,7 @@ struct esw3 { * The irb that is handed to the device driver when an interrupt occurs. For * solicited interrupts, the common I/O layer already performs checks whether * a field is valid; a field not being valid is always passed as %0. - * If a unit check occured, @ecw may contain sense data; this is retrieved + * If a unit check occurred, @ecw may contain sense data; this is retrieved * by the common I/O layer itself if the device doesn't support concurrent * sense (so that the device driver never needs to perform basic sene itself). 
* For unsolicited interrupts, the irb is passed as-is (expect for sense data, diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h new file mode 100644 index 00000000000..7488e52efa9 --- /dev/null +++ b/arch/s390/include/asm/cmpxchg.h @@ -0,0 +1,225 @@ +/* + * Copyright IBM Corp. 1999, 2011 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include <linux/types.h> + +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long x, void *ptr, int size) +{ + unsigned long addr, old; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" (x << shift), "d" (~(255 << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" (x << shift), "d" (~(65535 << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 4: + asm volatile( + " l %0,%3\n" + "0: cs %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) ptr) + : "d" (x), "Q" (*(int *) ptr) + : "memory", "cc"); + return old; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " lg %0,%3\n" + "0: csg %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=m" (*(long *) ptr) + : "d" (x), "Q" (*(long *) ptr) + : "memory", "cc"); + return old; +#endif /* CONFIG_64BIT */ + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\ + __ret; 
\ +}) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#define __HAVE_ARCH_CMPXCHG + +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long addr, prev, tmp; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) + : "d" (old << shift), "d" (new << shift), + "d" (~(255 << shift)), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) + : "d" (old << shift), "d" (new << shift), + "d" (~(65535 << shift)), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev >> shift; + case 4: + asm volatile( + " cs %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(int *) ptr) + : "0" (old), "d" (new), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " csg %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(long *) ptr) + : "0" (old), "d" (new), "Q" (*(long *) ptr) + : "memory", "cc"); + return prev; +#endif /* CONFIG_64BIT */ + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) + +#ifdef CONFIG_64BIT +#define cmpxchg64(ptr, o, 
n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) +#else /* CONFIG_64BIT */ +static inline unsigned long long __cmpxchg64(void *ptr, + unsigned long long old, + unsigned long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%2,%1" + : "+&d" (rp_old), "=Q" (ptr) + : "d" (rp_new), "Q" (ptr) + : "cc"); + return rp_old.pair; +} +#define cmpxchg64(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) +#endif /* CONFIG_64BIT */ + +#include <asm-generic/cmpxchg-local.h> + +static inline unsigned long __cmpxchg_local(void *ptr, + unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + return __cmpxchg(ptr, old, new, size); + default: + return __cmpxchg_local_generic(ptr, old, new, size); + } + + return old; +} + +/* + * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make + * them available. + */ +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n)) + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 5c5d02de49e..81cf36b691f 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -7,7 +7,7 @@ #include <linux/uaccess.h> #include <asm/errno.h> -static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; pagefault_disable(); @@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, - int oldval, int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); + return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bf3de04170a..2c79b641627 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -148,11 +148,6 @@ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); */ extern unsigned long thread_saved_pc(struct task_struct *t); -/* - * Print register of task into buffer. Used in fs/proc/array.c. 
- */ -extern void task_show_regs(struct seq_file *m, struct task_struct *task); - extern void show_code(struct pt_regs *regs); unsigned long get_wchan(struct task_struct *p); diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 423fdda2322..d0eb4653ceb 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -43,29 +43,6 @@ #ifdef __KERNEL__ -#include <linux/list.h> -#include <linux/spinlock.h> - -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *); - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed long count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - #ifndef __s390x__ #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -81,41 +58,6 @@ struct rw_semaphore { #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) /* - * initialisation - */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - 
-#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - - -/* * lock for reading */ static inline void __down_read(struct rw_semaphore *sem) @@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return new; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 8f8d759f6a7..d382629a017 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -14,6 +14,7 @@ #include <asm/setup.h> #include <asm/processor.h> #include <asm/lowcore.h> +#include <asm/cmpxchg.h> #ifdef __KERNEL__ @@ -120,161 +121,6 @@ extern int memcpy_real(void *, void *, size_t); #define nop() asm volatile("nop") -#define xchg(ptr,x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (void *)(ptr),sizeof(*(ptr))); \ - __ret; \ -}) - -extern void __xchg_called_with_bad_pointer(void); - -static inline unsigned long __xchg(unsigned long x, void * ptr, int size) -{ - unsigned long addr, old; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" (x << shift), "d" (~(255 << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" (x << shift), "d" (~(65535 << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 4: - asm 
volatile( - " l %0,%3\n" - "0: cs %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) ptr) - : "d" (x), "Q" (*(int *) ptr) - : "memory", "cc"); - return old; -#ifdef __s390x__ - case 8: - asm volatile( - " lg %0,%3\n" - "0: csg %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=m" (*(long *) ptr) - : "d" (x), "Q" (*(long *) ptr) - : "memory", "cc"); - return old; -#endif /* __s390x__ */ - } - __xchg_called_with_bad_pointer(); - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -#define __HAVE_ARCH_CMPXCHG 1 - -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) - -extern void __cmpxchg_called_with_bad_pointer(void); - -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) -{ - unsigned long addr, prev, tmp; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) - : "d" (old << shift), "d" (new << shift), - "d" (~(255 << shift)), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) - : "d" (old << shift), "d" (new << shift), - "d" (~(65535 << shift)), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev >> shift; - case 4: - asm volatile( - " cs 
%0,%3,%1\n" - : "=&d" (prev), "=Q" (*(int *) ptr) - : "0" (old), "d" (new), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev; -#ifdef __s390x__ - case 8: - asm volatile( - " csg %0,%3,%1\n" - : "=&d" (prev), "=Q" (*(long *) ptr) - : "0" (old), "d" (new), "Q" (*(long *) ptr) - : "memory", "cc"); - return prev; -#endif /* __s390x__ */ - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking @@ -353,46 +199,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) __ctl_load(__dummy, cr, cr); \ }) -#include <linux/irqflags.h> - -#include <asm-generic/cmpxchg-local.h> - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - case 2: - case 4: -#ifdef __s390x__ - case 8: -#endif - return __cmpxchg(ptr, old, new, size); - default: - return __cmpxchg_local_generic(ptr, old, new, size); - } - - return old; -} - -/* - * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make - * them available. - */ -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) -#ifdef __s390x__ -#define cmpxchg64_local(ptr, o, n) \ - ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ - }) -#else -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -#endif - /* * Use to set psw mask except for the first byte which * won't be changed by this function. 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index f1f644f2240..9074a54c4d1 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -22,6 +22,7 @@ */ #include <linux/mm.h> +#include <linux/pagemap.h> #include <linux/swap.h> #include <asm/processor.h> #include <asm/pgalloc.h> diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index 04d6b95a89c..eeb52ccf499 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -30,14 +30,6 @@ typedef __signed__ long saddr_t; #ifndef __ASSEMBLY__ -typedef u64 dma64_addr_t; -#ifdef __s390x__ -/* DMA addresses come in 32-bit and 64-bit flavours. */ -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif - #ifndef __s390x__ typedef union { unsigned long long pair; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d6b1ed0ec52..2d9ea11f919 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,8 +83,8 @@ struct uaccess_ops { size_t (*clear_user)(size_t, void __user *); size_t (*strnlen_user)(size_t, const char __user *); size_t (*strncpy_from_user)(size_t, const char __user *, char *); - int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); - int (*futex_atomic_cmpxchg)(int __user *, int old, int new); + int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old); + int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new); }; extern struct uaccess_ops uaccess; diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 1049ef27c15..e8215257237 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -272,7 +272,11 @@ #define __NR_fanotify_init 332 #define __NR_fanotify_mark 333 #define __NR_prlimit64 334 -#define NR_syscalls 335 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define NR_syscalls 339 /* * 
There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 8e60fb23b90..1dc96ea08fa 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1877,3 +1877,30 @@ sys_prlimit64_wrapper: llgtr %r4,%r4 # const struct rlimit64 __user * llgtr %r5,%r5 # struct rlimit64 __user * jg sys_prlimit64 # branch to system call + + .globl sys_name_to_handle_at_wrapper +sys_name_to_handle_at_wrapper: + lgfr %r2,%r2 # int + llgtr %r3,%r3 # const char __user * + llgtr %r4,%r4 # struct file_handle __user * + llgtr %r5,%r5 # int __user * + lgfr %r6,%r6 # int + jg sys_name_to_handle_at + + .globl compat_sys_open_by_handle_at_wrapper +compat_sys_open_by_handle_at_wrapper: + lgfr %r2,%r2 # int + llgtr %r3,%r3 # struct file_handle __user * + lgfr %r4,%r4 # int + jg compat_sys_open_by_handle_at + + .globl compat_sys_clock_adjtime_wrapper +compat_sys_clock_adjtime_wrapper: + lgfr %r2,%r2 # clockid_t (int) + llgtr %r3,%r3 # struct compat_timex __user * + jg compat_sys_clock_adjtime + + .globl sys_syncfs_wrapper +sys_syncfs_wrapper: + lgfr %r2,%r2 # int + jg sys_syncfs diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3b7e7dddc32..068f8465c4e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -94,6 +94,7 @@ static noinline __init void create_kernel_nss(void) unsigned int sinitrd_pfn, einitrd_pfn; #endif int response; + int hlen; size_t len; char *savesys_ptr; char defsys_cmd[DEFSYS_CMD_SIZE]; @@ -124,24 +125,27 @@ static noinline __init void create_kernel_nss(void) end_pfn = PFN_UP(__pa(&_end)); min_size = end_pfn << 2; - sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", - kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1, - eshared_pfn, end_pfn); + hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, + "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, + 
eshared_pfn - 1, eshared_pfn, end_pfn); #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); min_size = einitrd_pfn << 2; - sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd, - sinitrd_pfn, einitrd_pfn); + hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); } #endif - sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13", - defsys_cmd, min_size); - sprintf(savesys_cmd, "SAVESYS %s \n IPL %s", - kernel_nss_name, kernel_nss_name); + snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); + defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; + snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; __cpcmd(defsys_cmd, NULL, 0, &response); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7061398341d..fb317bf2c37 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -460,7 +460,7 @@ startup: #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST - stfl __LC_STFL_FAC_LIST # store facility list + .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list tm __LC_STFL_FAC_LIST,0x01 # stfle available ? 
jz 0f la %r0,0 diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index a922d51df6b..b09b9c62573 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -12,6 +12,7 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/ftrace.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/pgtable.h> @@ -71,6 +72,7 @@ static void __machine_kexec(void *data) void machine_kexec(struct kimage *image) { + tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 5e73dee63ba..9eabbc90795 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -78,7 +78,7 @@ do_reipl_asm: basr %r13,0 * in the ESA psw. * Bit 31 of the addresses has to be 0 for the * 31bit lpswe instruction a fact they appear to have - * ommited from the pop. + * omitted from the pop. */ .Lnewpsw: .quad 0x0000000080000000 .quad .Lpg1 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6f6350826c8..f5434d1ecb3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -102,16 +102,6 @@ EXPORT_SYMBOL(lowcore_ptr); #include <asm/setup.h> -static struct resource code_resource = { - .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; - -static struct resource data_resource = { - .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; - /* * condev= and conmode= setup parameter. 
*/ @@ -436,21 +426,43 @@ setup_lowcore(void) lowcore_ptr[0] = lc; } -static void __init -setup_resources(void) +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) { - struct resource *res, *sub_res; - int i; + struct resource *res, *std_res, *sub_res; + int i, j; code_resource.start = (unsigned long) &_text; code_resource.end = (unsigned long) &_etext - 1; data_resource.start = (unsigned long) &_etext; data_resource.end = (unsigned long) &_edata - 1; + bss_resource.start = (unsigned long) &__bss_start; + bss_resource.end = (unsigned long) &__bss_stop - 1; for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: @@ -464,40 +476,24 @@ setup_resources(void) res->name = "reserved"; } res->start = memory_chunk[i].addr; - res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; + res->end = res->start + memory_chunk[i].size - 1; request_resource(&iomem_resource, res); - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &code_resource, - sizeof(struct resource)); - sub_res->end = res->end; - code_resource.start = res->end + 1; - request_resource(res, sub_res); - } - - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end <= 
res->end) - request_resource(res, &code_resource); - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &data_resource, - sizeof(struct resource)); - sub_res->end = res->end; - data_resource.start = res->end + 1; - request_resource(res, sub_res); + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = alloc_bootmem_low(sizeof(*sub_res)); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } } - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end <= res->end) - request_resource(res, &data_resource); } } @@ -712,7 +708,7 @@ static void __init setup_hwcaps(void) * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information * as stored by stfl, bits 32-xxx contain additional facilities. * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilites + * doublewords passed to the instruction. The additional facilities * are: * Bit 42: decimal floating point facility is installed * Bit 44: perform floating point operation facility is installed diff --git a/arch/s390/kernel/switch_cpu.S b/arch/s390/kernel/switch_cpu.S index 469f11b574f..20530dd2eab 100644 --- a/arch/s390/kernel/switch_cpu.S +++ b/arch/s390/kernel/switch_cpu.S @@ -46,7 +46,9 @@ smp_restart_cpu: ltr %r4,%r4 /* New stack ? 
*/ jz 1f lr %r15,%r4 -1: basr %r14,%r2 +1: lr %r14,%r2 /* r14: Function to call */ + lr %r2,%r3 /* r2 : Parameter for function*/ + basr %r14,%r14 /* Call function */ .gprregs_addr: .long .gprregs diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S index d94aacc898c..5be3f43898f 100644 --- a/arch/s390/kernel/switch_cpu64.S +++ b/arch/s390/kernel/switch_cpu64.S @@ -42,7 +42,9 @@ smp_restart_cpu: ltgr %r4,%r4 /* New stack ? */ jz 1f lgr %r15,%r4 -1: basr %r14,%r2 +1: lgr %r14,%r2 /* r14: Function to call */ + lgr %r2,%r3 /* r2 : Parameter for function*/ + basr %r14,%r14 /* Call function */ .section .data,"aw",@progbits .gprregs: diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index a8fee1b1439..9c65fd4ddce 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -343,3 +343,7 @@ SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper) SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper) SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper) +SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */ +SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper) +SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) +SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9e7b039458d..87be655557a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -724,7 +724,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync) } /* - * Sync the TOD clock using the port refered to by aibp. This port + * Sync the TOD clock using the port referred to by aibp. This port * has to be enabled and the other port has to be disabled. The * last eacr update has to be more than 1.6 seconds in the past. 
*/ @@ -1012,7 +1012,7 @@ static void etr_work_fn(struct work_struct *work) eacr = etr_handle_update(&aib, eacr); /* - * Select ports to enable. The prefered synchronization mode is PPS. + * Select ports to enable. The preferred synchronization mode is PPS. * If a port can be enabled depends on a number of things: * 1) The port needs to be online and uptodate. A port is not * disabled just because it is not uptodate, but it is only @@ -1091,7 +1091,7 @@ static void etr_work_fn(struct work_struct *work) /* * Update eacr and try to synchronize the clock. If the update * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occured) or the + * assume that a stepping port switch has occurred) or the * clock syncing failed, reset the sync check control bit * and set up a timer to try again after 0.5 seconds */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5eb78dd584c..b5a4a739b47 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -237,43 +237,6 @@ void show_regs(struct pt_regs *regs) show_last_breaking_event(regs); } -/* This is called from fs/proc/array.c */ -void task_show_regs(struct seq_file *m, struct task_struct *task) -{ - struct pt_regs *regs; - - regs = task_pt_regs(task); - seq_printf(m, "task: %p, ksp: %p\n", - task, (void *)task->thread.ksp); - seq_printf(m, "User PSW : %p %p\n", - (void *) regs->psw.mask, (void *)regs->psw.addr); - - seq_printf(m, "User GPRS: " FOURLONG, - regs->gprs[0], regs->gprs[1], - regs->gprs[2], regs->gprs[3]); - seq_printf(m, " " FOURLONG, - regs->gprs[4], regs->gprs[5], - regs->gprs[6], regs->gprs[7]); - seq_printf(m, " " FOURLONG, - regs->gprs[8], regs->gprs[9], - regs->gprs[10], regs->gprs[11]); - seq_printf(m, " " FOURLONG, - regs->gprs[12], regs->gprs[13], - regs->gprs[14], regs->gprs[15]); - seq_printf(m, "User ACRS: %08x %08x %08x %08x\n", - task->thread.acrs[0], task->thread.acrs[1], - task->thread.acrs[2], task->thread.acrs[3]); - 
seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[4], task->thread.acrs[5], - task->thread.acrs[6], task->thread.acrs[7]); - seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[8], task->thread.acrs[9], - task->thread.acrs[10], task->thread.acrs[11]); - seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[12], task->thread.acrs[13], - task->thread.acrs[14], task->thread.acrs[15]); -} - static DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedb..d73630b4fe1 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -337,17 +337,17 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a68ac10213b..1bc18cdb525 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -77,7 +77,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - PERCPU(PAGE_SIZE) + PERCPU(0x100, PAGE_SIZE) . 
= ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1ccdf4d8aa8..5e8ead4b4ab 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -44,7 +44,7 @@ static inline void set_vtimer(__u64 expires) __u64 timer; asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ + " SPT %1" /* Set new value immediately afterwards */ : "=m" (timer) : "m" (expires) ); S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; S390_lowcore.last_update_timer = expires; diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index e5221ec0b8e..860d26514c0 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) -EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm +ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bade533ba28..30ca85cce31 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -721,7 +721,7 @@ static int __init kvm_s390_init(void) /* * guests can ask for up to 255+1 double words, we need a full page - * to hold the maximum amount of facilites. On the other hand, we + * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. */ facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9194a4b52b2..73c47bd95db 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -311,7 +311,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) /* * a lot of B2 instructions are priviledged. We first check for - * the priviledges ones, that we can handle in the kernel. 
If the + * the privileged ones, that we can handle in the kernel. If the * kernel can handle this instruction, we check for the problem * state bit and (a) handle the instruction or (b) send a code 2 * program check. diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index 126011df14f..1d2536cb630 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *); extern size_t copy_to_user_std(size_t, void __user *, const void *); extern size_t strnlen_user_std(size_t, const char __user *); extern size_t strncpy_from_user_std(size_t, const char __user *, char *); -extern int futex_atomic_cmpxchg_std(int __user *, int, int); -extern int futex_atomic_op_std(int, int __user *, int, int *); +extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32); +extern int futex_atomic_op_std(int, u32 __user *, int, int *); extern size_t copy_from_user_pt(size_t, const void __user *, void *); extern size_t copy_to_user_pt(size_t, void __user *, const void *); -extern int futex_atomic_op_pt(int, int __user *, int, int *); -extern int futex_atomic_cmpxchg_pt(int __user *, int, int); +extern int futex_atomic_op_pt(int, u32 __user *, int, int *); +extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32); #endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 404f2de296d..74833831417 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -302,7 +302,7 @@ fault: : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ "m" (*uaddr) : "cc" ); -static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) +static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; @@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_op_pt(int op, int __user *uaddr, int 
oparg, int *old) +int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) { int ret; @@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) return ret; } -static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; asm volatile("0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2:\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } -int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; if (segment_eq(get_fs(), KERNEL_DS)) - return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(&current->mm->page_table_lock); uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); if (!uaddr) { @@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) } get_page(virt_to_page(uaddr)); spin_unlock(&current->mm->page_table_lock); - ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); put_page(virt_to_page(uaddr)); return ret; } diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index 07deaeee14c..bb1a7eed42c 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -125,9 +125,9 @@ static size_t copy_in_user_std(size_t size, void __user *to, unsigned long tmp1; asm volatile( + " sacf 256\n" " "AHI" %0,-1\n" " jo 5f\n" - " sacf 256\n" " bras %3,3f\n" "0:"AHI" %0,257\n" "1: mvc 0(1,%1),0(%2)\n" @@ -142,9 +142,8 @@ static size_t copy_in_user_std(size_t size, void __user *to, "3:"AHI" %0,-256\n" " jnm 2b\n" "4: ex %0,1b-0b(%3)\n" - " sacf 0\n" "5: "SLR" 
%0,%0\n" - "6:\n" + "6: sacf 0\n" EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); @@ -156,9 +155,9 @@ static size_t clear_user_std(size_t size, void __user *to) unsigned long tmp1, tmp2; asm volatile( + " sacf 256\n" " "AHI" %0,-1\n" " jo 5f\n" - " sacf 256\n" " bras %3,3f\n" " xc 0(1,%1),0(%1)\n" "0:"AHI" %0,257\n" @@ -178,9 +177,8 @@ static size_t clear_user_std(size_t size, void __user *to) "3:"AHI" %0,-256\n" " jnm 2b\n" "4: ex %0,0(%3)\n" - " sacf 0\n" "5: "SLR" %0,%0\n" - "6:\n" + "6: sacf 0\n" EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); @@ -257,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst) : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ "m" (*uaddr) : "cc"); -int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) +int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; @@ -289,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2: sacf 0\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile index c8489034105..51d399549f6 100644 --- a/arch/s390/math-emu/Makefile +++ b/arch/s390/math-emu/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_MATHEMU) := math.o -EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w +ccflags-y := -I$(src) -Iinclude/math-emu -w diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 
6fbc6f3fbdf..d98fe9004a5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -6,3 +6,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ page-states.o gup.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2c57806c085..9217e332b11 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -392,7 +392,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, { int fault; - /* Protection exception is supressing, decrement psw address. */ + /* Protection exception is suppressing, decrement psw address. */ regs->psw.addr -= (pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c new file mode 100644 index 00000000000..122ffbd08ce --- /dev/null +++ b/arch/s390/mm/pageattr.c @@ -0,0 +1,55 @@ +/* + * Copyright IBM Corp. 
2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <asm/pgtable.h> + +static void change_page_attr(unsigned long addr, int numpages, + pte_t (*set) (pte_t)) +{ + pte_t *ptep, pte; + pmd_t *pmdp; + pud_t *pudp; + pgd_t *pgdp; + int i; + + for (i = 0; i < numpages; i++, addr += PAGE_SIZE) { /* walk the tables for every page */ + pgdp = pgd_offset(&init_mm, addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + if (pmd_huge(*pmdp)) { + WARN_ON_ONCE(1); + continue; + } + ptep = pte_offset_kernel(pmdp, addr); + + pte = *ptep; + pte = set(pte); + ptep_invalidate(&init_mm, addr, ptep); + *ptep = pte; + } +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_wrprotect); + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_ro); + +int set_memory_rw(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_mkwrite); + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_rw); + +/* not possible */ +int set_memory_nx(unsigned long addr, int numpages) +{ + return 0; +} +EXPORT_SYMBOL_GPL(set_memory_nx); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0c719c61972..e1850c28cd6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -336,7 +336,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ - list_move(&page->lru, &mm->context.pgtable_list); + if (!list_empty(&page->lru)) + list_move(&page->lru, &mm->context.pgtable_list); page = NULL; } else /* All fragments of the 4K page have been freed. 
*/ diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 537b2d840e6..524c4b61582 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_64BIT) += hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 00000000000..4952872d6f0 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1252 @@ +/** + * arch/s390/oprofile/hwsampler.c + * + * Copyright IBM Corp. 2010 + * Author: Heinz Graalfs <graalfs@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/semaphore.h> +#include <linux/oom.h> +#include <linux/oprofile.h> + +#include <asm/lowcore.h> +#include <asm/s390_ext.h> + +#include "hwsampler.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +#define ALERT_REQ_MASK 0x4000000000000000ul +#define BUFFER_FULL_MASK 0x8000000000000000ul + +#define EI_IEA (1 << 31) /* invalid entry address */ +#define EI_ISE (1 << 30) /* incorrect SDBT entry */ +#define EI_PRA (1 << 29) /* program request alert */ +#define EI_SACA (1 << 23) /* sampler authorization change alert */ +#define EI_LSDA (1 << 22) /* loss of sample data alert */ + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static struct workqueue_struct *hws_wq; + 
+static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + +static int ssctl(void *buffer) +{ + int cc; + + /* set in order to detect a program check */ + cc = 1; + + asm volatile( + "0: .insn s,0xB2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (buffer) + : "m" (*((struct hws_ssctl_request_block *)buffer)) + : "cc", "memory"); + + return cc ? -EINVAL : 0 ; +} + +static int qsi(void *buffer) +{ + int cc; + cc = 1; /* stays 1 if the instruction faults, hence "+d" below */ + + asm volatile( + "0: .insn s,0xB2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (buffer) + : "m" (*((struct hws_qsi_info_block *)buffer)) + : "cc", "memory"); + + return cc ? 
-EINVAL : 0; +} + +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = ssctl(ep->buffer); +} + +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = 
&cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* prototypes for external interrupt handler and worker */ +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64); + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + +static int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +static unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} + +static int prepare_cpu_buffers(void) +{ + int cpu; + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } + + return rc; +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. 
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. 
+ */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO 
"hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + if (!test_facility(68)) + return -EOPNOTSUPP; + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. 
+ */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. 
+ */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) +{ + int cpu; + struct hws_cpu_buffer *cb; + + cpu = smp_processor_id(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + atomic_xchg( + &cb->ext_params, + atomic_read(&cb->ext_params) + | S390_lowcore.ext_params); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + 
+ if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & EI_IEA) + cb->req_alert++; + + if (ext_params & EI_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & EI_IEA) { + cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & EI_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & EI_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + 
if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_data_entry *sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_data_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. 
*/ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. 
+ */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +long hwsampler_query_min_interval(void) +{ + if (min_sampler_rate) + return min_sampler_rate; + else + return -EINVAL; +} + +long hwsampler_query_max_interval(void) +{ + if (max_sampler_rate) + return max_sampler_rate; + else + return -EINVAL; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup() +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if 
(rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_interrupt(0x1407, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + hws_wq = NULL; + } + + unregister_external_interrupt(0x1407, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. 
+ */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + rc = prepare_cpu_buffers(); + if (rc) + goto start_all_exit; + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all() +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return rc; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 00000000000..8c72b59316b --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,113 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 
2010 + * Author(s): Heinz Graalfs <graalfs@de.ibm.com> + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include <linux/workqueue.h> + +struct hws_qsi_info_block /* QUERY SAMPLING information block */ +{ /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of sampling entry */ + unsigned int:16; /* 6-7: reserved */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +}; + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. 
level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long lpp; /* Logical-Partition Program Param. */ + unsigned long long vpp; /* Virtual-Machine Program Param. 
*/ +}; + +struct hws_trailer_entry { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned long:62; /* 2 - 63: Reserved */ + unsigned long overflow; /* 64 - sample Overflow count */ + unsigned long timestamp; /* 16 - time-stamp */ + unsigned long timestamp1; /* */ + unsigned long reserved1; /* 32 -Reserved */ + unsigned long reserved2; /* */ + unsigned long progusage1; /* 48 - reserved for programming use */ + unsigned long progusage2; /* */ +}; + +int hwsampler_setup(void); +int hwsampler_shutdown(void); +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); +int hwsampler_deallocate(void); +long hwsampler_query_min_interval(void); +long hwsampler_query_max_interval(void); +int hwsampler_start_all(unsigned long interval); +int hwsampler_stop_all(void); +int hwsampler_deactivate(unsigned int cpu); +int hwsampler_activate(unsigned int cpu); +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); + +#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 7a995113b91..c63d7e58352 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -4,23 +4,193 @@ * S390 Version * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2011 OProfile authors */ #include <linux/oprofile.h> #include <linux/init.h> #include <linux/errno.h> +#include <linux/oprofile.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include "../../../drivers/oprofile/oprof.h" extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); -int __init oprofile_arch_init(struct oprofile_operations* ops) +#ifdef CONFIG_64BIT + +#include "hwsampler.h" + +#define DEFAULT_INTERVAL 4096 + +#define 
DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_file; +static int hwsampler_running; /* start_mutex must be held to change */ + +static struct oprofile_operations timer_ops; + +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_file; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + return; +} + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_file = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +static int oprofile_create_hwsampling_files(struct super_block *sb, + struct dentry *root) +{ + struct dentry *hw_dir; + + /* reinitialize default values */ + hwsampler_file = 
1; + + hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!hw_dir) + return -EINVAL; + + oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); + oprofilefs_create_ulong(sb, hw_dir, "hw_interval", + &oprofile_hw_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + return 0; +} + +static int oprofile_hwsampler_init(struct oprofile_operations *ops) +{ + if (hwsampler_setup()) + return -ENODEV; + + /* + * create hwsampler files only if hwsampler_setup() succeeds. + */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval < 0) { + oprofile_min_interval = 0; + return -ENODEV; + } + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval < 0) { + oprofile_max_interval = 0; + return -ENODEV; + } + + if (oprofile_timer_init(ops)) + return -ENODEV; + + printk(KERN_INFO "oprofile: using hardware sampling\n"); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + ops->create_files = oprofile_create_hwsampling_files; + + return 0; +} + +static void oprofile_hwsampler_exit(void) +{ + oprofile_timer_exit(); + hwsampler_shutdown(); +} + +#endif /* CONFIG_64BIT */ + +int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; + +#ifdef CONFIG_64BIT + return oprofile_hwsampler_init(ops); +#else return -ENODEV; +#endif } void oprofile_arch_exit(void) { +#ifdef CONFIG_64BIT + oprofile_hwsampler_exit(); +#endif } |