Diffstat (limited to 'arch/s390')
47 files changed, 506 insertions, 456 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2b7c0fbe578..b403c533432 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -28,12 +28,6 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_HWEIGHT def_bool y -config GENERIC_TIME_VSYSCALL - def_bool y - -config GENERIC_CLOCKEVENTS - def_bool y - config GENERIC_BUG def_bool y if BUG @@ -90,7 +84,6 @@ config S390 select HAVE_KERNEL_XZ select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_RCU_TABLE_FREE if SMP select ARCH_SAVE_PAGE_KEYS if HIBERNATION select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP @@ -123,6 +116,10 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select GENERIC_SMP_IDLE_THREAD + select GENERIC_TIME_VSYSCALL + select GENERIC_CLOCKEVENTS + select KTIME_SCALAR if 32BIT config SCHED_OMIT_FRAME_POINTER def_bool y @@ -135,8 +132,6 @@ menu "Base setup" comment "Processor type and features" -source "kernel/time/Kconfig" - config 64BIT def_bool y prompt "64 bit kernel" @@ -147,9 +142,6 @@ config 64BIT config 32BIT def_bool y if !64BIT -config KTIME_SCALAR - def_bool 32BIT - config SMP def_bool y prompt "Symmetric multi-processing support" @@ -218,7 +210,7 @@ config COMPAT def_bool y prompt "Kernel support for 31 bit emulation" depends on 64BIT - select COMPAT_BINFMT_ELF + select COMPAT_BINFMT_ELF if BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC help Select this option if you want to enable your system kernel to @@ -235,6 +227,25 @@ config KEYS_COMPAT config AUDIT_ARCH def_bool y +config HAVE_MARCH_Z900_FEATURES + def_bool n + +config HAVE_MARCH_Z990_FEATURES + def_bool n + select HAVE_MARCH_Z900_FEATURES + +config HAVE_MARCH_Z9_109_FEATURES + def_bool n + select HAVE_MARCH_Z990_FEATURES + +config HAVE_MARCH_Z10_FEATURES + def_bool n + select HAVE_MARCH_Z9_109_FEATURES + +config HAVE_MARCH_Z196_FEATURES + def_bool n + select HAVE_MARCH_Z10_FEATURES + comment "Code generation options" choice @@ -250,6 +261,7 @@ config MARCH_G5 config MARCH_Z900 bool "IBM zSeries model z800 and z900" + select HAVE_MARCH_Z900_FEATURES if 64BIT help Select this to enable optimizations for model z800/z900 (2064 and 2066 series). This will enable some optimizations that are not @@ -257,6 +269,7 @@ config MARCH_Z900 config MARCH_Z990 bool "IBM zSeries model z890 and z990" + select HAVE_MARCH_Z990_FEATURES if 64BIT help Select this to enable optimizations for model z890/z990 (2084 and 2086 series). The kernel will be slightly faster but will not work @@ -264,6 +277,7 @@ config MARCH_Z990 config MARCH_Z9_109 bool "IBM System z9" + select HAVE_MARCH_Z9_109_FEATURES if 64BIT help Select this to enable optimizations for IBM System z9 (2094 and 2096 series). The kernel will be slightly faster but will not work @@ -271,6 +285,7 @@ config MARCH_Z9_109 config MARCH_Z10 bool "IBM System z10" + select HAVE_MARCH_Z10_FEATURES if 64BIT help Select this to enable optimizations for IBM System z10 (2097 and 2098 series). The kernel will be slightly faster but will not work @@ -278,6 +293,7 @@ config MARCH_Z10 config MARCH_Z196 bool "IBM zEnterprise 114 and 196" + select HAVE_MARCH_Z196_FEATURES if 64BIT help Select this to enable optimizations for IBM zEnterprise 114 and 196 (2818 and 2817 series). 
The kernel will be slightly faster but will @@ -407,33 +423,6 @@ config CHSC_SCH comment "Misc" -config IPL - def_bool y - prompt "Builtin IPL record support" - help - If you want to use the produced kernel to IPL directly from a - device, you have to merge a bootsector specific to the device - into the first bytes of the kernel. You will have to select the - IPL device. - -choice - prompt "IPL method generated into head.S" - depends on IPL - default IPL_VM - help - Select "tape" if you want to IPL the image from a Tape. - - Select "vm_reader" if you are running under VM/ESA and want - to IPL the image from the emulated card reader. - -config IPL_TAPE - bool "tape" - -config IPL_VM - bool "vm_reader" - -endchoice - source "fs/Kconfig.binfmt" config FORCE_MAX_ZONEORDER @@ -570,7 +559,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" - depends on 64BIT + depends on 64BIT && SMP select KEXEC help Generate crash dump after being started by kexec. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0ad2f1e1ce9..49e76e8b477 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -91,7 +91,6 @@ OBJCOPYFLAGS := -O binary head-y := arch/s390/kernel/head.o head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) -head-y += arch/s390/kernel/init_task.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore new file mode 100644 index 00000000000..017d5912ad2 --- /dev/null +++ b/arch/s390/boot/.gitignore @@ -0,0 +1,2 @@ +image +bzImage diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore new file mode 100644 index 00000000000..ae06b9b4c02 --- /dev/null +++ b/arch/s390/boot/compressed/.gitignore @@ -0,0 +1,3 @@ +sizes.h +vmlinux +vmlinux.lds diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 6cf8e26b313..37d2bf26796 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,8 +1,12 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y -CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -14,16 +18,22 @@ CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set -CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -34,18 +44,15 @@ CONFIG_KSM=y CONFIG_BINFMT_MISC=m CONFIG_CMM=m CONFIG_HZ_100=y -CONFIG_KEXEC=y -CONFIG_PM=y +CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_NET_KEY=y -CONFIG_AFIUCV=m CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_INET_LRO is not set CONFIG_IPV6=y -CONFIG_NET_SCTPPROBE=m CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m CONFIG_VLAN_8021Q=y @@ -84,15 +91,14 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_ZFCP=y -CONFIG_ZFCP_DIF=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_EQUALIZER=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_VIRTIO_NET=y CONFIG_RAW_DRIVER=m +CONFIG_VIRTIO_BALLOON=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # 
CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -103,27 +109,21 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_DLM=m CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_TIMER_STATS=y CONFIG_PROVE_LOCKING=y CONFIG_PROVE_RCU=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_NOTIFIERS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_RCU_TRACE=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_PAGEALLOC=y -# CONFIG_FTRACE is not set +CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_STRICT_DEVMEM is not set CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_CRYPTD=m @@ -155,7 +155,6 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -173,4 +172,3 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRC7=m -CONFIG_VIRTIO_BALLOON=y diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 451273ad4d3..10a50880294 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -11,25 +11,28 @@ * Force strict CPU ordering. * And yes, this is required on UP too when we're talking * to devices. - * - * This is very similar to the ppc eieio/sync instruction in that is - * does a checkpoint syncronisation & makes sure that - * all memory ops have completed wrt other CPU's ( see 7-15 POP DJB ). */ -#define eieio() asm volatile("bcr 15,0" : : : "memory") -#define SYNC_OTHER_CORES(x) eieio() -#define mb() eieio() -#define rmb() eieio() -#define wmb() eieio() -#define read_barrier_depends() do { } while(0) -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() +static inline void mb(void) +{ +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + /* Fast-BCR without checkpoint synchronization */ + asm volatile("bcr 14,0" : : : "memory"); +#else + asm volatile("bcr 15,0" : : : "memory"); +#endif +} + +#define rmb() mb() +#define wmb() mb() +#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { var = value; mb(); } while (0) #endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index f2ea2c56a7e..f2ef34f6d6e 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -29,9 +29,7 @@ struct ccwgroup_device { /** * struct ccwgroup_driver - driver for ccw group devices - * @max_slaves: maximum number of slave devices - * @driver_id: unique id - * @probe: function called on probe + * @setup: function called during device creation to setup the device * @remove: function called on remove * @set_online: function called when device is set online * @set_offline: function called when device is set offline @@ -44,10 +42,7 @@ struct ccwgroup_device { * @driver: embedded driver structure */ struct 
ccwgroup_driver { - int max_slaves; - unsigned long driver_id; - - int (*probe) (struct ccwgroup_device *); + int (*setup) (struct ccwgroup_device *); void (*remove) (struct ccwgroup_device *); int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); @@ -63,9 +58,8 @@ struct ccwgroup_driver { extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); -int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, - struct ccw_driver *cdrv, int num_devices, - const char *buf); +int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, + int num_devices, const char *buf); extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 1e5b27edc0c..2ee66a65f2d 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -38,12 +38,11 @@ static inline void stfle(u64 *stfle_fac_list, int size) unsigned long nr; preempt_disable(); - S390_lowcore.stfl_fac_list = 0; asm volatile( " .insn s,0xb2b10000,0(0)\n" /* stfl */ "0:\n" EX_TABLE(0b, 0b) - : "=m" (S390_lowcore.stfl_fac_list)); + : "+m" (S390_lowcore.stfl_fac_list)); nr = 4; /* bytes stored by stfl */ memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); if (S390_lowcore.stfl_fac_list & 0x01000000) { diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index b7ff6afc3ca..27216d31799 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -38,11 +38,8 @@ static inline void * phys_to_virt(unsigned long address) return (void *) address; } -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) +void *xlate_dev_mem_ptr(unsigned long phys); +void unxlate_dev_mem_ptr(unsigned long phys, void *addr); /* * Convert a virtual cached pointer to an uncached pointer diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 96076676e22..bdcbe0f8dd7 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -52,4 +52,9 @@ struct kvm_sync_regs { __u32 acrs[16]; /* access registers */ __u64 crs[16]; /* control registers */ }; + +#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) +#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 7343872890a..dd17537b9a9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -148,6 +148,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_restart; u32 diagnose_10; u32 diagnose_44; + u32 diagnose_9c; }; struct kvm_s390_io_info { diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 6964db226f8..a9883296103 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -149,6 +149,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + #endif #endif /* __S390_KVM_PARA_H */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 8eef9b5b3cf..78e3041919d 100644 --- 
a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,10 +22,7 @@ void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); -#ifdef CONFIG_HAVE_RCU_TABLE_FREE void page_table_free_rcu(struct mmu_gather *, unsigned long *); -void __tlb_remove_table(void *_table); -#endif static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d499b30ea48..6cbf3131167 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -141,9 +141,6 @@ struct seq_file; extern void release_thread(struct task_struct *); extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); -/* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) - /* * Return saved PC of a blocked thread. */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index d75c8e78f7e..f039d86adf6 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -258,11 +258,6 @@ struct slsb { u8 val[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(256))); -#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 -#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 -#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 -#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 - /** * struct qdio_outbuf_state - SBAL related asynchronous operation information * (for communication with upper layer programs) @@ -293,6 +288,8 @@ struct qdio_outbuf_state { #define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ #define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 @@ -328,11 +325,13 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, int, int, unsigned long); /* qdio errors reported to the upper-layer program */ -#define QDIO_ERROR_SIGA_TARGET 0x02 -#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 -#define QDIO_ERROR_SIGA_BUSY 0x20 -#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 -#define QDIO_ERROR_SLSB_STATE 0x80 +#define QDIO_ERROR_ACTIVATE 0x0001 +#define QDIO_ERROR_GET_BUF_STATE 0x0002 +#define QDIO_ERROR_SET_BUF_STATE 0x0004 +#define QDIO_ERROR_SLSB_STATE 0x0100 + +#define QDIO_ERROR_FATAL 0x00ff +#define QDIO_ERROR_TEMPORARY 0xff00 /* for qdio_cleanup */ #define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index fed7bee650a..bf238c55740 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -48,6 +48,7 @@ int sclp_cpu_deconfigure(u8 cpu); void sclp_facilities_detect(void); unsigned long long sclp_get_rnmax(void); unsigned long long sclp_get_rzm(void); +u8 sclp_get_fac85(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b21e46e5d4b..7244e1f6412 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -82,7 +82,6 @@ extern unsigned int user_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) -#define MACHINE_FLAG_STCKF (1UL << 15) 
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -101,7 +100,6 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) -#define MACHINE_HAS_STCKF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -113,7 +111,6 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) -#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c77c6de6f6c..0b6f586c138 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -16,7 +16,7 @@ extern struct mutex smp_cpu_state_mutex; extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -extern int __cpu_up(unsigned int cpu); +extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/s390/include/asm/swab.h b/arch/s390/include/asm/swab.h index 6bdee21c077..a3e4ebb3209 100644 --- a/arch/s390/include/asm/swab.h +++ b/arch/s390/include/asm/swab.h @@ -77,7 +77,7 @@ static inline __u16 __arch_swab16p(const __u16 *x) asm volatile( #ifndef __s390x__ - " icm %0,2,%O+1(%R1)\n" + " icm %0,2,%O1+1(%R1)\n" " ic %0,%1\n" : "=&d" (result) : "Q" (*x) : "cc"); #else /* __s390x__ */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a73038155e0..003b04edcff 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -95,7 +95,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ -#define TIF_SIE 12 /* guest execution active */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_31BIT 17 /* 32bit process */ @@ -114,7 +113,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) -#define _TIF_SIE (1<<TIF_SIE) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) #define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index c447a27a7fd..239ece9e53c 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -73,11 +73,15 @@ static inline void local_tick_enable(unsigned long long comp) typedef unsigned long long cycles_t; -static inline unsigned long long get_clock (void) +static inline unsigned long long get_clock(void) { unsigned long long clk; +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); +#else asm volatile("stck %0" : "=Q" (clk) : : "cc"); +#endif return clk; } @@ -86,17 +90,6 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } -static inline unsigned long long 
get_clock_fast(void) -{ - unsigned long long clk; - - if (MACHINE_HAS_STCKF) - asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); - else - clk = get_clock(); - return clk; -} - static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index c687a2c8346..775a5eea8f9 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -30,14 +30,10 @@ struct mmu_gather { struct mm_struct *mm; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE struct mmu_table_batch *batch; -#endif unsigned int fullmm; - unsigned int need_flush; }; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE struct mmu_table_batch { struct rcu_head rcu; unsigned int nr; @@ -49,7 +45,6 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -#endif static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, @@ -57,29 +52,20 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, { tlb->mm = mm; tlb->fullmm = full_mm_flush; - tlb->need_flush = 0; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL; -#endif if (tlb->fullmm) __tlb_flush_mm(mm); } static inline void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->need_flush) - return; - tlb->need_flush = 0; - __tlb_flush_mm(tlb->mm); -#ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); -#endif } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_flush_mmu(tlb); + tlb_table_flush(tlb); } /* @@ -105,10 +91,8 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { -#ifdef CONFIG_HAVE_RCU_TABLE_FREE if (!tlb->fullmm) return page_table_free_rcu(tlb, (unsigned long *) pte); -#endif page_table_free(tlb->mm, (unsigned long *) pte); } @@ -125,10 +109,8 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 31)) return; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE if (!tlb->fullmm) return tlb_remove_table(tlb, pmd); -#endif crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -146,10 +128,8 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 42)) return; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE if (!tlb->fullmm) return tlb_remove_table(tlb, pud); -#endif crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/s390/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 884b18afc86..9733b3f0eb6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -28,7 +28,7 @@ obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \ obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) -extra-y += head.o init_task.o vmlinux.lds +extra-y += head.o vmlinux.lds extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index ab64bdbab2a..65426525d9f 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -173,11 +173,14 @@ asmlinkage long 
sys32_setfsgid16(u16 gid) static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = (u16)GROUP_AT(group_info, i); + kgid = GROUP_AT(group_info, i); + group = (u16)from_kgid_munged(user_ns, kgid); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -187,13 +190,20 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = (gid_t)group; + + kgid = make_kgid(user_ns, (gid_t)group); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; @@ -537,8 +547,8 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) tmp.__st_ino = (u32)stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blksize = (u32)stat->blksize; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 28040fd5e8a..377c096ca4a 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -437,13 +437,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 9475e682727..d84181f1f5e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -374,8 +374,6 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; - if (test_facility(25)) - S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; #endif } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 74ee563fe62..1ae93b573d7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -145,22 +145,23 @@ STACK_SIZE = 1 << STACK_SHIFT * gpr2 = prev */ ENTRY(__switch_to) + stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + st %r15,__THREAD_ksp(%r2) # store kernel stack of prev l %r4,__THREAD_info(%r2) # get thread_info of prev l %r5,__THREAD_info(%r3) # get thread_info of next + lr %r15,%r5 + ahi %r15,STACK_SIZE # end of kernel stack of next + st %r3,__LC_CURRENT # store task struct of next + st %r5,__LC_THREAD_INFO # store thread info of next + st %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + l %r15,__THREAD_ksp(%r3) # load kernel stack of next tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? 
jz 0f ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next -0: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - st %r15,__THREAD_ksp(%r2) # store kernel stack of prev - l %r15,__THREAD_ksp(%r3) # load kernel stack of next - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - st %r3,__LC_CURRENT # store task struct of next - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next - st %r5,__LC_THREAD_INFO # store thread info of next - ahi %r5,STACK_SIZE # end of kernel stack of next - st %r5,__LC_KERNEL_STACK # store end of kernel stack +0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4e1c292fa7e..229fe1d0774 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -81,16 +81,14 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) .macro HANDLE_SIE_INTERCEPT scratch #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __TI_flags+6(%r12),_TIF_SIE>>8 - jz .+42 - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP - jz .+8 - .insn s,0xb2800000,BASED(.Lhost_id) # set host id + tmhh %r8,0x0001 # interrupting from user ? + jnz .+42 lgr \scratch,%r9 slg \scratch,BASED(.Lsie_loop) clg \scratch,BASED(.Lsie_length) - jhe .+10 + jhe .+22 lg %r9,BASED(.Lsie_loop) + SPP BASED(.Lhost_id) # set host id #endif .endm @@ -148,6 +146,14 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) ssm __LC_RETURN_PSW .endm + .macro STCK savearea +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + .insn s,0xb27c0000,\savearea # store clock fast +#else + .insn s,0xb2050000,\savearea # store clock +#endif + .endm + .section .kprobes.text, "ax" /* @@ -158,22 +164,23 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) * gpr2 = prev */ ENTRY(__switch_to) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev lg %r4,__THREAD_info(%r2) # get thread_info of prev lg %r5,__THREAD_info(%r3) # get thread_info of next + lgr %r15,%r5 + aghi %r15,STACK_SIZE # end of kernel stack of next + stg %r3,__LC_CURRENT # store task struct of next + stg %r5,__LC_THREAD_INFO # store thread info of next + stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? jz 0f ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next -0: stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev - lg %r15,__THREAD_ksp(%r3) # load kernel stack of next - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 
4 - lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - stg %r3,__LC_CURRENT # store task struct of next - mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next - stg %r5,__LC_THREAD_INFO # store thread info of next - aghi %r5,STACK_SIZE # end of kernel stack of next - stg %r5,__LC_KERNEL_STACK # store end of kernel stack +0: lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -458,7 +465,7 @@ pgm_svcper: * IO interrupt handler routine */ ENTRY(io_int_handler) - stck __LC_INT_CLOCK + STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK @@ -604,7 +611,7 @@ io_notify_resume: * External interrupt handler routine */ ENTRY(ext_int_handler) - stck __LC_INT_CLOCK + STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK @@ -622,6 +629,7 @@ ext_skip: mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code @@ -638,7 +646,7 @@ ENTRY(psw_idle) larl %r1,psw_idle_lpsw+4 stg %r1,__SF_EMPTY+8(%r15) larl %r1,.Lvtimer_max - stck __IDLE_ENTER(%r2) + STCK __IDLE_ENTER(%r2) ltr %r5,%r5 stpt __VQ_IDLE_ENTER(%r3) jz psw_idle_lpsw @@ -654,7 +662,7 @@ __critical_end: * Machine check handler routines */ ENTRY(mcck_int_handler) - stck __LC_MCCK_CLOCK + STCK __LC_MCCK_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -967,7 +975,6 @@ ENTRY(sie64a) xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_THREAD_INFO # pointer thread_info struct - oi __TI_flags+6(%r14),_TIF_SIE>>8 sie_loop: lg %r14,__LC_THREAD_INFO # pointer thread_info struct tm __TI_flags+7(%r14),_TIF_EXIT_SIE @@ -985,7 +992,6 @@ sie_done: lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers @@ -994,7 +1000,6 @@ sie_exit: sie_fault: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct - ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index c27a0727f93..4939d15375a 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -34,125 +34,7 @@ #endif __HEAD -#ifndef CONFIG_IPL - .org 0 - .long 0x00080000,0x80000000+startup # Just a restart PSW -#else -#ifdef CONFIG_IPL_TAPE -#define IPL_BS 1024 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x27000000,0x60000001 # by ipl to addresses 0-23. - .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs). 
- .long 0x00000000,0x00000000 # external old psw - .long 0x00000000,0x00000000 # svc old psw - .long 0x00000000,0x00000000 # program check old psw - .long 0x00000000,0x00000000 # machine check old psw - .long 0x00000000,0x00000000 # io old psw - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x000a0000,0x00000058 # external new psw - .long 0x000a0000,0x00000060 # svc new psw - .long 0x000a0000,0x00000068 # program check new psw - .long 0x000a0000,0x00000070 # machine check new psw - .long 0x00080000,0x80000000+.Lioint # io new psw - .org 0x100 -# -# subroutine for loading from tape -# Parameters: -# R1 = device number -# R2 = load address -.Lloader: - st %r14,.Lldret - la %r3,.Lorbread # r3 = address of orb - la %r5,.Lirb # r5 = address of irb - st %r2,.Lccwread+4 # initialize CCW data addresses - lctl %c6,%c6,.Lcr6 - slr %r2,%r2 -.Lldlp: - la %r6,3 # 3 retries -.Lssch: - ssch 0(%r3) # load chunk of IPL_BS bytes - bnz .Llderr -.Lw4end: - bas %r14,.Lwait4io - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lrecov - slr %r7,%r7 - icm %r7,3,10(%r5) # get residual count - lcr %r7,%r7 - la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read - ar %r2,%r7 # add to total size - tm 8(%r5),0x01 # found a tape mark ? - bnz .Ldone - l %r0,.Lccwread+4 # update CCW data addresses - ar %r0,%r7 - st %r0,.Lccwread+4 - b .Lldlp -.Ldone: - l %r14,.Lldret - br %r14 # r2 contains the total size -.Lrecov: - bas %r14,.Lsense # do the sensing - bct %r6,.Lssch # dec. retry count & branch - b .Llderr -# -# Sense subroutine -# -.Lsense: - st %r14,.Lsnsret - la %r7,.Lorbsense - ssch 0(%r7) # start sense command - bnz .Llderr - bas %r14,.Lwait4io - l %r14,.Lsnsret - tm 8(%r5),0x82 # do we have a problem ? - bnz .Llderr - br %r14 -# -# Wait for interrupt subroutine -# -.Lwait4io: - lpsw .Lwaitpsw -.Lioint: - c %r1,0xb8 # compare subchannel number - bne .Lwait4io - tsch 0(%r5) - slr %r0,%r0 - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lwtexit - tm 8(%r5),0x04 # got device end ? - bz .Lwait4io -.Lwtexit: - br %r14 -.Llderr: - lpsw .Lcrash - - .align 8 -.Lorbread: - .long 0x00000000,0x0080ff00,.Lccwread - .align 8 -.Lorbsense: - .long 0x00000000,0x0080ff00,.Lccwsense - .align 8 -.Lccwread: - .long 0x02200000+IPL_BS,0x00000000 -.Lccwsense: - .long 0x04200001,0x00000000 -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 -.Lldret:.long 0 -.Lsnsret: .long 0 -#endif /* CONFIG_IPL_TAPE */ - -#ifdef CONFIG_IPL_VM #define IPL_BS 0x730 .org 0 .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded @@ -256,7 +138,6 @@ __HEAD .long 0x02600050,0x00000000 .endr .long 0x02200050,0x00000000 -#endif /* CONFIG_IPL_VM */ iplstart: lh %r1,0xb8 # test if subchannel number @@ -325,7 +206,6 @@ iplstart: clc 0(3,%r2),.L_eof bz .Lagain2 -#ifdef CONFIG_IPL_VM # # reset files in VM reader # @@ -358,7 +238,6 @@ iplstart: .long 0x00080000,0x80000000+.Lrdrint .Lrdrwaitpsw: .long 0x020a0000,0x80000000+.Lrdrint -#endif # # everything loaded, go for it @@ -376,8 +255,6 @@ iplstart: .L_eof: .long 0xc5d6c600 /* C'EOF' */ .L_hdr: .long 0xc8c4d900 /* C'HDR' */ -#endif /* CONFIG_IPL */ - # # SALIPL loader support. Based on a patch by Rob van der Heij. 
# This entry point is called directly from the SALIPL loader and @@ -474,9 +351,9 @@ ENTRY(startup_kdump) stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list tm __LC_STFL_FAC_LIST,0x01 # stfle available ? jz 0f diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c deleted file mode 100644 index 4d1c9fb0b54..00000000000 --- a/arch/s390/kernel/init_task.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * arch/s390/kernel/init_task.c - * - * S390 version - * - * Derived from "arch/i386/kernel/init_task.c" - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1c2cdd59ccd..8a22c27219d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -118,9 +118,10 @@ asmlinkage void do_softirq(void) "a" (__do_softirq) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); - } else + } else { /* We are already on the async stack. */ __do_softirq(); + } } local_irq_restore(flags); @@ -192,11 +193,12 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) int index = ext_hash(code); spin_lock_irqsave(&ext_int_hash_lock, flags); - list_for_each_entry_rcu(p, &ext_int_hash[index], entry) + list_for_each_entry_rcu(p, &ext_int_hash[index], entry) { if (p->code == code && p->handler == handler) { list_del_rcu(&p->entry); kfree_rcu(p, rcu); } + } spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; } @@ -211,9 +213,10 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, old_regs = set_irq_regs(regs); irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { /* Serve timer interrupts first. 
*/ clock_comparator_work(); + } kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 46405086479..cb019f429e8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -178,7 +178,7 @@ static void cpumf_pmu_enable(struct pmu *pmu) err = lcctl(cpuhw->state); if (err) { pr_err("Enabling the performance measuring unit " - "failed with rc=%lx\n", err); + "failed with rc=%x\n", err); return; } @@ -203,7 +203,7 @@ static void cpumf_pmu_disable(struct pmu *pmu) err = lcctl(inactive); if (err) { pr_err("Disabling the performance measuring unit " - "failed with rc=%lx\n", err); + "failed with rc=%x\n", err); return; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 02f300fbf07..4993e689b2c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -719,7 +719,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - secure_computing(regs->gprs[2]); + secure_computing_strict(regs->gprs[2]); /* * The sysc_tracesys code in entry.S stored the system diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index f7582b27f60..f626232e216 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -59,15 +59,8 @@ typedef struct SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) { sigset_t blocked; - - current->saved_sigmask = current->blocked; - mask &= _BLOCKABLE; siginitset(&blocked, mask); - set_current_blocked(&blocked); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_restore_sigmask(); - return -ERESTARTNOHAND; + return sigsuspend(&blocked); } SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, @@ -235,13 +228,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -414,15 +400,6 @@ void do_signal(struct pt_regs *regs) struct k_sigaction ka; sigset_t *oldset; - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. 
- */ - if (!user_mode(regs)) - return; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1f77227669e..647ba942589 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -85,7 +85,6 @@ enum { struct pcpu { struct cpu cpu; - struct task_struct *idle; /* idle process for the cpu */ struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ unsigned long async_stack; /* async stack for the cpu */ unsigned long panic_stack; /* panic stack for the cpu */ @@ -226,6 +225,8 @@ out: return -ENOMEM; } +#ifdef CONFIG_HOTPLUG_CPU + static void pcpu_free_lowcore(struct pcpu *pcpu) { pcpu_sigp_retry(pcpu, sigp_set_prefix, 0); @@ -247,6 +248,8 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) } } +#endif /* CONFIG_HOTPLUG_CPU */ + static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct _lowcore *lc = pcpu->lowcore; @@ -721,26 +724,9 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) cpu_idle(); } -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit smp_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle; - - c_idle = container_of(work, struct create_idle, work); - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu) +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct create_idle c_idle; struct pcpu *pcpu; int rc; @@ -750,22 +736,12 @@ int __cpuinit __cpu_up(unsigned int cpu) if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) != sigp_order_code_accepted) return -EIO; - if (!pcpu->idle) { - c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); - INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); - c_idle.cpu = cpu; - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - if (IS_ERR(c_idle.idle)) - return PTR_ERR(c_idle.idle); - pcpu->idle = c_idle.idle; - } - init_idle(pcpu->idle, cpu); + rc = pcpu_alloc_lowcore(pcpu, cpu); if (rc) return rc; pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, pcpu->idle); + pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); @@ -852,7 +828,6 @@ void __init smp_prepare_boot_cpu(void) struct pcpu *pcpu = pcpu_devices; boot_cpu_address = stap(); - pcpu->idle = current; pcpu->state = CPU_STATE_CONFIGURED; pcpu->address = boot_cpu_address; pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 00000000000..e45fba9d0ce --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore new file mode 100644 index 00000000000..3fd18cf9fec --- /dev/null +++ b/arch/s390/kernel/vdso64/.gitignore @@ -0,0 +1 @@ +vdso64.lds diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a353f0ea45c..b23d9ac77df 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -47,9 +47,30 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - vcpu_put(vcpu); - yield(); - vcpu_load(vcpu); + kvm_vcpu_on_spin(vcpu); + return 0; +} + +static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu 
*tcpu; + int tid; + int i; + + tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + vcpu->stat.diagnose_9c++; + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + + if (tid == vcpu->vcpu_id) + return 0; + + kvm_for_each_vcpu(i, tcpu, kvm) + if (tcpu->vcpu_id == tid) { + kvm_vcpu_yield_to(tcpu); + break; + } + return 0; } @@ -89,6 +110,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); + case 0x9c: + return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); default: diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 361456577c6..979cbe55bf5 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -101,6 +101,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) } static intercept_handler_t instruction_handlers[256] = { + [0x01] = kvm_s390_handle_01, [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 217ce44395a..664766d0c83 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -74,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, + { "diagnose_9c", VCPU_STAT(diagnose_9c) }, { NULL } }; @@ -133,8 +135,16 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_S390_UCONTROL: #endif case KVM_CAP_SYNC_REGS: + case KVM_CAP_ONE_REG: r = 1; break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_S390_COW: + r = sclp_get_fac85() & 0x2; + break; default: r = 0; } @@ -423,6 +433,71 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = put_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = put_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = put_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = put_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = get_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = get_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = get_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = get_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) { kvm_s390_vcpu_initial_reset(vcpu); @@ 
-753,6 +828,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_S390_INITIAL_RESET: r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); break; + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } #ifdef CONFIG_KVM_S390_UCONTROL case KVM_S390_UCAS_MAP: { struct kvm_s390_ucas_mapping ucasmap; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ff28f9d1c9e..2294377975e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -79,6 +79,7 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); +int kvm_s390_handle_01(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e5a45dbd26a..68a6b2ed16b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -380,3 +380,34 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_sckpf(struct kvm_vcpu *vcpu) +{ + u32 value; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + + if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) + return kvm_s390_inject_program_int(vcpu, + PGM_SPECIFICATION); + + value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff; + vcpu->arch.sie_block->todpr = value; + + return 0; +} + +static intercept_handler_t x01_handlers[256] = { + [0x07] = handle_sckpf, +}; + +int kvm_s390_handle_01(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 46ef3fd0663..72cec9ecd96 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -294,7 +294,7 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { + if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { @@ -549,19 +549,15 @@ static void pfault_interrupt(struct ext_code ext_code, if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; - if (subcode & 0x0080) { - /* Get the token (= pid of the affected task). */ - pid = sizeof(void *) == 4 ? param32 : param64; - rcu_read_lock(); - tsk = find_task_by_pid_ns(pid, &init_pid_ns); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return; - } else { - tsk = current; - } + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? 
param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ @@ -574,6 +570,7 @@ static void pfault_interrupt(struct ext_code ext_code, tsk->thread.pfault_wait = 0; list_del(&tsk->thread.list); wake_up_process(tsk); + put_task_struct(tsk); } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial @@ -585,24 +582,35 @@ static void pfault_interrupt(struct ext_code ext_code, if (tsk->state == TASK_RUNNING) tsk->thread.pfault_wait = -1; } - put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - if (tsk->thread.pfault_wait == -1) { + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait * back to zero and exit. */ tsk->thread.pfault_wait = 0; } else { /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. */ + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); } } +out: spin_unlock(&pfault_lock); + put_task_struct(tsk); } static int __cpuinit pfault_cpu_notify(struct notifier_block *self, @@ -620,6 +628,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, list_del(&thread->list); tsk = container_of(thread, struct task_struct, thread); wake_up_process(tsk); + put_task_struct(tsk); } spin_unlock_irq(&pfault_lock); break; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 597bb2d27c3..900de2b3cf2 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -58,6 +58,8 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; + clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, + PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 7bb15fcca75..795a0a9bb2e 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/gfp.h> +#include <linux/cpu.h> #include <asm/ctl_reg.h> /* @@ -61,21 +62,14 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? 
-EFAULT : 0; } -/* - * Copy memory in real mode (kernel to kernel) - */ -int memcpy_real(void *dest, void *src, size_t count) +static int __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; register unsigned long _src asm("4") = (unsigned long) src; register unsigned long _len2 asm("5") = (unsigned long) count; - unsigned long flags; int rc = -EFAULT; - if (!count) - return 0; - flags = __arch_local_irq_stnsm(0xf8UL); asm volatile ( "0: mvcle %1,%2,0x0\n" "1: jo 0b\n" @@ -86,7 +80,23 @@ int memcpy_real(void *dest, void *src, size_t count) "+d" (_len2), "=m" (*((long *) dest)) : "m" (*((long *) src)) : "cc", "memory"); - arch_local_irq_restore(flags); + return rc; +} + +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + unsigned long flags; + int rc; + + if (!count) + return 0; + local_irq_save(flags); + __arch_local_irq_stnsm(0xfbUL); + rc = __memcpy_real(dest, src, count); + local_irq_restore(flags); return rc; } @@ -157,3 +167,69 @@ out: free_page((unsigned long) buf); return rc; } + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(unsigned long addr) +{ + unsigned long lc; + int cpu; + + if (addr < sizeof(struct _lowcore)) + return 1; + for_each_online_cpu(cpu) { + lc = (unsigned long) lowcore_ptr[cpu]; + if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + continue; + return 1; + } + return 0; +} + +/* + * Return swapped prefix or zero page address + */ +static unsigned long get_swapped(unsigned long addr) +{ + unsigned long prefix = store_prefix(); + + if (addr < sizeof(struct _lowcore)) + return addr + prefix; + if (addr >= prefix && addr < prefix + sizeof(struct _lowcore)) + return addr - prefix; + return addr; +} + +/* + * Convert a physical pointer for /dev/mem access + * + * For swapped prefix pages a new buffer is returned that contains a copy of + * the absolute memory. The buffer size is maximum one page large. + */ +void *xlate_dev_mem_ptr(unsigned long addr) +{ + void *bounce = (void *) addr; + unsigned long size; + + get_online_cpus(); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_real(bounce, (void *) get_swapped(addr), size); + } + preempt_enable(); + put_online_cpus(); + return bounce; +} + +/* + * Free converted buffer for /dev/mem access (if necessary) + */ +void unxlate_dev_mem_ptr(unsigned long addr, void *buf) +{ + if ((void *) addr != buf) + free_page((unsigned long) buf); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 373adf69b01..a3db5a3ea08 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -678,8 +678,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } } -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; @@ -733,7 +731,66 @@ void __tlb_remove_table(void *_table) free_pages((unsigned long) table, ALLOC_ORDER); } -#endif +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. 
See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + __tlb_flush_mm(tlb->mm); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} /* * switch on pgstes for its userspace process (for kvm) @@ -765,6 +822,8 @@ int s390_enable_sie(void) /* we copy the mm and let dup_mm create the page tables with_pgstes */ tsk->mm->context.alloc_pgste = 1; + /* make sure that both mms have a correct rss state */ + sync_mm_rss(tsk->mm); mm = dup_mm(tsk); tsk->mm->context.alloc_pgste = 0; if (!mm) |
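The kvm-s390.c hunk above wires the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls to the new KVM_REG_S390_* register IDs declared in asm/kvm.h. A minimal userspace sketch of how that interface would be consumed is shown below; it is illustrative only and not part of this commit. The helper name read_todpr and the vcpu_fd variable are hypothetical, and it assumes a VCPU file descriptor obtained via KVM_CREATE_VCPU on a kernel that advertises KVM_CAP_ONE_REG as added by this series.

```c
/*
 * Illustrative sketch (assumption, not from this commit): fetch the guest
 * TOD programmable register via the ONE_REG interface added above.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>   /* pulls in asm/kvm.h with KVM_REG_S390_TODPR */

static int read_todpr(int vcpu_fd, uint32_t *todpr)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_TODPR,              /* 32-bit register id defined by this series */
		.addr = (uint64_t)(unsigned long)todpr,  /* user buffer the kernel put_user()s into */
	};

	/* Returns 0 on success; -1 with errno set otherwise. */
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
```

Setting a register works the same way with KVM_SET_ONE_REG, where the kernel side instead get_user()s the value from reg.addr, as implemented in kvm_arch_vcpu_ioctl_set_one_reg() above.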