Diffstat (limited to 'arch/s390')
-rw-r--r--  arch/s390/Kconfig.debug            |  12
-rw-r--r--  arch/s390/include/asm/chpid.h      |   2
-rw-r--r--  arch/s390/include/asm/css_chars.h  |   3
-rw-r--r--  arch/s390/include/asm/page.h       |  14
-rw-r--r--  arch/s390/include/asm/pgtable.h    |  30
-rw-r--r--  arch/s390/include/asm/setup.h      |  14
-rw-r--r--  arch/s390/include/uapi/asm/chsc.h  |  10
-rw-r--r--  arch/s390/kernel/early.c           |  17
-rw-r--r--  arch/s390/kernel/entry64.S         |   2
-rw-r--r--  arch/s390/kernel/head.S            | 101
-rw-r--r--  arch/s390/kernel/head31.S          |   3
-rw-r--r--  arch/s390/kernel/head64.S          |   3
-rw-r--r--  arch/s390/kernel/module.c          |  11
-rw-r--r--  arch/s390/kernel/setup.c           |  51
-rw-r--r--  arch/s390/mm/Makefile              |   1
-rw-r--r--  arch/s390/mm/dump_pagetables.c     | 226
-rw-r--r--  arch/s390/mm/pageattr.c            |  40
-rw-r--r--  arch/s390/mm/vmem.c                |  45
18 files changed, 474 insertions(+), 111 deletions(-)
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index d76cef3fef3..fc32a2df497 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -31,6 +31,18 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config S390_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	---help---
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file. This information is only useful for kernel developers
+	  who are working in architecture specific areas of the kernel.
+	  It is probably not a good idea to enable this feature in a production
+	  kernel.
+	  If in doubt, say "N"
+
 config DEBUG_SET_MODULE_RONX
 	def_bool y
 	depends on MODULES
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 64c76ddde3c..38c405ef89c 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -1,5 +1,5 @@
 /*
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2007, 2012
  * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
  */
 #ifndef _ASM_S390_CHPID_H
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index a06ebc2623f..7e1c917bbba 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -3,8 +3,6 @@
 
 #include <linux/types.h>
 
-#ifdef __KERNEL__
-
 struct css_general_char {
 	u64 : 12;
 	u32 dynio : 1;	/* bit 12 */
@@ -35,5 +33,4 @@ struct css_general_char {
 
 extern struct css_general_char css_general_characteristics;
 
-#endif /* __KERNEL__ */
 #endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 27ab3c7c1e8..6d5367060a5 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -30,12 +30,20 @@
 #include <asm/setup.h>
 #ifndef __ASSEMBLY__
 
+static inline unsigned long pfmf(unsigned long function, unsigned long address)
+{
+	asm volatile(
+		"	.insn	rre,0xb9af0000,%[function],%[address]"
+		: [address] "+a" (address)
+		: [function] "d" (function)
+		: "memory");
+	return address;
+}
+
 static inline void clear_page(void *page)
 {
 	if (MACHINE_HAS_PFMF) {
-		asm volatile(
-			"	.insn	rre,0xb9af0000,%0,%1"
-			: : "d" (0x10000), "a" (page) : "memory", "cc");
+		pfmf(0x10000, (unsigned long)page);
 	} else {
 		register unsigned long reg1 asm ("1") = 0;
 		register void *reg2 asm ("2") = page;
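The new pfmf() wrapper returns the updated address register, which is the property the callers added later in this patch build on: issue the instruction, get back the address where processing continues, and loop until a frame boundary is reached. Below is a host-side model of just that loop contract; PFMF itself is a privileged s390 instruction, and next_frame() plus the 1MB frame size are illustrative assumptions, not the instruction's definition:

	#include <assert.h>
	#include <stdio.h>

	/* Model of the loop contract only: each call handles one frame and
	 * returns the address where processing continues. */
	static unsigned long next_frame(unsigned long addr, unsigned long size)
	{
		return (addr + size) & ~(size - 1);	/* next size-aligned address */
	}

	int main(void)
	{
		unsigned long start = 1UL << 20, boundary = 3UL << 20;
		unsigned long size = 1UL << 20;		/* 1MB frames (EDAT1) */
		int frames = 0;

		do {	/* mirrors: do { start = pfmf(function, start); } while (...) */
			start = next_frame(start, size);
			frames++;
		} while (start < boundary);

		assert(start == boundary);
		printf("processed %d frames up to 0x%lx\n", frames, start);
		return 0;
	}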
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 979fe3dc078..dd647c919a6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -119,13 +119,12 @@ static inline int is_zero_pfn(unsigned long pfn)
 #ifndef __ASSEMBLY__
 
 /*
- * The vmalloc area will always be on the topmost area of the kernel
- * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc,
- * which should be enough for any sane case.
- * By putting vmalloc at the top, we maximise the gap between physical
- * memory and vmalloc to catch misplaced memory accesses. As a side
- * effect, this also makes sure that 64 bit module code cannot be used
- * as system call address.
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and
+ * modules. On 64 bit kernels we have a 2GB area at the top of the vmalloc
+ * area where modules will reside. That makes sure that inter module branches
+ * always happen without trampolines and in addition the placement within a
+ * 2GB frame is branch prediction unit friendly.
  */
 extern unsigned long VMALLOC_START;
 extern unsigned long VMALLOC_END;
@@ -133,6 +132,14 @@ extern struct page *vmemmap;
 
 #define VMEM_MAX_PHYS ((unsigned long) vmemmap)
 
+#ifdef CONFIG_64BIT
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
+#define MODULES_VADDR	MODULES_VADDR
+#define MODULES_END	MODULES_END
+#define MODULES_LEN	(1UL << 31)
+#endif
+
 /*
  * A 31 bit pagetable entry of S390 has following format:
  *  |   PFRA          |    |  OS  |
@@ -507,6 +514,15 @@ static inline int pmd_none(pmd_t pmd)
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL;
 }
 
+static inline int pmd_large(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+#else
+	return 0;
+#endif
+}
+
 static inline int pmd_bad(pmd_t pmd)
 {
 	unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 55ad134bced..f69f76b3447 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -71,8 +71,8 @@ extern unsigned int s390_user_mode;
 #define MACHINE_FLAG_DIAG9C	(1UL << 7)
 #define MACHINE_FLAG_MVCOS	(1UL << 8)
 #define MACHINE_FLAG_KVM	(1UL << 9)
-#define MACHINE_FLAG_HPAGE	(1UL << 10)
-#define MACHINE_FLAG_PFMF	(1UL << 11)
+#define MACHINE_FLAG_EDAT1	(1UL << 10)
+#define MACHINE_FLAG_EDAT2	(1UL << 11)
 #define MACHINE_FLAG_LPAR	(1UL << 12)
 #define MACHINE_FLAG_SPP	(1UL << 13)
 #define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
@@ -84,6 +84,8 @@ extern unsigned int s390_user_mode;
 #define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
 
 #define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_PFMF	MACHINE_HAS_EDAT1
+#define MACHINE_HAS_HPAGE	MACHINE_HAS_EDAT1
 
 #ifndef CONFIG_64BIT
 #define MACHINE_HAS_IEEE	(S390_lowcore.machine_flags & MACHINE_FLAG_IEEE)
@@ -92,8 +94,8 @@ extern unsigned int s390_user_mode;
 #define MACHINE_HAS_DIAG44	(1)
 #define MACHINE_HAS_MVPG	(S390_lowcore.machine_flags & MACHINE_FLAG_MVPG)
 #define MACHINE_HAS_MVCOS	(0)
-#define MACHINE_HAS_HPAGE	(0)
-#define MACHINE_HAS_PFMF	(0)
+#define MACHINE_HAS_EDAT1	(0)
+#define MACHINE_HAS_EDAT2	(0)
 #define MACHINE_HAS_SPP		(0)
 #define MACHINE_HAS_TOPOLOGY	(0)
 #define MACHINE_HAS_TE		(0)
@@ -105,8 +107,8 @@ extern unsigned int s390_user_mode;
 #define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_MVPG	(1)
 #define MACHINE_HAS_MVCOS	(S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS)
-#define MACHINE_HAS_HPAGE	(S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE)
-#define MACHINE_HAS_PFMF	(S390_lowcore.machine_flags & MACHINE_FLAG_PFMF)
+#define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
 #define MACHINE_HAS_SPP		(S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
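One detail in the pgtable.h hunk above is easy to miss: after declaring MODULES_VADDR and MODULES_END as variables, the header defines same-named macros that expand to themselves. Generic code can then probe the area with #ifdef while the bounds stay runtime values. A standalone sketch of the idiom (the assigned value is illustrative):

	#include <stdio.h>

	/* Self-referential define: makes "#ifdef MODULES_VADDR" true while
	 * the name still resolves to an ordinary runtime variable. */
	extern unsigned long MODULES_VADDR;
	#define MODULES_VADDR MODULES_VADDR

	unsigned long MODULES_VADDR;	/* the kernel sets this in setup_memory_end() */

	int main(void)
	{
	#ifdef MODULES_VADDR
		MODULES_VADDR = (1UL << 42) - (1UL << 31);	/* illustrative value */
		printf("modules area starts at 0x%lx\n", MODULES_VADDR);
	#endif
		return 0;
	}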
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index aea451fd182..1c6a7f85a58 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -1,7 +1,7 @@
 /*
  * ioctl interface for /dev/chsc
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2012
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  */
 
@@ -9,9 +9,12 @@
 #define _ASM_CHSC_H
 
 #include <linux/types.h>
+#include <linux/ioctl.h>
 #include <asm/chpid.h>
 #include <asm/schid.h>
 
+#define CHSC_SIZE 0x1000
+
 struct chsc_async_header {
 	__u16 length;
 	__u16 code;
@@ -23,15 +26,14 @@ struct chsc_async_header {
 
 struct chsc_async_area {
 	struct chsc_async_header header;
-	__u8 data[PAGE_SIZE - 16 /* size of chsc_async_header */];
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
 } __attribute__ ((packed));
 
-
 struct chsc_response_struct {
 	__u16 length;
 	__u16 code;
 	__u32 parms;
-	__u8 data[PAGE_SIZE - 8];
+	__u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
 } __attribute__ ((packed));
 
 struct chsc_chp_cd {
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 00d11444506..1f0eee9e7da 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -283,14 +283,6 @@ static noinline __init void setup_facility_list(void)
 		       ARRAY_SIZE(S390_lowcore.stfle_fac_list));
 }
 
-static noinline __init void setup_hpage(void)
-{
-	if (!test_facility(2) || !test_facility(8))
-		return;
-	S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
-	__ctl_set_bit(0, 23);
-}
-
 static __init void detect_mvpg(void)
 {
 #ifndef CONFIG_64BIT
@@ -378,10 +370,14 @@ static __init void detect_diag44(void)
 static __init void detect_machine_facilities(void)
 {
 #ifdef CONFIG_64BIT
+	if (test_facility(8)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+		__ctl_set_bit(0, 23);
+	}
+	if (test_facility(78))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
 	if (test_facility(3))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
-	if (test_facility(8))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
 	if (test_facility(27))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
 	if (test_facility(40))
@@ -484,7 +480,6 @@ void __init startup_init(void)
 	detect_diag9c();
 	detect_diag44();
 	detect_machine_facilities();
-	setup_hpage();
 	setup_topology();
 	sclp_facilities_detect();
 	detect_memory_layout(memory_chunk);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7549985402f..8f211ad1c69 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -295,7 +295,7 @@ sysc_sigpending:
 	jno	sysc_return
 	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	lghi	%r8,0			# svc 0 returns -ENOSYS
-	lh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
+	llgh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
 	cghi	%r1,NR_syscalls
 	jnl	sysc_nr_ok		# invalid svc number -> do svc 0
 	slag	%r8,%r1,2
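The one-instruction entry64.S change matters because lh sign-extends the loaded halfword, while the subsequent cghi/jnl bound check is a signed compare; llgh zero-extends the halfword into the full register, so any value above NR_syscalls is caught. A small C illustration of the sign-extension half of the hazard (the NR_syscalls value here is illustrative; the real one is generated at build time):

	#include <stdio.h>

	#define NR_syscalls 349		/* illustrative */

	int main(void)
	{
		unsigned short int_code = 0xff02;	/* halfword at __PT_INT_CODE+2 */
		long via_lh   = (short)int_code;	/* lh:   sign-extends */
		long via_llgh = int_code;		/* llgh: zero-extends */

		/* A sign-extended value sneaks past a signed >= bound check
		 * as a negative number; the zero-extended value is rejected. */
		printf("lh:   %6ld -> %s\n", via_lh,
		       via_lh >= NR_syscalls ? "rejected" : "accepted (bug)");
		printf("llgh: %6ld -> %s\n", via_llgh,
		       via_llgh >= NR_syscalls ? "rejected" : "accepted");
		return 0;
	}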
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 805b6686b64..984726cbce1 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -52,7 +52,7 @@ __HEAD
 	.long	0x02000370,0x60000050	# the channel program the PSW
 	.long	0x020003c0,0x60000050	# at location 0 is loaded.
 	.long	0x02000410,0x60000050	# Initial processing starts
-	.long	0x02000460,0x60000050	# at 0xf0 = iplstart.
+	.long	0x02000460,0x60000050	# at 0x200 = iplstart.
 	.long	0x020004b0,0x60000050
 	.long	0x02000500,0x60000050
 	.long	0x02000550,0x60000050
@@ -62,11 +62,54 @@ __HEAD
 	.long	0x02000690,0x60000050
 	.long	0x020006e0,0x20000050
 
-	.org	0xf0
+	.org	0x200
+
+#
+# subroutine to set architecture mode
+#
+.Lsetmode:
+#ifdef CONFIG_64BIT
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0			# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f
+	.fill	16,4,0x0
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam31				# switch to 31 bit addressing mode
+#else
+	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
+#endif
+	br	%r14
+
+#
+# subroutine to wait for end I/O
+#
+.Lirqwait:
+#ifdef CONFIG_64BIT
+	mvc	0x1f0(16),.Lnewpsw	# set up IO interrupt psw
+	lpsw	.Lwaitpsw
+.Lioint:
+	br	%r14
+	.align	8
+.Lnewpsw:
+	.quad	0x0000000080000000,.Lioint
+#else
+	mvc	0x78(8),.Lnewpsw	# set up IO interrupt psw
+	lpsw	.Lwaitpsw
+.Lioint:
+	br	%r14
+	.align	8
+.Lnewpsw:
+	.long	0x00080000,0x80000000+.Lioint
+#endif
+.Lwaitpsw:
+	.long	0x020a0000,0x80000000+.Lioint
+
 #
 # subroutine for loading cards from the reader
 #
 .Lloader:
+	la	%r4,0(%r14)
 	la	%r3,.Lorb		# r2 = address of orb into r2
 	la	%r5,.Lirb		# r4 = address of irb
 	la	%r6,.Lccws
@@ -83,9 +126,7 @@ __HEAD
 	ssch	0(%r3)			# load chunk of 1600 bytes
 	bnz	.Llderr
 .Lwait4irq:
-	mvc	0x78(8),.Lnewpsw	# set up IO interrupt psw
-	lpsw	.Lwaitpsw
-.Lioint:
+	bas	%r14,.Lirqwait
 	c	%r1,0xb8		# compare subchannel number
 	bne	.Lwait4irq
 	tsch	0(%r5)
@@ -104,7 +145,7 @@ __HEAD
 	sr	%r0,%r3			# #ccws*80-residual=#bytes read
 	ar	%r2,%r0
 
-	br	%r14			# r2 contains the total size
+	br	%r4			# r2 contains the total size
 
 .Lcont:
 	ahi	%r2,0x640		# add 0x640 to total size
@@ -128,10 +169,6 @@ __HEAD
 .Lloadp:.long	0,0
 	.align	8
 .Lcrash:.long	0x000a0000,0x00000000
-.Lnewpsw:
-	.long	0x00080000,0x80000000+.Lioint
-.Lwaitpsw:
-	.long	0x020a0000,0x80000000+.Lioint
 
 	.align	8
 .Lccws:	.rept	19
@@ -140,6 +177,7 @@ __HEAD
 	.long	0x02200050,0x00000000
 
 iplstart:
+	bas	%r14,.Lsetmode		# Immediately switch to 64 bit mode
 	lh	%r1,0xb8		# test if subchannel number
 	bct	%r1,.Lnoload		#  is valid
 	l	%r1,0xb8		# load ipl subchannel number
@@ -209,8 +247,8 @@ iplstart:
 #
 # reset files in VM reader
 #
-	stidp	__LC_SAVE_AREA_SYNC	# store cpuid
-	tm	__LC_SAVE_AREA_SYNC,0xff# running VM ?
+	stidp	.Lcpuid			# store cpuid
+	tm	.Lcpuid,0xff		# running VM ?
 	bno	.Lnoreset
 	la	%r2,.Lreset
 	lhi	%r3,26
@@ -222,23 +260,14 @@ iplstart:
 	tm	31(%r5),0xff		# bits is set in the schib
 	bz	.Lnoreset
.Lwaitforirq:
-	mvc	0x78(8),.Lrdrnewpsw	# set up IO interrupt psw
-.Lwaitrdrirq:
-	lpsw	.Lrdrwaitpsw
-.Lrdrint:
+	bas	%r14,.Lirqwait		# wait for IO interrupt
 	c	%r1,0xb8		# compare subchannel number
-	bne	.Lwaitrdrirq
+	bne	.Lwaitforirq
 	la	%r5,.Lirb
 	tsch	0(%r5)
 
 .Lnoreset:
 	b	.Lnoload
 
-	.align	8
-.Lrdrnewpsw:
-	.long	0x00080000,0x80000000+.Lrdrint
-.Lrdrwaitpsw:
-	.long	0x020a0000,0x80000000+.Lrdrint
-
 #
 # everything loaded, go for it
 #
@@ -254,6 +283,8 @@ iplstart:
 	.byte	0xc8,0xd6,0xd3,0xc4	# "change rdr all keep nohold"
 .L_eof: .long	0xc5d6c600	/* C'EOF' */
 .L_hdr: .long	0xc8c4d900	/* C'HDR' */
+	.align	8
+.Lcpuid:.fill	8,1,0
 
 #
 # SALIPL loader support. Based on a patch by Rob van der Heij.
@@ -263,6 +294,7 @@ iplstart:
 	.org	0x800
 ENTRY(start)
 	stm	%r0,%r15,0x07b0		# store registers
+	bas	%r14,.Lsetmode		# Immediately switch to 64 bit mode
 	basr	%r12,%r0
 .base:
 	l	%r11,.parm
@@ -343,6 +375,18 @@ ENTRY(startup)
 ENTRY(startup_kdump)
 	j	.Lep_startup_kdump
 .Lep_startup_normal:
+#ifdef CONFIG_64BIT
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0			# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f
+	.fill	16,4,0x0
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam31				# switch to 31 bit addressing mode
+#else
+	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
+#endif
 	basr	%r13,0			# get base
 .LPG0:	xc	0x200(256),0x200	# partially clear lowcore
@@ -410,22 +454,17 @@ ENTRY(startup_kdump)
 #endif
 
 #ifdef CONFIG_64BIT
-	mvi	__LC_AR_MODE_ID,1	# set esame flag
-	slr	%r0,%r0			# set cpuid to zero
-	lhi	%r1,2			# mode 2 = esame (dump)
-	sigp	%r1,%r0,0x12		# switch to esame mode
+	/* Continue with 64bit startup code in head64.S */
 	sam64				# switch to 64 bit mode
-	larl	%r13,4f
-	lmh	%r0,%r15,0(%r13)	# clear high-order half
 	jg	startup_continue
-4:	.fill	16,4,0x0
 #else
-	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
+	/* Continue with 31bit startup code in head31.S */
 	l	%r13,4f-.LPG0(%r13)
 	b	0(%r13)
 	.align	8
 4:	.long	startup_continue
 #endif
+
 	.align	8
 5:	.long	0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index a1372ae24ae..9a99856df1c 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -78,10 +78,7 @@ ENTRY(startup_continue)
 
 ENTRY(_ehead)
 
-#ifdef CONFIG_SHARED_KERNEL
 	.org	0x100000 - 0x11000	# head.o ends at 0x11000
-#endif
-
 #
 # startup-code, running in absolute addressing mode
 #
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c108af28bbe..b9e25ae2579 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -76,10 +76,7 @@ ENTRY(startup_continue)
 
 ENTRY(_ehead)
 
-#ifdef CONFIG_SHARED_KERNEL
 	.org	0x100000 - 0x11000	# head.o ends at 0x11000
-#endif
-
 #
 # startup-code, running in absolute addressing mode
 #
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 46412b1d7e1..4610deafd95 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -44,6 +44,17 @@
 #define PLT_ENTRY_SIZE 20
 #endif /* CONFIG_64BIT */
 
+#ifdef CONFIG_64BIT
+void *module_alloc(unsigned long size)
+{
+	if (PAGE_ALIGN(size) > MODULES_LEN)
+		return NULL;
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				    GFP_KERNEL, PAGE_KERNEL, -1,
+				    __builtin_return_address(0));
+}
+#endif
+
 /* Free memory returned from module_alloc */
 void module_free(struct module *mod, void *module_region)
 {
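module_alloc() above confines module text to the dedicated 2GB window, and setup_memory_end() in the setup.c hunk below computes where that window sits. A userspace sketch of the same arithmetic for the 3-level (1UL << 42) case, mirroring the assignments in the patch:

	#include <stdio.h>

	int main(void)
	{
		/* mirrors setup_memory_end() for the 3-level case;
		 * modules_len is the 2GB MODULES_LEN constant from pgtable.h */
		unsigned long vmax = 1UL << 42;		/* 3-level kernel page table */
		unsigned long modules_len = 1UL << 31;
		unsigned long vmalloc_size = (128UL << 30) - modules_len;
		unsigned long modules_end = vmax;
		unsigned long modules_vaddr = modules_end - modules_len;
		unsigned long vmalloc_end = modules_vaddr;
		unsigned long vmalloc_start = vmax - vmalloc_size;

		printf("modules: 0x%016lx-0x%016lx\n", modules_vaddr, modules_end);
		printf("vmalloc: 0x%016lx-0x%016lx\n", vmalloc_start, vmalloc_end);
		return 0;
	}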
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index afa9fdba200..b1f2be9aaaa 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -105,6 +105,11 @@ EXPORT_SYMBOL(VMALLOC_END);
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
 
+#ifdef CONFIG_64BIT
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+#endif
+
 /* An array with a pointer to the lowcore of every CPU. */
 struct _lowcore *lowcore_ptr[NR_CPUS];
 EXPORT_SYMBOL(lowcore_ptr);
@@ -544,19 +549,23 @@ static void __init setup_memory_end(void)
 
 	/* Choose kernel address space layout: 2, 3, or 4 levels. */
 #ifdef CONFIG_64BIT
-	vmalloc_size = VMALLOC_END ?: 128UL << 30;
+	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
 	tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
 	tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
 	if (tmp <= (1UL << 42))
 		vmax = 1UL << 42;	/* 3-level kernel page table */
 	else
 		vmax = 1UL << 53;	/* 4-level kernel page table */
+	/* module area is at the end of the kernel address space. */
+	MODULES_END = vmax;
+	MODULES_VADDR = MODULES_END - MODULES_LEN;
+	VMALLOC_END = MODULES_VADDR;
 #else
 	vmalloc_size = VMALLOC_END ?: 96UL << 20;
 	vmax = 1UL << 31;	/* 2-level kernel page table */
-#endif
 	/* vmalloc area is at the end of the kernel address space. */
 	VMALLOC_END = vmax;
+#endif
 	VMALLOC_START = vmax - vmalloc_size;
 
 	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
@@ -768,6 +777,40 @@ static void __init reserve_crashkernel(void)
 #endif
 }
 
+static void __init init_storage_keys(unsigned long start, unsigned long end)
+{
+	unsigned long boundary, function, size;
+
+	while (start < end) {
+		if (MACHINE_HAS_EDAT2) {
+			/* set storage keys for a 2GB frame */
+			function = 0x22000 | PAGE_DEFAULT_KEY;
+			size = 1UL << 31;
+			boundary = (start + size) & ~(size - 1);
+			if (boundary <= end) {
+				do {
+					start = pfmf(function, start);
+				} while (start < boundary);
+				continue;
+			}
+		}
+		if (MACHINE_HAS_EDAT1) {
+			/* set storage keys for a 1MB frame */
+			function = 0x21000 | PAGE_DEFAULT_KEY;
+			size = 1UL << 20;
+			boundary = (start + size) & ~(size - 1);
+			if (boundary <= end) {
+				do {
+					start = pfmf(function, start);
+				} while (start < boundary);
+				continue;
+			}
+		}
+		page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+		start += PAGE_SIZE;
+	}
+}
+
 static void __init setup_memory(void)
 {
 	unsigned long bootmap_size;
@@ -846,9 +889,7 @@ static void __init setup_memory(void)
 		memblock_add_node(PFN_PHYS(start_chunk),
 				  PFN_PHYS(end_chunk - start_chunk), 0);
 		pfn = max(start_chunk, start_pfn);
-		for (; pfn < end_chunk; pfn++)
-			page_set_storage_key(PFN_PHYS(pfn),
-					     PAGE_DEFAULT_KEY, 0);
+		init_storage_keys(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
 	}
 	psw_set_key(PAGE_DEFAULT_KEY);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 0f5536b0c1a..1bea6d1f55a 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,3 +7,4 @@ obj-y	:= init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_DEBUG_SET_MODULE_RONX)	+= pageattr.o
+obj-$(CONFIG_S390_PTDUMP)	+= dump_pagetables.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 00000000000..cbc6668acb8
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,226 @@
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+static unsigned long max_addr;
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+enum address_markers_idx {
+	IDENTITY_NR = 0,
+	KERNEL_START_NR,
+	KERNEL_END_NR,
+	VMEMMAP_NR,
+	VMALLOC_NR,
+#ifdef CONFIG_64BIT
+	MODULES_NR,
+#endif
+};
+
+static struct addr_marker address_markers[] = {
+	[IDENTITY_NR]	  = {0, "Identity Mapping"},
+	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
+	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
+	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
+	[VMALLOC_NR]	  = {0, "vmalloc Area"},
+#ifdef CONFIG_64BIT
+	[MODULES_NR]	  = {0, "Modules Area"},
+#endif
+	{ -1, NULL }
+};
+
+struct pg_state {
+	int level;
+	unsigned int current_prot;
+	unsigned long start_address;
+	unsigned long current_address;
+	const struct addr_marker *marker;
+};
+
+static void print_prot(struct seq_file *m, unsigned int pr, int level)
+{
+	static const char * const level_name[] =
+		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
+
+	seq_printf(m, "%s ", level_name[level]);
+	if (pr & _PAGE_INVALID)
+		seq_printf(m, "I\n");
+	else
+		seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
+}
+
+static void note_page(struct seq_file *m, struct pg_state *st,
+		      unsigned int new_prot, int level)
+{
+	static const char units[] = "KMGTPE";
+	int width = sizeof(unsigned long) * 2;
+	const char *unit = units;
+	unsigned int prot, cur;
+	unsigned long delta;
+
+	/*
+	 * If we have a "break" in the series, we need to flush the state
+	 * that we have now. "break" is either changing perms, levels or
+	 * address space marker.
+	 */
+	prot = new_prot;
+	cur = st->current_prot;
+
+	if (!st->level) {
+		/* First entry */
+		st->current_prot = new_prot;
+		st->level = level;
+		st->marker = address_markers;
+		seq_printf(m, "---[ %s ]---\n", st->marker->name);
+	} else if (prot != cur || level != st->level ||
+		   st->current_address >= st->marker[1].start_address) {
+		/* Print the actual finished series */
+		seq_printf(m, "0x%0*lx-0x%0*lx",
+			   width, st->start_address,
+			   width, st->current_address);
+		delta = (st->current_address - st->start_address) >> 10;
+		while (!(delta & 0x3ff) && unit[1]) {
+			delta >>= 10;
+			unit++;
+		}
+		seq_printf(m, "%9lu%c ", delta, *unit);
+		print_prot(m, st->current_prot, st->level);
+		if (st->current_address >= st->marker[1].start_address) {
+			st->marker++;
+			seq_printf(m, "---[ %s ]---\n", st->marker->name);
+		}
+		st->start_address = st->current_address;
+		st->current_prot = new_prot;
+		st->level = level;
+	}
+}
+
+/*
+ * The actual page table walker functions. In order to keep the implementation
+ * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
+ * flags to note_page() if a region, segment or page table entry is invalid or
+ * read-only.
+ * After all it's just a hint that the current level being walked contains an
+ * invalid or read-only entry.
+ */
+static void walk_pte_level(struct seq_file *m, struct pg_state *st,
+			   pmd_t *pmd, unsigned long addr)
+{
+	unsigned int prot;
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
+		st->current_address = addr;
+		pte = pte_offset_kernel(pmd, addr);
+		prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+		note_page(m, st, prot, 4);
+		addr += PAGE_SIZE;
+	}
+}
+
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
+			   pud_t *pud, unsigned long addr)
+{
+	unsigned int prot;
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
+		st->current_address = addr;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_none(*pmd)) {
+			if (pmd_large(*pmd)) {
+				prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
+				note_page(m, st, prot, 3);
+			} else
+				walk_pte_level(m, st, pmd, addr);
+		} else
+			note_page(m, st, _PAGE_INVALID, 3);
+		addr += PMD_SIZE;
+	}
+}
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st,
+			   pgd_t *pgd, unsigned long addr)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
+		st->current_address = addr;
+		pud = pud_offset(pgd, addr);
+		if (!pud_none(*pud))
+			walk_pmd_level(m, st, pud, addr);
+		else
+			note_page(m, st, _PAGE_INVALID, 2);
+		addr += PUD_SIZE;
+	}
+}
+
+static void walk_pgd_level(struct seq_file *m)
+{
+	unsigned long addr = 0;
+	struct pg_state st;
+	pgd_t *pgd;
+	int i;
+
+	memset(&st, 0, sizeof(st));
+	for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
+		st.current_address = addr;
+		pgd = pgd_offset_k(addr);
+		if (!pgd_none(*pgd))
+			walk_pud_level(m, &st, pgd, addr);
+		else
+			note_page(m, &st, _PAGE_INVALID, 1);
+		addr += PGDIR_SIZE;
+	}
+	/* Flush out the last page */
+	st.current_address = max_addr;
+	note_page(m, &st, 0, 0);
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	walk_pgd_level(m);
+	return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+	.open		= ptdump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int pt_dump_init(void)
+{
+	/*
+	 * Figure out the maximum virtual address being accessible with the
+	 * kernel ASCE. We need this to keep the page table walker functions
+	 * from accessing non-existent entries.
+	 */
+#ifdef CONFIG_32BIT
+	max_addr = 1UL << 31;
+#else
+	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+	max_addr = 1UL << (max_addr * 11 + 31);
+	address_markers[MODULES_NR].start_address = MODULES_VADDR;
+#endif
+	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+	return 0;
+}
+device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index b36537a5f43..00be01c4b4f 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,25 +8,38 @@
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
+static pte_t *walk_page_table(unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pgdp = pgd_offset_k(addr);
+	if (pgd_none(*pgdp))
+		return NULL;
+	pudp = pud_offset(pgdp, addr);
+	if (pud_none(*pudp))
+		return NULL;
+	pmdp = pmd_offset(pudp, addr);
+	if (pmd_none(*pmdp) || pmd_large(*pmdp))
+		return NULL;
+	ptep = pte_offset_kernel(pmdp, addr);
+	if (pte_none(*ptep))
+		return NULL;
+	return ptep;
+}
+
 static void change_page_attr(unsigned long addr, int numpages,
 			     pte_t (*set) (pte_t))
 {
 	pte_t *ptep, pte;
-	pmd_t *pmdp;
-	pud_t *pudp;
-	pgd_t *pgdp;
 	int i;
 
 	for (i = 0; i < numpages; i++) {
-		pgdp = pgd_offset(&init_mm, addr);
-		pudp = pud_offset(pgdp, addr);
-		pmdp = pmd_offset(pudp, addr);
-		if (pmd_huge(*pmdp)) {
-			WARN_ON_ONCE(1);
-			continue;
-		}
-		ptep = pte_offset_kernel(pmdp, addr);
-
+		ptep = walk_page_table(addr);
+		if (WARN_ON_ONCE(!ptep))
+			break;
 		pte = *ptep;
 		pte = set(pte);
 		__ptep_ipte(addr, ptep);
@@ -40,21 +53,18 @@ int set_memory_ro(unsigned long addr, int numpages)
 	change_page_attr(addr, numpages, pte_wrprotect);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	change_page_attr(addr, numpages, pte_mkwrite);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 /* not possible */
 int set_memory_nx(unsigned long addr, int numpages)
 {
 	return 0;
 }
-EXPORT_SYMBOL_GPL(set_memory_nx);
 
 int set_memory_x(unsigned long addr, int numpages)
 {
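With CONFIG_S390_PTDUMP enabled, the dumper's output can be read from /sys/kernel/debug/kernel_page_tables (created with mode 0400, so root only, with debugfs mounted). Each line is produced by note_page(), which scales the range size through the "KMGTPE" unit string. A standalone model of that formatting, with made-up sample ranges:

	#include <stdio.h>

	/* Model of note_page()'s output line: range, human-readable size,
	 * level name and protection, matching the seq_printf calls above. */
	static void print_range(unsigned long start, unsigned long end,
				int level, const char *prot)
	{
		static const char units[] = "KMGTPE";
		static const char * const level_name[] =
			{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
		const char *unit = units;
		unsigned long delta = (end - start) >> 10;	/* start in KB */
		int width = sizeof(unsigned long) * 2;

		while (!(delta & 0x3ff) && unit[1]) {	/* scale by 1024 while possible */
			delta >>= 10;
			unit++;
		}
		printf("0x%0*lx-0x%0*lx%9lu%c %s %s\n", width, start, width, end,
		       delta, *unit, level_name[level], prot);
	}

	int main(void)
	{
		/* made-up sample ranges */
		print_range(0x0UL, 0x100000UL, 3, "RW");	/* 1M PMD RW */
		print_range(0x100000UL, 0x200000UL, 4, "RO");	/* 1M PTE RO */
		return 0;
	}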