diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-06 12:34:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-06 12:34:53 -0700 |
commit | 2bf1bef0d6c3636353938dd1f183a4522957fc35 (patch) | |
tree | 3d0aae8c8454e60b0e53f456b7e94127d91f9ccf /arch/s390 | |
parent | d7ab7302f970a254997687a1cdede421a5635c68 (diff) | |
parent | 996b4a7d8f4e5dd531369396f2312b97e9400cdc (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull more s390 updates from Martin Schwidefsky:
"This is the second batch of s390 patches for the 3.10 merge window.
Heiko improved the memory detection, this fixes kdump for large memory
sizes. Some kvm related memory management work, new ipldev/condev
keywords in cio and bug fixes."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
s390/mem_detect: remove artificial kdump memory types
s390/mm: add pte invalidation notifier for kvm
s390/zcrypt: ap bus rescan problem when toggle crypto adapters on/off
s390/memory hotplug,sclp: get rid of per memory increment usecount
s390/memory hotplug: provide memory_block_size_bytes() function
s390/mem_detect: limit memory detection loop to "mem=" parameter
s390/kdump,bootmem: fix bootmem allocator bitmap size
s390: get rid of odd global real_memory_size
s390/kvm: Change the virtual memory mapping location for Virtio devices
s390/zcore: calculate real memory size using own get_mem_size function
s390/mem_detect: add DAT sanity check
s390/mem_detect: fix lockdep irq tracing
s390/mem_detect: move memory detection code to mm folder
s390/zfcpdump: exploit new cio_ignore keywords
s390/cio: add condev keyword to cio_ignore
s390/cio: add ipldev keyword to cio_ignore
s390/uaccess: add "fallthrough" comments
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 66 | ||||
-rw-r--r-- | arch/s390/include/asm/setup.h | 9 | ||||
-rw-r--r-- | arch/s390/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/kernel/crash_dump.c | 6 | ||||
-rw-r--r-- | arch/s390/kernel/early.c | 1 | ||||
-rw-r--r-- | arch/s390/kernel/mem_detect.c | 145 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 65 | ||||
-rw-r--r-- | arch/s390/lib/uaccess_pt.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 11 | ||||
-rw-r--r-- | arch/s390/mm/mem_detect.c | 134 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 121 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 8 |
13 files changed, 356 insertions, 217 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b4622915bd1..4105b8221fd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -306,6 +306,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x00200000UL #define RCP_GR_BIT 0x00040000UL #define RCP_GC_BIT 0x00020000UL +#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x00008000UL @@ -373,6 +374,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x0020000000000000UL #define RCP_GR_BIT 0x0004000000000000UL #define RCP_GC_BIT 0x0002000000000000UL +#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x0000800000000000UL @@ -746,30 +748,42 @@ struct gmap { /** * struct gmap_rmap - reverse mapping for segment table entries - * @next: pointer to the next gmap_rmap structure in the list + * @gmap: pointer to the gmap_struct * @entry: pointer to a segment table entry + * @vmaddr: virtual address in the guest address space */ struct gmap_rmap { struct list_head list; + struct gmap *gmap; unsigned long *entry; + unsigned long vmaddr; }; /** * struct gmap_pgtable - gmap information attached to a page table * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries maping a page table + * @mapper: list of segment table entries mapping a page table */ struct gmap_pgtable { unsigned long vmaddr; struct list_head mapper; }; +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + struct gmap *gmap_alloc(struct mm_struct *mm); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long length); + unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(unsigned long address, struct gmap *); unsigned long gmap_translate(unsigned long address, struct gmap *); @@ -777,6 +791,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & RCP_IN_BIT) { + pgste_val(pgste) &= ~RCP_IN_BIT; + gmap_do_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -1032,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1052,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + pgste_t pgste; pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) - pgste_get_lock(ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1082,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pgste_t pgste; pte_t pte; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } pte = *ptep; __ptep_ipte(address, ptep); @@ -1111,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + if (!full) + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!full) @@ -1135,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); @@ -1160,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } __ptep_ipte(address, ptep); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index ff67d730c00..59880dbaf36 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -33,8 +33,6 @@ #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 -#define CHUNK_OLDMEM 4 -#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -43,13 +41,12 @@ struct mem_chunk { }; extern struct mem_chunk memory_chunk[]; -extern unsigned long real_memory_size; extern int memory_end_set; extern unsigned long memory_end; -void detect_memory_layout(struct mem_chunk chunk[]); -void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, - unsigned long size, int type); +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize); +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1386fcaf4ef..4bb2a465616 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -30,7 +30,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += dumpstack.o diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index fb8d8781a01..f703d91bf72 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -88,8 +88,8 @@ static struct mem_chunk *get_memory_layout(void) struct mem_chunk *chunk_array; chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + detect_memory_layout(chunk_array, 0); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); return chunk_array; } @@ -344,7 +344,7 @@ static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) for (i = 0; i < MEMORY_CHUNKS; i++) { mem_chunk = &chunk_array[i]; if (mem_chunk->size == 0) - break; + continue; if (chunk_array[i].type != CHUNK_READ_WRITE && chunk_array[i].type != CHUNK_READ_ONLY) continue; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index bda011e2f8a..dc8770d7173 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -482,7 +482,6 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_facilities_detect(); - detect_memory_layout(memory_chunk); #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c deleted file mode 100644 index 22d502e885e..00000000000 --- a/arch/s390/kernel/mem_detect.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/ipl.h> -#include <asm/sclp.h> -#include <asm/setup.h> - -#define ADDR2G (1ULL << 31) - -static void find_memory_chunks(struct mem_chunk chunk[]) -{ - unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; - - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); - memsize = rzm * rnmax; - if (!rzm) - rzm = 1ULL << 17; - if (sizeof(long) == 4) { - rzm = min(ADDR2G, rzm); - memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; - } - do { - size = 0; - type = tprot(addr); - do { - size += rzm; - if (memsize && addr + size >= memsize) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; - } - addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -void detect_memory_layout(struct mem_chunk chunk[]) -{ - unsigned long flags, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - flags = __arch_local_irq_stnsm(0xf8); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); - -/* - * Move memory chunks array from index "from" to index "to" - */ -static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) -{ - int cnt = MEMORY_CHUNKS - to; - - memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); -} - -/* - * Initialize memory chunk - */ -static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, - unsigned long size, int type) -{ - chunk->type = type; - chunk->addr = addr; - chunk->size = size; -} - -/* - * Create memory hole with given address, size, and type - */ -void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, - unsigned long size, int type) -{ - unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; - int i, ch_type; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (chunk[i].size == 0) - continue; - - /* Define chunk properties */ - ch_start = chunk[i].addr; - ch_size = chunk[i].size; - ch_end = ch_start + ch_size - 1; - ch_type = chunk[i].type; - - /* Is memory chunk hit by memory hole? */ - if (addr + size <= ch_start) - continue; /* No: memory hole in front of chunk */ - if (addr > ch_end) - continue; /* No: memory hole after chunk */ - - /* Yes: Define local hole properties */ - lh_start = max(addr, chunk[i].addr); - lh_end = min(addr + size - 1, ch_end); - lh_size = lh_end - lh_start + 1; - - if (lh_start == ch_start && lh_end == ch_end) { - /* Hole covers complete memory chunk */ - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - } else if (lh_end == ch_end) { - /* Hole starts in memory chunk and convers chunk end */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, - ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - i += 1; - } else if (lh_start == ch_start) { - /* Hole ends in memory chunk */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 1], lh_end + 1, - ch_size - lh_size, ch_type); - break; - } else { - /* Hole splits memory chunk */ - mem_chunk_move(chunk, i + 2, i); - mem_chunk_init(&chunk[i], ch_start, - lh_start - ch_start, ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 2], lh_end + 1, - ch_end - lh_end, ch_type); - break; - } - } -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0f419c5765c..0a49095104c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -226,25 +226,17 @@ static void __init conmode_default(void) } #ifdef CONFIG_ZFCPDUMP -static void __init setup_zfcpdump(unsigned int console_devno) +static void __init setup_zfcpdump(void) { - static char str[41]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; if (OLDMEM_BASE) return; - if (console_devno != -1) - sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, " cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(boot_command_line, str); + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} +static inline void setup_zfcpdump(void) {} #endif /* CONFIG_ZFCPDUMP */ /* @@ -471,14 +463,10 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_OLDMEM || - memory_chunk[i].type == CHUNK_CRASHK) - continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: - case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -510,12 +498,10 @@ static void __init setup_resources(void) } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; + unsigned long real_memory_size = 0; int i; @@ -525,7 +511,6 @@ static void __init setup_memory_end(void) memory_end_set = 1; } #endif - real_memory_size = 0; memory_end &= PAGE_MASK; /* @@ -538,6 +523,8 @@ static void __init setup_memory_end(void) unsigned long align; chunk = &memory_chunk[i]; + if (!chunk->size) + continue; align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); start = (chunk->addr + align - 1) & ~(align - 1); end = (chunk->addr + chunk->size) & ~(align - 1); @@ -588,6 +575,8 @@ static void __init setup_memory_end(void) for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; + if (!chunk->size) + continue; if (chunk->addr >= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; @@ -688,15 +677,6 @@ static int __init verify_crash_base(unsigned long crash_base, } /* - * Reserve kdump memory by creating a memory hole in the mem_chunk array - */ -static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, - int type) -{ - create_mem_hole(memory_chunk, addr, size, type); -} - -/* * When kdump is enabled, we have to ensure that no memory from * the area [0 - crashkernel memory size] and * [crashk_res.start - crashk_res.end] is set offline. @@ -727,16 +707,22 @@ static struct notifier_block kdump_mem_nb = { static void reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP + unsigned long real_size = 0; + int i; + if (!OLDMEM_BASE) return; + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; - reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); - reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, - CHUNK_OLDMEM); - if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + real_size = max(real_size, chunk->addr + chunk->size); + } + create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); + create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); + if (OLDMEM_BASE + OLDMEM_SIZE == real_size) saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; else - saved_max_pfn = PFN_DOWN(real_memory_size) - 1; + saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -775,7 +761,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + create_mem_hole(memory_chunk, crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -847,11 +833,10 @@ static void __init setup_memory(void) * Register RAM areas with the bootmem allocator. */ - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + for (i = 0; i < MEMORY_CHUNKS; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE && - memory_chunk[i].type != CHUNK_CRASHK) + if (!memory_chunk[i].size) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); @@ -1067,12 +1052,12 @@ void __init setup_arch(char **cmdline_p) memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); parse_early_param(); - + detect_memory_layout(memory_chunk, memory_end); os_info_init(); setup_ipl(); + reserve_oldmem(); setup_memory_end(); setup_addressing_mode(); - reserve_oldmem(); reserve_crashkernel(); setup_memory(); setup_resources(); @@ -1097,5 +1082,5 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); } diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 466fb338396..50ea137a2d3 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -89,16 +89,19 @@ static unsigned long follow_table(struct mm_struct *mm, if (unlikely(*table & _REGION_ENTRY_INV)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); if (unlikely(*table & _SEGMENT_ENTRY_INV)) diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 640bea12303..839592ca265 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o -obj-y += page-states.o gup.o extable.o pageattr.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0b09b234230..89ebae4008f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/memory.h> #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> @@ -36,6 +37,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); @@ -214,6 +216,15 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * or equal than the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int arch_remove_memory(u64 start, u64 size) { diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 00000000000..3cbd3b8bf31 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,134 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr = 0, size; + int i = 0, type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (sizeof(long) == 4) { + rzm = min(ADDR2G, rzm); + memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + } + if (maxsize) + memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize; + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (memsize && addr + size >= memsize) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (memsize && (addr + size > memsize)) + size = memsize - addr; + chunk[i].addr = addr; + chunk[i].size = size; + chunk[i].type = type; + i++; + } + addr += size; + } while (addr < memsize && i < MEMORY_CHUNKS); +} + +/** + * detect_memory_layout - fill mem_chunk array with memory layout data + * @chunk: mem_chunk array to be filled + * @maxsize: maximum address where memory detection should stop + * + * Fills the passed in memory chunk array with the memory layout of the + * machine. The array must have a size of at least MEMORY_CHUNKS and will + * be fully initialized afterwards. + * If the maxsize paramater has a value > 0 memory detection will stop at + * that address. It is guaranteed that all chunks have an ending address + * that is smaller than maxsize. + * If maxsize is 0 all memory will be detected. + */ +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long flags, flags_dat, cr0; + + memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); + /* + * Disable IRQs, DAT and low address protection so tprot does the + * right thing and we don't get scheduled away with low address + * protection disabled. + */ + local_irq_save(flags); + flags_dat = __arch_local_irq_stnsm(0xfb); + /* + * In case DAT was enabled, make sure chunk doesn't reside in vmalloc + * space. We have disabled DAT and any access to vmalloc area will + * cause an exception. + * If DAT was disabled we are called from early ipl code. + */ + if (test_bit(5, &flags_dat)) { + if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk))) + goto out; + } + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); + find_memory_chunks(chunk, maxsize); + __ctl_load(cr0, 0, 0); +out: + __arch_local_irq_ssm(flags_dat); + local_irq_restore(flags); +} +EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address and size. + */ +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size) +{ + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &mem_chunk[i]; + + if (chunk->size == 0) + continue; + if (addr > chunk->addr + chunk->size) + continue; + if (addr + size <= chunk->addr) + continue; + /* Split */ + if ((addr > chunk->addr) && + (addr + size < chunk->addr + chunk->size)) { + struct mem_chunk *new = chunk + 1; + + memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new)); + new->addr = addr + size; + new->size = chunk->addr + chunk->size - new->addr; + chunk->size = addr - chunk->addr; + continue; + } else if ((addr <= chunk->addr) && + (addr + size >= chunk->addr + chunk->size)) { + memset(chunk, 0 , sizeof(*chunk)); + } else if (addr + size < chunk->addr + chunk->size) { + chunk->size = chunk->addr + chunk->size - addr - size; + chunk->addr = addr + size; + } else if (addr > chunk->addr) { + chunk->size = addr - chunk->addr; + } + } +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index bd954e96f51..7805ddca833 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -454,9 +454,8 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long segment, - unsigned long *segment_ptr, - struct gmap *gmap) +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) { unsigned long vmaddr; struct vm_area_struct *vma; @@ -491,7 +490,9 @@ static int gmap_connect_pgtable(unsigned long segment, /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; rmap->entry = segment_ptr; + rmap->vmaddr = address; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -553,7 +554,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) if (!(segment & _SEGMENT_ENTRY_RO)) /* Nothing mapped in the gmap address space. */ break; - rc = gmap_connect_pgtable(segment, segment_ptr, gmap); + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); if (rc) return rc; } @@ -619,6 +620,118 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @address: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, + NULL, NULL) != 1) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= RCP_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 35837054f73..8b268fcc461 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -375,9 +375,8 @@ void __init vmem_map_init(void) ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; @@ -412,9 +411,6 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) - continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); |