From 6252d702c5311ce916caf75ed82e5c8245171c92 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sat, 9 Feb 2008 18:24:37 +0100 Subject: [S390] dynamic page tables. Add support for different number of page table levels dependent on the highest address used for a process. This will cause a 31 bit process to use a two level page table instead of the four level page table that is the default after the pud has been introduced. Likewise a normal 64 bit process will use three levels instead of four. Only if a process runs out of the 4 tera bytes which can be addressed with a three level page table the fourth level is dynamically added. Then the process can use up to 8 peta byte. Signed-off-by: Martin Schwidefsky --- include/asm-s390/elf.h | 2 +- include/asm-s390/mmu.h | 1 + include/asm-s390/mmu_context.h | 8 ++++---- include/asm-s390/pgalloc.h | 24 +++++++++++++----------- include/asm-s390/pgtable.h | 38 +++++++++++++++++++++++++++++++------- include/asm-s390/processor.h | 25 +++---------------------- include/asm-s390/tlb.h | 10 ++++++++++ 7 files changed, 63 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/asm-s390/elf.h b/include/asm-s390/elf.h index b760cd4de38..b3ac262c458 100644 --- a/include/asm-s390/elf.h +++ b/include/asm-s390/elf.h @@ -138,7 +138,7 @@ typedef s390_regs elf_gregset_t; use of this is to invoke "./ld.so someprog" to test out a new version of the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) /* Wow, the "main" arch needs arch dependent functions too.. :) */ diff --git a/include/asm-s390/mmu.h b/include/asm-s390/mmu.h index 13ec4215f43..1698e29c5b2 100644 --- a/include/asm-s390/mmu.h +++ b/include/asm-s390/mmu.h @@ -5,6 +5,7 @@ typedef struct { struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; + unsigned long asce_limit; int noexec; } mm_context_t; diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index b3ea3e19992..b5a34c6f91a 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -18,9 +18,11 @@ static inline int init_new_context(struct task_struct *tsk, { mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; #ifdef CONFIG_64BIT - mm->context.asce_bits |= _ASCE_TYPE_REGION2; + mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif mm->context.noexec = s390_noexec; + mm->context.asce_limit = STACK_TOP_MAX; + crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; } @@ -47,13 +49,12 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) /* Load home space page table origin. */ asm volatile(LCTL_OPCODE" 13,13,%0" : : "m" (S390_lowcore.user_asce) ); + set_fs(current->thread.mm_segment); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - if (unlikely(prev == next)) - return; cpu_set(smp_processor_id(), next->cpu_vm_mask); update_mm(next, tsk); } @@ -65,7 +66,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - set_fs(current->thread.mm_segment); } #endif /* __S390_MMU_CONTEXT_H */ diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index cc47dd65a49..f5b2bf3d7c1 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -73,9 +73,16 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) static inline unsigned long pgd_entry_type(struct mm_struct *mm) { + if (mm->context.asce_limit <= (1UL << 31)) + return _SEGMENT_ENTRY_EMPTY; + if (mm->context.asce_limit <= (1UL << 42)) + return _REGION3_ENTRY_EMPTY; return _REGION2_ENTRY_EMPTY; } +int crst_table_upgrade(struct mm_struct *, unsigned long limit); +void crst_table_downgrade(struct mm_struct *, unsigned long limit); + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm, mm->context.noexec); @@ -102,12 +109,12 @@ static inline void pgd_populate_kernel(struct mm_struct *mm, static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_t *shadow_pgd = get_shadow_table(pgd); - pud_t *shadow_pud = get_shadow_table(pud); - - if (shadow_pgd && shadow_pud) - pgd_populate_kernel(mm, shadow_pgd, shadow_pud); pgd_populate_kernel(mm, pgd, pud); + if (mm->context.noexec) { + pgd = get_shadow_table(pgd); + pud = get_shadow_table(pud); + pgd_populate_kernel(mm, pgd, pud); + } } static inline void pud_populate_kernel(struct mm_struct *mm, @@ -130,14 +137,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - unsigned long *crst; - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - crst = crst_table_alloc(mm, s390_noexec); - if (crst) - crst_table_init(crst, pgd_entry_type(mm)); - return (pgd_t *) crst; + return (pgd_t *) crst_table_alloc(mm, s390_noexec); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 8f473a71811..65154dc9a9e 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -421,36 +421,54 @@ static inline int pud_bad(pud_t pud) { return 0; } static inline int pgd_present(pgd_t pgd) { + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 1; return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pgd_none(pgd_t pgd) { + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 0; return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; } static inline int pgd_bad(pgd_t pgd) { + /* + * With dynamic page table levels the pgd can be a region table + * entry or a segment table entry. Check for the bit that are + * invalid for either table entry. + */ unsigned long mask = - ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } static inline int pud_present(pud_t pud) { + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 1; return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pud_none(pud_t pud) { + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 0; return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; } static inline int pud_bad(pud_t pud) { + /* + * With dynamic page table levels the pud can be a region table + * entry or a segment table entry. Check for the bit that are + * invalid for either table entry. + */ unsigned long mask = - ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } @@ -535,7 +553,8 @@ static inline int pte_young(pte_t pte) static inline void pgd_clear_kernel(pgd_t * pgd) { - pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; } static inline void pgd_clear(pgd_t * pgd) @@ -549,10 +568,11 @@ static inline void pgd_clear(pgd_t * pgd) static inline void pud_clear_kernel(pud_t *pud) { - pud_val(*pud) = _REGION3_ENTRY_EMPTY; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pud_val(*pud) = _REGION3_ENTRY_EMPTY; } -static inline void pud_clear(pud_t * pud) +static inline void pud_clear(pud_t *pud) { pud_t *shadow = get_shadow_table(pud); @@ -841,13 +861,17 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { - pud_t *pud = (pud_t *) pgd_deref(*pgd); + pud_t *pud = (pud_t *) pgd; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pud = (pud_t *) pgd_deref(*pgd); return pud + pud_index(address); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - pmd_t *pmd = (pmd_t *) pud_deref(*pud); + pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmd = (pmd_t *) pud_deref(*pud); return pmd + pmd_index(address); } diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index 5a21f457d58..51d88912aa2 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -81,11 +81,12 @@ extern int get_cpu_capability(unsigned int *); #ifndef __s390x__ #define STACK_TOP (1UL << 31) +#define STACK_TOP_MAX (1UL << 31) #else /* __s390x__ */ -#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:53)) +#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) +#define STACK_TOP_MAX (1UL << 42) #endif /* __s390x__ */ -#define STACK_TOP_MAX STACK_TOP #endif @@ -142,8 +143,6 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#ifndef __s390x__ - #define start_thread(regs, new_psw, new_stackp) do { \ set_fs(USER_DS); \ regs->psw.mask = psw_user_bits; \ @@ -151,24 +150,6 @@ struct stack_frame { regs->gprs[15] = new_stackp ; \ } while (0) -#else /* __s390x__ */ - -#define start_thread(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw; \ - regs->gprs[15] = new_stackp; \ -} while (0) - -#define start_thread31(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw; \ - regs->gprs[15] = new_stackp; \ -} while (0) - -#endif /* __s390x__ */ - /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 9b2ddb7aac4..3d8a96d39d9 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h @@ -109,10 +109,15 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) /* * pmd_free_tlb frees a pmd table and clears the CRSTE for the * segment table entry from the tlb. + * If the mm uses a two level page table the single pmd is freed + * as the pgd. pmd_free_tlb checks the asce_limit against 2GB + * to avoid the double free of the pmd in this case. */ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { #ifdef __s390x__ + if (tlb->mm->context.asce_limit <= (1UL << 31)) + return; if (!tlb->fullmm) { tlb->array[--tlb->nr_pxds] = pmd; if (tlb->nr_ptes >= tlb->nr_pxds) @@ -125,10 +130,15 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) /* * pud_free_tlb frees a pud table and clears the CRSTE for the * region third table entry from the tlb. + * If the mm uses a three level page table the single pud is freed + * as the pgd. pud_free_tlb checks the asce_limit against 4TB + * to avoid the double free of the pud in this case. */ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { #ifdef __s390x__ + if (tlb->mm->context.asce_limit <= (1UL << 42)) + return; if (!tlb->fullmm) { tlb->array[--tlb->nr_pxds] = pud; if (tlb->nr_ptes >= tlb->nr_pxds) -- cgit v1.2.3-18-g5258