diff options
-rw-r--r-- | arch/powerpc/Kconfig | 5 | ||||
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 20 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 548 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_64.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 52 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 633 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spu_base.c | 9 | ||||
-rw-r--r-- | include/asm-powerpc/mmu-hash64.h | 11 | ||||
-rw-r--r-- | include/asm-powerpc/paca.h | 2 | ||||
-rw-r--r-- | include/asm-powerpc/page_64.h | 86 |
13 files changed, 769 insertions, 635 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 808d2ef80e2..5226f701634 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -352,6 +352,11 @@ config PPC_STD_MMU_32 def_bool y depends on PPC_STD_MMU && PPC32 +config PPC_MM_SLICES + bool + default y if HUGETLB_PAGE + default n + config VIRT_CPU_ACCOUNTING bool "Deterministic task and CPU time accounting" depends on PPC64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8f48560b7ee..d6803fb7b28 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -122,12 +122,18 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); -#ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); - DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); -#endif /* CONFIG_HUGETLB_PAGE */ +#ifdef CONFIG_PPC_MM_SLICES + DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, + context.low_slices_psize)); + DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, + context.high_slices_psize)); + DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); + DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); +#else + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); + +#endif /* CONFIG_PPC_MM_SLICES */ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 38a81967ca0..4f839c6a976 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,4 +18,5 @@ 
obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 71092c2f65c..5610ffb1421 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,6 +51,7 @@ #include <asm/cputable.h> #include <asm/abs_addr.h> #include <asm/sections.h> +#include <asm/spu.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -601,8 +602,13 @@ static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { if (mm->context.user_psize == MMU_PAGE_4K) return; +#ifdef CONFIG_PPC_MM_SLICES + slice_set_user_psize(mm, MMU_PAGE_4K); +#else /* CONFIG_PPC_MM_SLICES */ mm->context.user_psize = MMU_PAGE_4K; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; +#endif /* CONFIG_PPC_MM_SLICES */ + #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif @@ -670,11 +676,14 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) local = 1; +#ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (unlikely(in_hugepage_area(mm->context, ea))) { + if (HPAGE_SHIFT && + unlikely(get_slice_psize(mm, ea) == mmu_huge_psize)) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } +#endif /* CONFIG_HUGETLB_PAGE */ /* Get PTE and page size from page tables */ ptep = find_linux_pte(pgdir, ea); @@ -770,10 +779,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long flags; int local = 0; - /* We don't want huge pages prefaulted for now - */ - if (unlikely(in_hugepage_area(mm->context, ea))) + BUG_ON(REGION_ID(ea) != USER_REGION_ID); + +#ifdef CONFIG_PPC_MM_SLICES + /* We only prefault standard pages for now */ + if (unlikely(get_slice_psize(mm, ea) != 
mm->context.user_psize)) return; +#endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," " trap=%lx\n", mm, mm->pgd, ea, access, trap); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb959264c10..92a1b16fb7e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgd_t *pg; pud_t *pu; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pud_t *pu; hugepd_t *hpdp = NULL; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, start = addr; pgd = pgd_offset((*tlb)->mm, addr); do { - BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); + BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(old); } -struct slb_flush_info { - struct mm_struct *mm; - u16 newareas; -}; - -static void flush_low_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i; - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! 
(fi->newareas & (1U << i))) - continue; - asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static void flush_high_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i, j; - - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_LOW_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Hack, so that each addresses is controlled by exactly one - * of the high or low area bitmaps, the first high area starts - * at 4GB, not 0 */ - if (start == 0) - start = 0x100000000UL; - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - struct 
slb_flush_info fi; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) - return 0; /* The segments we want are already open */ - - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.low_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_low_segments, &fi, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - struct slb_flush_info fi; - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. 
*/ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_high_segments, &fi, 0, 1); - - return 0; -} - -int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) -{ - int err = 0; - - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - - if (addr < 0x100000000UL) - err = open_low_hpage_areas(current->mm, - LOW_ESID_MASK(addr, len)); - if ((addr + len) > 0x100000000UL) - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); -#ifdef CONFIG_SPE_BASE - spu_flush_all_slbs(current->mm); -#endif - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); - return err; - } - - return 0; -} - struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; - if (! in_hugepage_area(mm->context, address)) + if (get_slice_psize(mm, address) != mmu_huge_psize) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); @@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -/* Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. 
*/ -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (((TASK_SIZE - len) >= addr) - && (!vma || (addr+len) <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - vma = find_vma(mm, addr); - while (TASK_SIZE - len >= addr) { - BUG_ON(vma && (addr >= vma->vm_end)); - - if (touches_hugepage_low_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (touches_hugepage_high_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - vma = vma->vm_next; - } - - /* Make sure we didn't miss any holes */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; -} - -/* - * This mmap-allocator allocates new areas top-down from below the - * stack's low limit (the base): - * - * Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - 
* non-huge mmap()s evade the hugepage reserved regions. - */ -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - /* dont allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & PAGE_MASK; - do { -hugepage_recheck: - if (touches_hugepage_low_range(mm, addr, len)) { - addr = (addr & ((~0) << SID_SHIFT)) - len; - goto hugepage_recheck; - } else if (touches_hugepage_high_range(mm, addr, len)) { - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; - goto hugepage_recheck; - } - - /* - * Lookup failure means no vma is above this address, - * i.e. 
return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. 
- */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -static int htlb_check_hinted_area(unsigned long addr, unsigned long len) -{ - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || ((addr + len) <= vma->vm_start))) - return 0; - - return -ENOMEM; -} - -static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) -{ - unsigned long addr = 0; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= 0x100000000UL) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_low_range(addr, len, segmask)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} - -static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) -{ - unsigned long addr = 0x100000000UL; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= TASK_SIZE_USER64) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! 
__within_hugepage_high_range(addr, len, areamask)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - int lastshift; - u16 areamask, curareas; - - if (HPAGE_SHIFT == 0) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) - return -EINVAL; - - /* Paranoia, caller should have dealt with this */ - BUG_ON((addr + len) < addr); - - /* Handle MAP_FIXED */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) - return -EINVAL; - return addr; - } - - if (test_thread_flag(TIF_32BIT)) { - curareas = current->mm->context.low_htlb_areas; - - /* First see if we can use the hint address */ - if (addr && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = LOW_ESID_MASK(addr, len); - if (open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing low areas */ - addr = htlb_get_low_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); - ! 
lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_low_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } else { - curareas = current->mm->context.high_htlb_areas; - - /* First see if we can use the hint address */ - /* We discourage 64-bit processes from doing hugepage - * mappings below 4GB (must use MAP_FIXED) */ - if ((addr >= 0x100000000UL) - && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = HTLB_AREA_MASK(addr, len); - if (open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing high areas */ - addr = htlb_get_high_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_high_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough areas\n"); - return -ENOMEM; + return slice_get_unmapped_area(addr, len, flags, + mmu_huge_psize, 1, 0); } /* diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c index 90a06ac02d5..7a78cdc0515 100644 --- a/arch/powerpc/mm/mmu_context_64.c +++ b/arch/powerpc/mm/mmu_context_64.c @@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int index; int err; + int new_context = (mm->context.id == 0); again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) @@ -50,9 +51,18 @@ again: } mm->context.id = index; +#ifdef CONFIG_PPC_MM_SLICES + /* The old code would re-promote on fork, we don't do that + * when using slices as it could cause problem promoting slices + * that have been forced down to 4K + */ + if (new_context) + 
slice_set_user_psize(mm, mmu_virtual_psize); +#else mm->context.user_psize = mmu_virtual_psize; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[mmu_virtual_psize].sllp; +#endif return 0; } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 224e960650a..304375a7357 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -198,12 +198,6 @@ void slb_initialize(void) static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; extern unsigned int *slb_miss_kernel_load_io; -#ifdef CONFIG_HUGETLB_PAGE - extern unsigned int *slb_miss_user_load_huge; - unsigned long huge_llp; - - huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; -#endif /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -220,11 +214,6 @@ void slb_initialize(void) DBG("SLB: linear LLP = %04x\n", linear_llp); DBG("SLB: io LLP = %04x\n", io_llp); -#ifdef CONFIG_HUGETLB_PAGE - patch_slb_encoding(slb_miss_user_load_huge, - SLB_VSID_USER | huge_llp); - DBG("SLB: huge LLP = %04x\n", huge_llp); -#endif } get_paca()->stab_rr = SLB_NUM_BOLTED; diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index b10e4707d7c..cd1a93d4948 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io) srdi. r9,r10,USER_ESID_BITS bne- 8f /* invalid ea bits set */ - /* Figure out if the segment contains huge pages */ -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - b 1f -END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) + + /* when using slices, we extract the psize off the slice bitmaps + * and then we need to get the sllp encoding off the mmu_psize_defs + * array. + * + * XXX This is a bit inefficient especially for the normal case, + * so we should try to implement a fast path for the standard page + * size using the old sllp value so we avoid the array. 
We cannot + * really do dynamic patching unfortunately as processes might flip + * between 4k and 64k standard page size + */ +#ifdef CONFIG_PPC_MM_SLICES cmpldi r10,16 - lhz r9,PACALOWHTLBAREAS(r13) - mr r11,r10 + /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ + ld r9,PACALOWSLICESPSIZE(r13) + sldi r11,r10,2 blt 5f + ld r9,PACAHIGHSLICEPSIZE(r13) + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) + andi. r11,r11,0x3c - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) - -5: srd r9,r9,r11 - andi. r9,r9,1 - beq 1f -_GLOBAL(slb_miss_user_load_huge) - li r11,0 - b 2f -1: -#endif /* CONFIG_HUGETLB_PAGE */ +5: /* Extract the psize and multiply to get an array offset */ + srd r9,r9,r11 + andi. r9,r9,0xf + mulli r9,r9,MMUPSIZEDEFSIZE + /* Now get to the array and obtain the sllp + */ + ld r11,PACATOC(r13) + ld r11,mmu_psize_defs@got(r11) + add r11,r11,r9 + ld r11,MMUPSIZESLLP(r11) + ori r11,r11,SLB_VSID_USER +#else + /* paca context sllp already contains the SLB_VSID_USER bits */ lhz r11,PACACONTEXTSLLP(r13) -2: +#endif /* CONFIG_PPC_MM_SLICES */ + ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 b slb_finish_load diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c new file mode 100644 index 00000000000..f833dba2a02 --- /dev/null +++ b/arch/powerpc/mm/slice.c @@ -0,0 +1,633 @@ +/* + * address space "slices" (meta-segments) support + * + * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation. + * + * Based on hugetlb implementation + * + * Copyright (C) 2003 David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/mman.h> +#include <asm/mmu.h> +#include <asm/spu.h> + +static DEFINE_SPINLOCK(slice_convert_lock); + + +#ifdef DEBUG +int _slice_debug = 1; + +static void slice_print_mask(const char *label, struct slice_mask mask) +{ + char *p, buf[16 + 3 + 16 + 1]; + int i; + + if (!_slice_debug) + return; + p = buf; + for (i = 0; i < SLICE_NUM_LOW; i++) + *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; + *(p++) = ' '; + *(p++) = '-'; + *(p++) = ' '; + for (i = 0; i < SLICE_NUM_HIGH; i++) + *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; + *(p++) = 0; + + printk(KERN_DEBUG "%s:%s\n", label, buf); +} + +#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0) + +#else + +static void slice_print_mask(const char *label, struct slice_mask mask) {} +#define slice_dbg(fmt...) 
+ +#endif + +static struct slice_mask slice_range_to_mask(unsigned long start, + unsigned long len) +{ + unsigned long end = start + len - 1; + struct slice_mask ret = { 0, 0 }; + + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, SLICE_LOW_TOP - 1); + unsigned long mstart = min(start, SLICE_LOW_TOP); + + ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(mstart)); + } + + if ((start + len) > SLICE_LOW_TOP) + ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) + - (1u << GET_HIGH_SLICE_INDEX(start)); + + return ret; +} + +static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct vm_area_struct *vma; + + if ((mm->task_size - len) < addr) + return 0; + vma = find_vma(mm, addr); + return (!vma || (addr + len) <= vma->vm_start); +} + +static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) +{ + return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, + 1ul << SLICE_LOW_SHIFT); +} + +static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) +{ + unsigned long start = slice << SLICE_HIGH_SHIFT; + unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); + + /* Hack, so that each address is controlled by exactly one + * of the high or low area bitmaps, the first high area starts + * at 4GB, not 0 */ + if (start == 0) + start = SLICE_LOW_TOP; + + return !slice_area_is_free(mm, start, end - start); +} + +static struct slice_mask slice_mask_for_free(struct mm_struct *mm) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + + for (i = 0; i < SLICE_NUM_LOW; i++) + if (!slice_low_has_vma(mm, i)) + ret.low_slices |= 1u << i; + + if (mm->task_size <= SLICE_LOW_TOP) + return ret; + + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (!slice_high_has_vma(mm, i)) + ret.high_slices |= 1u << i; + + return ret; +} + +static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + u64 
psizes; + + psizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.low_slices |= 1u << i; + + psizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.high_slices |= 1u << i; + + return ret; +} + +static int slice_check_fit(struct slice_mask mask, struct slice_mask available) +{ + return (mask.low_slices & available.low_slices) == mask.low_slices && + (mask.high_slices & available.high_slices) == mask.high_slices; +} + +static void slice_flush_segments(void *parm) +{ + struct mm_struct *mm = parm; + unsigned long flags; + + if (mm != current->active_mm) + return; + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + local_irq_save(flags); + slb_flush_and_rebolt(); + local_irq_restore(flags); +} + +static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +{ + /* Write the new slice psize bits */ + u64 lpsizes, hpsizes; + unsigned long i, flags; + + slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); + slice_print_mask(" mask", mask); + + /* We need to use a spinlock here to protect against + * concurrent 64k -> 4k demotion ... + */ + spin_lock_irqsave(&slice_convert_lock, flags); + + lpsizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (mask.low_slices & (1u << i))< |