diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 12:56:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 12:56:01 -0700 |
commit | aabded9c3aab5160ae2ca3dd1fa0fa37f3d510e4 (patch) | |
tree | 8544d546735bcb975b8dec296eb9b6dc6531fb2a /arch/powerpc | |
parent | 9a9136e270af14da506f66bcafcc506b86a86498 (diff) | |
parent | f1a1eb299a8422c3e8d41753095bec44b2493398 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
[POWERPC] Further fixes for the removal of 4level-fixup hack from ppc32
[POWERPC] EEH: log all PCI-X and PCI-E AER registers
[POWERPC] EEH: capture and log pci state on error
[POWERPC] EEH: Split up long error msg
[POWERPC] EEH: log error only after driver notification.
[POWERPC] fsl_soc: Make mac_addr const in fs_enet_of_init().
[POWERPC] Don't use SLAB/SLUB for PTE pages
[POWERPC] Spufs support for 64K LS mappings on 4K kernels
[POWERPC] Add ability to 4K kernel to hash in 64K pages
[POWERPC] Introduce address space "slices"
[POWERPC] Small fixes & cleanups in segment page size demotion
[POWERPC] iSeries: Make HVC_ISERIES the default
[POWERPC] iSeries: suppress build warning in lparmap.c
[POWERPC] Mark pages that don't exist as nosave
[POWERPC] swsusp: Introduce register_nosave_region_late
Diffstat (limited to 'arch/powerpc')
28 files changed, 1219 insertions, 714 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 808d2ef80e2..ccc5410af99 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -120,19 +120,6 @@ config GENERIC_BUG config SYS_SUPPORTS_APM_EMULATION bool -# -# Powerpc uses the slab allocator to manage its ptes and the -# page structs of ptes are used for splitting the page table -# lock for configurations supporting more than SPLIT_PTLOCK_CPUS. -# -# In that special configuration the page structs of slabs are modified. -# This setting disables the selection of SLUB as a slab allocator. -# -config ARCH_USES_SLAB_PAGE_STRUCT - bool - default y - depends on SPLIT_PTLOCK_CPUS <= NR_CPUS - config DEFAULT_UIMAGE bool help @@ -352,6 +339,11 @@ config PPC_STD_MMU_32 def_bool y depends on PPC_STD_MMU && PPC32 +config PPC_MM_SLICES + bool + default y if HUGETLB_PAGE + default n + config VIRT_CPU_ACCOUNTING bool "Deterministic task and CPU time accounting" depends on PPC64 @@ -541,9 +533,15 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES +config PPC_HAS_HASH_64K + bool + depends on PPC64 + default n + config PPC_64K_PAGES bool "64k page size" depends on PPC64 + select PPC_HAS_HASH_64K help This option changes the kernel logical page size to 64k. On machines without processor support for 64k pages, the kernel will simulate diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 37bc35e69db..2cb1d948779 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -122,12 +122,18 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); -#ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); - DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); -#endif /* CONFIG_HUGETLB_PAGE */ +#ifdef CONFIG_PPC_MM_SLICES + DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, + context.low_slices_psize)); + DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, + context.high_slices_psize)); + DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); + DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); +#else + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); + +#endif /* CONFIG_PPC_MM_SLICES */ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c index 584d1e3c013..af11285ffbd 100644 --- a/arch/powerpc/kernel/lparmap.c +++ b/arch/powerpc/kernel/lparmap.c @@ -10,7 +10,8 @@ #include <asm/pgtable.h> #include <asm/iseries/lpar_map.h> -const struct LparMap __attribute__((__section__(".text"))) xLparMap = { +/* The # is to stop gcc trying to make .text nonexecutable */ +const struct LparMap __attribute__((__section__(".text #"))) xLparMap = { .xNumberEsids = HvEsidsToMap, .xNumberRanges = HvRangesToMap, .xSegmentTableOffs = STAB0_PAGE, diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 38a81967ca0..4f839c6a976 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e64ce3eec36..4762ff7c14d 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -615,6 +615,9 @@ htab_pte_insert_failure: li r3,-1 b htab_bail +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K /***************************************************************************** * * @@ -870,7 +873,7 @@ ht64_pte_insert_failure: b ht64_bail -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_HAS_HASH_64K */ /***************************************************************************** diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 9b226fa7006..028ba4ed03d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,6 +51,7 @@ #include <asm/cputable.h> #include <asm/abs_addr.h> #include <asm/sections.h> +#include <asm/spu.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -419,7 +420,7 @@ static void __init htab_finish_init(void) extern unsigned int *htab_call_hpte_remove; extern unsigned int *htab_call_hpte_updatepp; -#ifdef CONFIG_PPC_64K_PAGES +#ifdef CONFIG_PPC_HAS_HASH_64K extern unsigned int *ht64_call_hpte_insert1; extern unsigned int *ht64_call_hpte_insert2; extern unsigned int *ht64_call_hpte_remove; @@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) * Demote a segment to using 4k pages. * For now this makes the whole process use 4k pages. */ -void demote_segment_4k(struct mm_struct *mm, unsigned long addr) -{ #ifdef CONFIG_PPC_64K_PAGES +static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) +{ if (mm->context.user_psize == MMU_PAGE_4K) return; +#ifdef CONFIG_PPC_MM_SLICES + slice_set_user_psize(mm, MMU_PAGE_4K); +#else /* CONFIG_PPC_MM_SLICES */ mm->context.user_psize = MMU_PAGE_4K; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; - get_paca()->context = mm->context; - slb_flush_and_rebolt(); +#endif /* CONFIG_PPC_MM_SLICES */ + #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif -#endif } - -EXPORT_SYMBOL_GPL(demote_segment_4k); +#endif /* CONFIG_PPC_64K_PAGES */ /* Result code is: * 0 - handled @@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } vsid = get_vsid(mm->context.id, ea); +#ifdef CONFIG_PPC_MM_SLICES + psize = get_slice_psize(mm, ea); +#else psize = mm->context.user_psize; +#endif break; case VMALLOC_REGION_ID: mm = &init_mm; @@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) local = 1; +#ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (unlikely(in_hugepage_area(mm->context, ea))) { + if (HPAGE_SHIFT && psize == mmu_huge_psize) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } +#endif /* CONFIG_HUGETLB_PAGE */ + +#ifndef CONFIG_PPC_64K_PAGES + /* If we use 4K pages and our psize is not 4K, then we are hitting + * a special driver mapping, we need to align the address before + * we fetch the PTE + */ + if (psize != MMU_PAGE_4K) + ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); +#endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ ptep = find_linux_pte(pgdir, ea); @@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } /* Do actual hashing */ -#ifndef CONFIG_PPC_64K_PAGES - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else +#ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ if (pte_val(*ptep) & _PAGE_4K_PFN) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) { - if (user_region) { - demote_segment_4k(mm, ea); - psize = MMU_PAGE_4K; - } else if (ea < VMALLOC_END) { - /* - * some driver did a non-cacheable mapping - * in vmalloc space, so switch vmalloc - * to 4k pages - */ - printk(KERN_ALERT "Reducing vmalloc segment " - "to 4kB pages because of " - "non-cacheable mapping\n"); - psize = mmu_vmalloc_psize = MMU_PAGE_4K; - } + /* If this PTE is non-cacheable and we have restrictions on + * using non cacheable large pages, then we switch to 4k + */ + if (mmu_ci_restrictions && psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (user_region) { + demote_segment_4k(mm, ea); + psize = MMU_PAGE_4K; + } else if (ea < VMALLOC_END) { + /* + * some driver did a non-cacheable mapping + * in vmalloc space, so switch vmalloc + * to 4k pages + */ + printk(KERN_ALERT "Reducing vmalloc segment " + "to 4kB pages because of " + "non-cacheable mapping\n"); + psize = mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif } - if (user_region) { - if (psize != get_paca()->context.user_psize) { - get_paca()->context = mm->context; - slb_flush_and_rebolt(); - } - } else if (get_paca()->vmalloc_sllp != - mmu_psize_defs[mmu_vmalloc_psize].sllp) { - get_paca()->vmalloc_sllp = - mmu_psize_defs[mmu_vmalloc_psize].sllp; + } + if (user_region) { + if (psize != get_paca()->context.user_psize) { + get_paca()->context.user_psize = + mm->context.user_psize; slb_flush_and_rebolt(); } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_flush_and_rebolt(); } +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); else +#endif /* CONFIG_PPC_HAS_HASH_64K */ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#endif /* CONFIG_PPC_64K_PAGES */ #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); @@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long flags; int local = 0; - /* We don't want huge pages prefaulted for now - */ - if (unlikely(in_hugepage_area(mm->context, ea))) + BUG_ON(REGION_ID(ea) != USER_REGION_ID); + +#ifdef CONFIG_PPC_MM_SLICES + /* We only prefault standard pages for now */ + if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)); return; +#endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," " trap=%lx\n", mm, mm->pgd, ea, access, trap); - /* Get PTE, VSID, access mask */ + /* Get Linux PTE if available */ pgdir = mm->pgd; if (pgdir == NULL) return; ptep = find_linux_pte(pgdir, ea); if (!ptep) return; + +#ifdef CONFIG_PPC_64K_PAGES + /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on + * a 64K kernel), then we don't preload, hash_page() will take + * care of it once we actually try to access the page. + * That way we don't have to duplicate all of the logic for segment + * page size demotion here + */ + if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) + return; +#endif /* CONFIG_PPC_64K_PAGES */ + + /* Get VSID */ vsid = get_vsid(mm->context.id, ea); - /* Hash it in */ + /* Hash doesn't like irqs */ local_irq_save(flags); + + /* Is that local to this CPU ? */ mask = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(mm->cpu_vm_mask, mask)) local = 1; -#ifndef CONFIG_PPC_64K_PAGES - __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (mm->context.user_psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) - demote_segment_4k(mm, ea); - } + + /* Hash it in */ +#ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) __hash_page_64K(ea, access, vsid, ptep, trap, local); else - __hash_page_4K(ea, access, vsid, ptep, trap, local); #endif /* CONFIG_PPC_64K_PAGES */ + __hash_page_4K(ea, access, vsid, ptep, trap, local); + local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb959264c10..92a1b16fb7e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgd_t *pg; pud_t *pu; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pud_t *pu; hugepd_t *hpdp = NULL; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, start = addr; pgd = pgd_offset((*tlb)->mm, addr); do { - BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); + BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(old); } -struct slb_flush_info { - struct mm_struct *mm; - u16 newareas; -}; - -static void flush_low_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i; - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static void flush_high_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i, j; - - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_LOW_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Hack, so that each addresses is controlled by exactly one - * of the high or low area bitmaps, the first high area starts - * at 4GB, not 0 */ - if (start == 0) - start = 0x100000000UL; - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - struct slb_flush_info fi; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) - return 0; /* The segments we want are already open */ - - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.low_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_low_segments, &fi, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - struct slb_flush_info fi; - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_high_segments, &fi, 0, 1); - - return 0; -} - -int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) -{ - int err = 0; - - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - - if (addr < 0x100000000UL) - err = open_low_hpage_areas(current->mm, - LOW_ESID_MASK(addr, len)); - if ((addr + len) > 0x100000000UL) - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); -#ifdef CONFIG_SPE_BASE - spu_flush_all_slbs(current->mm); -#endif - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); - return err; - } - - return 0; -} - struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; - if (! in_hugepage_area(mm->context, address)) + if (get_slice_psize(mm, address) != mmu_huge_psize) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); @@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -/* Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. */ -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (((TASK_SIZE - len) >= addr) - && (!vma || (addr+len) <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - vma = find_vma(mm, addr); - while (TASK_SIZE - len >= addr) { - BUG_ON(vma && (addr >= vma->vm_end)); - - if (touches_hugepage_low_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (touches_hugepage_high_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - vma = vma->vm_next; - } - - /* Make sure we didn't miss any holes */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; -} - -/* - * This mmap-allocator allocates new areas top-down from below the - * stack's low limit (the base): - * - * Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. - */ -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - /* dont allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & PAGE_MASK; - do { -hugepage_recheck: - if (touches_hugepage_low_range(mm, addr, len)) { - addr = (addr & ((~0) << SID_SHIFT)) - len; - goto hugepage_recheck; - } else if (touches_hugepage_high_range(mm, addr, len)) { - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; - goto hugepage_recheck; - } - - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -static int htlb_check_hinted_area(unsigned long addr, unsigned long len) -{ - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || ((addr + len) <= vma->vm_start))) - return 0; - - return -ENOMEM; -} - -static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) -{ - unsigned long addr = 0; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= 0x100000000UL) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_low_range(addr, len, segmask)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); |