diff options
author | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
commit | d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch) | |
tree | 7eab5ee5481cd3dcf1162329fec827177640018a /arch/powerpc/mm | |
parent | a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff) | |
parent | 401d0069cb344f401bc9d264c31db55876ff78c0 (diff) |
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
conflicted on xen_add_extra_mem() updates. The resolution is
trivial as the latter just want to replace
memblock_x86_reserve_range() with memblock_reserve().
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
Resolved by applying the chnages from the latter on the moved
file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
confliected. The former updates function removed by the
latter. Resolution is trivial.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 13 | ||||
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c | 43 | ||||
-rw-r--r-- | arch/powerpc/mm/gup.c | 12 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-book3e.c | 121 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 401 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 41 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 75 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash32.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash64.c | 14 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_nohash.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 46 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_hash32.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 230 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 132 |
24 files changed, 984 insertions, 203 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 024acab588f..f60e006d90c 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -186,10 +186,11 @@ void __init MMU_init_hw(void) unsigned long __init mmu_mapin_ram(unsigned long top) { unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S */ - for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; addr += PPC_PIN_SIZE) { if (mmu_has_feature(MMU_FTR_TYPE_47x)) ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); @@ -218,19 +219,25 @@ unsigned long __init mmu_mapin_ram(unsigned long top) void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { + u64 size; + +#ifndef CONFIG_RELOCATABLE /* We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); +#endif /* 44x has a 256M TLB entry pinned at boot */ - memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); + size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE)); + memblock_set_current_limit(first_memblock_base + size); } #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S @@ -241,7 +248,7 @@ void __cpuinit mmu_init_secondary(int cpu) * stack. current (r2) isn't initialized, smp_processor_id() * will not work, current thread info isn't accessible, ... */ - for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; addr += PPC_PIN_SIZE) { if (mmu_has_feature(MMU_FTR_TYPE_47x)) ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index bdca46e0838..991ee813d2a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o ifeq ($(CONFIG_HUGETLB_PAGE),y) obj-y += hugetlbpage.o obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o +obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o endif obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b42f76c4948..329be36c0a8 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -30,6 +30,7 @@ #include <linux/types.h> #include <linux/highmem.h> #include <linux/dma-mapping.h> +#include <linux/export.h> #include <asm/tlbflush.h> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ad35f66c69e..5efe8c96d37 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -320,7 +320,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -331,7 +331,7 @@ good_area: #endif } else { current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index f7802c8bba0..66a6fd38e9c 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -101,17 +101,17 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) /* * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; - * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus - * that support extended page sizes). Note that while some cpus support a - * page size of 4G, we don't allow its use here. + * in particular size must be a power of 4 between 4k and the max supported by + * an implementation; max may further be limited by what can be represented in + * an unsigned long (for example, 32-bit implementations cannot support a 4GB + * size). */ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned long size, unsigned long flags, unsigned int pid) { - unsigned int tsize, lz; + unsigned int tsize; - asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size)); - tsize = 21 - lz; + tsize = __ilog2(size) - 10; #ifdef CONFIG_SMP if ((flags & _PAGE_NO_CACHE) == 0) @@ -146,29 +146,36 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, loadcam_entry(index); } +unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys) +{ + unsigned int camsize = __ilog2(ram) & ~1U; + unsigned int align = __ffs(virt | phys) & ~1U; + unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; + + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = max_cam * 2 + 10; + + if (camsize > align) + camsize = align; + if (camsize > max_cam) + camsize = max_cam; + + return 1UL << camsize; +} + unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) { int i; unsigned long virt = PAGE_OFFSET; phys_addr_t phys = memstart_addr; unsigned long amount_mapped = 0; - unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; - - /* Convert (4^max) kB to (2^max) bytes */ - max_cam = max_cam * 2 + 10; /* Calculate CAM values */ for (i = 0; ram && i < max_cam_idx; i++) { - unsigned int camsize = __ilog2(ram) & ~1U; - unsigned int align = __ffs(virt | phys) & ~1U; unsigned long cam_sz; - if (camsize > align) - camsize = align; - if (camsize > max_cam) - camsize = max_cam; - - cam_sz = 1UL << camsize; + cam_sz = calc_cam_sz(ram, virt, phys); settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); ram -= cam_sz; diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index fec13200868..d7efdbf640c 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -16,16 +16,6 @@ #ifdef __HAVE_ARCH_PTE_SPECIAL -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); -} - /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -57,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, put_page(page); return 0; } - if (PageTail(page)) - get_huge_page_tail(page); pages[*nr] = page; (*nr)++; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index dfd764896db..90039bc6411 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_RAW_SPINLOCK(native_tlbie_lock); +DEFINE_RAW_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { @@ -51,7 +51,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va &= ~0xffful; va |= ssize << 8; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; default: @@ -61,7 +61,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va |= ssize << 8; va |= 1; /* L */ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 26b2872b3d0..2d282186cb4 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/sysctl.h> +#include <linux/export.h> #include <linux/ctype.h> #include <linux/cache.h> #include <linux/init.h> @@ -105,9 +106,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); -#ifdef CONFIG_HUGETLB_PAGE -unsigned int HPAGE_SHIFT; -#endif #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif @@ -534,11 +532,11 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -void create_section_mapping(unsigned long start, unsigned long end) +int create_section_mapping(unsigned long start, unsigned long end) { - BUG_ON(htab_bolt_mapping(start, end, __pa(start), + return htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, - mmu_kernel_ssize)); + mmu_kernel_ssize); } int remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c new file mode 100644 index 00000000000..343ad0b8726 --- /dev/null +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -0,0 +1,121 @@ +/* + * PPC Huge TLB Page Support for Book3E MMU + * + * Copyright (C) 2009 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor + * + */ +#include <linux/mm.h> +#include <linux/hugetlb.h> + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} + +static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) +{ + int found = 0; + + mtspr(SPRN_MAS6, pid << 16); + if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { + asm volatile( + "li %0,0\n" + "tlbsx. 0,%1\n" + "bne 1f\n" + "li %0,1\n" + "1:\n" + : "=&r"(found) : "r"(ea)); + } else { + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); + } + + return found; +} + +void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) +{ + unsigned long mas1, mas2; + u64 mas7_3; + unsigned long psize, tsize, shift; + unsigned long flags; + +#ifdef CONFIG_PPC_FSL_BOOK3E + int index, lz, ncams; + struct vm_area_struct *vma; +#endif + + if (unlikely(is_kernel_addr(ea))) + return; + +#ifdef CONFIG_PPC_MM_SLICES + psize = mmu_get_tsize(get_slice_psize(mm, ea)); + tsize = mmu_get_psize(psize); + shift = mmu_psize_defs[psize].shift; +#else + vma = find_vma(mm, ea); + psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */ + asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize)); + shift = 31 - lz; + tsize = 21 - lz; +#endif + + /* + * We can't be interrupted while we're setting up the MAS + * regusters or after we've confirmed that no tlb exists. + */ + local_irq_save(flags); + + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { + local_irq_restore(flags); + return; + } + +#ifdef CONFIG_PPC_FSL_BOOK3E + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* We have to use the CAM(TLB1) on FSL parts for hugepages */ + index = __get_cpu_var(next_tlbcam_idx); + mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + else + __get_cpu_var(next_tlbcam_idx)++; +#endif + mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); + mas2 = ea & ~((1UL << shift) - 1); + mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; + mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; + mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; + if (!pte_dirty(pte)) + mas7_3 &= ~(MAS3_SW|MAS3_UW); + + mtspr(SPRN_MAS1, mas1); + mtspr(SPRN_MAS2, mas2); + + if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { + mtspr(SPRN_MAS7_MAS3, mas7_3); + } else { + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); + } + + asm volatile ("tlbwe"); + + local_irq_restore(flags); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct hstate *hstate = hstate_file(vma->vm_file); + unsigned long tsize = huge_page_shift(hstate) - 10; + + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0); + +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0b9a5c1901b..8558b572e55 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1,7 +1,8 @@ /* - * PPC64 (POWER4) Huge TLB Page Support for Kernel. + * PPC Huge TLB Page Support for Kernel. * * Copyright (C) 2003 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor * * Based on the IA-32 version: * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> @@ -11,24 +12,40 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/hugetlb.h> +#include <linux/of_fdt.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/moduleparam.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> +#include <asm/setup.h> #define PAGE_SHIFT_64K 16 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 -#define MAX_NUMBER_GPAGES 1024 +unsigned int HPAGE_SHIFT; -/* Tracks the 16G pages after the device tree is scanned and before the - * huge_boot_pages list is ready. */ -static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; +/* + * Tracks gpages after the device tree is scanned and before the + * huge_boot_pages list is ready. On 64-bit implementations, this is + * just used to track 16G pages and so is a single array. 32-bit + * implementations may have more than one gpage size due to limitations + * of the memory allocators, so we need multiple arrays + */ +#ifdef CONFIG_PPC64 +#define MAX_NUMBER_GPAGES 1024 +static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; - -/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() - * will choke on pointers to hugepte tables, which is handy for - * catching screwups early. */ +#else +#define MAX_NUMBER_GPAGES 128 +struct psize_gpages { + u64 gpage_list[MAX_NUMBER_GPAGES]; + unsigned int nr_gpages; +}; +static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; +#endif static inline int shift_to_mmu_psize(unsigned int shift) { @@ -49,25 +66,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #define hugepd_none(hpd) ((hpd).pd == 0) -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return hpd.pd & HUGEPD_SHIFT_MASK; -} - -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) -{ - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); - pte_t *dir = hugepd_page(*hpdp); - - return dir + idx; -} - pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) { pgd_t *pg; @@ -93,7 +91,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (is_hugepd(pm)) hpdp = (hugepd_t *)pm; else if (!pmd_none(*pm)) { - return pte_offset_map(pm, ea); + return pte_offset_kernel(pm, ea); } } } @@ -114,8 +112,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned pdshift, unsigned pshift) { - pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), - GFP_KERNEL|__GFP_REPEAT); + struct kmem_cache *cachep; + pte_t *new; + +#ifdef CONFIG_PPC64 + cachep = PGT_CACHE(pdshift - pshift); +#else + int i; + int num_hugepd = 1 << (pshift - pdshift); + cachep = hugepte_cache; +#endif + + new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -124,10 +132,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return -ENOMEM; spin_lock(&mm->page_table_lock); +#ifdef CONFIG_PPC64 if (!hugepd_none(*hpdp)) - kmem_cache_free(PGT_CACHE(pdshift - pshift), new); + kmem_cache_free(cachep, new); else - hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; +#else + /* + * We have multiple higher-level entries that point to the same + * actual pte location. Fill in each as we go and backtrack on error. + * We need all of these so the DTLB pgtable walk code can find the + * right higher-level entry without knowing if it's a hugepage or not. + */ + for (i = 0; i < num_hugepd; i++, hpdp++) { + if (unlikely(!hugepd_none(*hpdp))) + break; + else + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; + } + /* If we bailed from the for loop early, an error occurred, clean up */ + if (i < num_hugepd) { + for (i = i - 1 ; i >= 0; i--, hpdp--) + hpdp->pd = 0; + kmem_cache_free(cachep, new); + } +#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -169,11 +198,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } +#ifdef CONFIG_PPC32 +/* Build list of addresses of gigantic pages. This function is used in early + * boot before the buddy or bootmem allocator is setup. + */ +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) +{ + unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); + int i; + + if (addr == 0) + return; + + gpage_freearray[idx].nr_gpages = number_of_pages; + + for (i = 0; i < number_of_pages; i++) { + gpage_freearray[idx].gpage_list[i] = addr; + addr += page_size; + } +} + +/* + * Moves the gigantic page addresses from the temporary list to the + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) +{ + struct huge_bootmem_page *m; + int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int nr_gpages = gpage_freearray[idx].nr_gpages; + + if (nr_gpages == 0) + return 0; + +#ifdef CONFIG_HIGHMEM + /* + * If gpages can be in highmem we can't use the trick of storing the + * data structure in the page; allocate space for this + */ + m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; +#else + m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); +#endif + + list_add(&m->list, &huge_boot_pages); + gpage_freearray[idx].nr_gpages = nr_gpages; + gpage_freearray[idx].gpage_list[nr_gpages] = 0; + m->hstate = hstate; + + return 1; +} +/* + * Scan the command line hugepagesz= options for gigantic pages; store those in + * a list that we use to allocate the memory once all options are parsed. + */ + +unsigned long gpage_npages[MMU_PAGE_COUNT]; + +static int __init do_gpage_early_setup(char *param, char *val) +{ + static phys_addr_t size; + unsigned long npages; + + /* + * The hugepagesz and hugepages cmdline options are interleaved. We + * use the size variable to keep track of whether or not this was done + * properly and skip over instances where it is incorrect. Other + * command-line parsing code will issue warnings, so we don't need to. + * + */ + if ((strcmp(param, "default_hugepagesz") == 0) || + (strcmp(param, "hugepagesz") == 0)) { + size = memparse(val, NULL); + } else if (strcmp(param, "hugepages") == 0) { + if (size != 0) { + if (sscanf(val, "%lu", &npages) <= 0) + npages = 0; + gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; + size = 0; + } + } + return 0; +} + + +/* + * This function allocates physical space for pages that are larger than the + * buddy allocator can handle. We want to allocate these in highmem because + * the amount of lowmem is limited. This means that this function MUST be + * called before lowmem_end_addr is set up in MMU_init() in order for the lmb + * allocate to grab highmem. + */ +void __init reserve_hugetlb_gpages(void) +{ + static __initdata char cmdline[COMMAND_LINE_SIZE]; + phys_addr_t size, base; + int i; + + strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup); + + /* + * Walk gpage list in reverse, allocating larger page sizes first. + * Skip over unsupported sizes, or sizes that have 0 gpages allocated. + * When we reach the point in the list where pages are no longer + * considered gpages, we're done. + */ + for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { + if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) + continue; + else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) + break; + + size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); + base = memblock_alloc_base(size * gpage_npages[i], size, + MEMBLOCK_ALLOC_ANYWHERE); + add_gpage(base, size, gpage_npages[i]); + } +} + +#else /* PPC64 */ + /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy or bootmem allocator is setup. */ -void add_gpage(unsigned long addr, unsigned long page_size, - unsigned long number_of_pages) +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { if (!addr) return; @@ -199,19 +349,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } +#endif int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } +#ifdef CONFIG_PPC32 +#define HUGEPD_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) + +struct hugepd_freelist { + struct rcu_head rcu; + unsigned int index; + void *ptes[0]; +}; + +static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); + +static void hugepd_free_rcu_callback(struct rcu_head *head) +{ + struct hugepd_freelist *batch = + container_of(head, struct hugepd_freelist, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + kmem_cache_free(hugepte_cache, batch->ptes[i]); + + free_page((unsigned long)batch); +} + +static void hugepd_free(struct mmu_gather *tlb, void *hugepte) +{ + struct hugepd_freelist **batchp; + + batchp = &__get_cpu_var(hugepd_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpumask_equal(mm_cpumask(tlb->mm), + cpumask_of(smp_processor_id()))) { + kmem_cache_free(hugepte_cache, hugepte); + return; + } + + if (*batchp == NULL) { + *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC); + (*batchp)->index = 0; + } + + (*batchp)->ptes[(*batchp)->index++] = hugepte; + if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { + call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + *batchp = NULL; + } +} +#endif + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) { pte_t *hugepte = hugepd_page(*hpdp); - unsigned shift = hugepd_shift(*hpdp); + int i; + unsigned long pdmask = ~((1UL << pdshift) - 1); + unsigned int num_hugepd = 1; + +#ifdef CONFIG_PPC64 + unsigned int shift = hugepd_shift(*hpdp); +#else + /* Note: On 32-bit the hpdp may be the first of several */ + num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); +#endif start &= pdmask; if (start < floor) @@ -224,9 +434,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (end - 1 > ceiling - 1) return; - hpdp->pd = 0; + for (i = 0; i < num_hugepd; i++, hpdp++) + hpdp->pd = 0; + tlb->need_flush = 1; +#ifdef CONFIG_PPC64 pgtable_free_tlb(tlb, hugepte, pdshift - shift); +#else + hugepd_free(tlb, hugepte); +#endif } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -331,18 +547,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * too. */ - pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); + pgd = pgd_offset(tlb->mm, addr); if (!is_hugepd(pgd)) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { +#ifdef CONFIG_PPC32 + /* + * Increment next by the size of the huge mapping since + * on 32-bit there may be more than one entry at the pgd + * level for a single hugepage, but all of them point to + * the same kmem cache that holds the hugepte. + */ + next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); +#endif free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } - } while (pgd++, addr = next, addr != end); + } while (addr = next, addr != end); } struct page * @@ -390,7 +615,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add { unsigned long mask; unsigned long pte_end; - struct page *head, *page; + struct page *head, *page, *tail; pte_t pte; int refs; @@ -413,6 +638,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add head = pte_page(pte); page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -428,10 +654,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add if (unlikely(pte_val(pte) != pte_val(*ptep))) { /* Could be optimized better */ - while (*nr) { - put_page(page); - (*nr)--; - } + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; @@ -466,17 +702,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { +#ifdef CONFIG_PPC_MM_SLICES struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); +#else + return get_unmapped_area(file, addr, len, pgoff, flags); +#endif } unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { +#ifdef CONFIG_PPC_MM_SLICES unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); +#else + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + return huge_page_size(hstate_vma(vma)); +#endif +} + +static inline bool is_power_of_4(unsigned long x) +{ + if (is_power_of_2(x)) + return (__ilog2(x) % 2) ? false : true; + return false; } static int __init add_huge_page_size(unsigned long long size) @@ -486,9 +740,14 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if ((size < PAGE_SIZE) || !is_power_of_4(size)) + return -EINVAL; +#else if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) return -EINVAL; +#endif if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) return -EINVAL; @@ -525,6 +784,46 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); +#ifdef CONFIG_FSL_BOOKE +struct kmem_cache *hugepte_cache; +static int __init hugetlbpage_init(void) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + unsigned shift; + + if (!mmu_psize_defs[psize].shift) + continue; + + shift = mmu_psize_to_shift(psize); + + /* Don't treat normal page sizes as huge... */ + if (shift != PAGE_SHIFT) + if (add_huge_page_size(1ULL << shift) < 0) + continue; + } + + /* + * Create a kmem cache for hugeptes. The bottom bits in the pte have + * size information encoded in them, so align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache for hugeptes\n", + __func__); + + /* Default hpage size = 4M */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else + panic("%s: Unable to set default huge page size\n", __func__); + + + return 0; +} +#else static int __init hugetlbpage_init(void) { int psize; @@ -567,15 +866,23 @@ static int __init hugetlbpage_init(void) return 0; } - +#endif module_init(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { int i; + void *start; BUG_ON(!PageCompound(page)); - for (i = 0; i < (1UL << compound_order(page)); i++) - __flush_dcache_icache(page_address(page+i)); + for (i = 0; i < (1UL << compound_order(page)); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE); + __flush_dcache_icache(start); + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); + } + } } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 5de0f254dbb..161cefde5c1 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -32,6 +32,8 @@ #include <linux/pagemap.h> #include <linux/memblock.h> #include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -44,6 +46,7 @@ #include <asm/tlb.h> #include <asm/sections.h> #include <asm/system.h> +#include <asm/hugetlb.h> #include "mmu_decl.h" @@ -123,6 +126,12 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); + /* + * Reserve gigantic pages for hugetlb. This MUST occur before + * lowmem_end_addr is initialized below. + */ + reserve_hugetlb_gpages(); + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock.memory.cnt = 1; @@ -191,38 +200,6 @@ void __init *early_get_page(void) return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } -/* Free up now-unused memory */ -static void free_sec(unsigned long start, unsigned long end, const char *name) -{ - unsigned long cnt = 0; - - while (start < end) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - cnt++; - start += PAGE_SIZE; - } - if (cnt) { - printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); - totalram_pages += cnt; - } -} - -void free_initmem(void) -{ -#define FREESEC(TYPE) \ - free_sec((unsigned long)(&__ ## TYPE ## _begin), \ - (unsigned long)(&__ ## TYPE ## _end), \ - #TYPE); - - printk ("Freeing unused kernel memory:"); - FREESEC(init); - printk("\n"); - ppc_md.progress = NULL; -#undef FREESEC -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index f6dbb4c20e6..e94b57fb79a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -83,22 +83,6 @@ EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL_GPL(kernstart_addr); -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk ("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); -} - static void pgd_ctor(void *addr) { memset(addr, 0, PGD_TABLE_SIZE); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 29d4dde65c4..2dd6bdd31fe 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -17,7 +17,7 @@ * */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -34,6 +34,7 @@ #include <linux/suspend.h> #include <linux/memblock.h> #include <linux/hugetlb.h> +#include <linux/slab.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -123,7 +124,8 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdata = NODE_DATA(nid); start = (unsigned long)__va(start); - create_section_mapping(start, start + size); + if (create_section_mapping(start, start + size)) + return -EINVAL; /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; @@ -249,7 +251,7 @@ static int __init mark_nonram_nosave(void) */ void __init paging_init(void) { - unsigned long total_ram = memblock_phys_mem_size(); + unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -269,7 +271,7 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n", (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); @@ -337,8 +339,9 @@ void __init mem_init(void) highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { + phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; struct page *page = pfn_to_page(pfn); - if (memblock_is_reserved(pfn << PAGE_SHIFT)) + if (memblock_is_reserved(paddr)) continue; ClearPageReserved(page); init_page_count(page); @@ -352,6 +355,15 @@ void __init mem_init(void) } #endif /* CONFIG_HIGHMEM */ +#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP) + /* + * If smp is enabled, next_tlbcam_idx is initialized in the cpu up + * functions.... do it here for the non-smp case. + */ + per_cpu(next_tlbcam_idx, smp_processor_id()) = + (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; +#endif + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " "%luk reserved, %luk data, %luk bss, %luk init)\n", nr_free_pages() << (PAGE_SHIFT-10), @@ -382,6 +394,25 @@ void __init mem_init(void) mem_init_done = 1; } +void free_initmem(void) +{ + unsigned long addr; + + ppc_md.progress = ppc_printk_progress; + + addr = (unsigned long)__init_begin; + for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + free_page(addr); + totalram_pages++; + } + pr_info("Freeing unused kernel memory: %luk freed\n", + ((unsigned long)__init_end - + (unsigned long)__init_begin) >> 10); +} + #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { @@ -519,4 +550,38 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; hash_preload(vma->vm_mm, address, access, trap); #endif /* CONFIG_PPC_STD_MMU */ +#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ + && defined(CONFIG_HUGETLB_PAGE) + if (is_vm_hugetlb_page(vma)) + book3e_hugetlb_preload(vma->vm_mm, address, *ptep); +#endif +} + +/* + * System memory should not be in /proc/iomem but various tools expect it + * (eg kdump). + */ +static int add_system_ram_resources(void) +{ + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + struct resource *res; + unsigned long base = reg->base; + unsigned long size = reg->size; + + res = kzalloc(sizeof(struct resource), GFP_KERNEL); + WARN_ON(!res); + + if (res) { + res->name = "System RAM"; + res->start = base; + res->end = base + size - 1; + res->flags = IORESOURCE_MEM; + WARN_ON(request_resource(&iomem_resource, res) < 0); + } + } + + return 0; } +subsys_initcall(add_system_ram_resources); diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index d0ee554e86e..78fef6726e1 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -24,6 +24,7 @@ #include <linux/mm.h> #include <linux/init.h> +#include <linux/export.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 3bafc3deca6..ca988a3d5fb 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -18,7 +18,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/idr.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/gfp.h> #include <linux/slab.h> @@ -136,8 +136,8 @@ int use_cop(unsigned long acop, struct mm_struct *mm) if (!mm || !acop) return -EINVAL; - /* We need to make sure mm_users doesn't change */ - down_read(&mm->mmap_sem); + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); spin_lock(mm->context.cop_lockp); if (mm->context.cop_pid == COP_PID_NONE) { @@ -164,7 +164,7 @@ int use_cop(unsigned long acop, struct mm_struct *mm) out: spin_unlock(mm->context.cop_lockp); - up_read(&mm->mmap_sem); + spin_unlock(&mm->page_table_lock); return ret; } @@ -185,8 +185,8 @@ void drop_cop(unsigned long acop, struct mm_struct *mm) if (WARN_ON_ONCE(!mm)) return; - /* We need to make sure mm_users doesn't change */ - down_read(&mm->mmap_sem); + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); spin_lock(mm->context.cop_lockp); mm->context.acop &= ~acop; @@ -213,7 +213,7 @@ void drop_cop(unsigned long acop, struct mm_struct *mm) } spin_unlock(mm->context.cop_lockp); - up_read(&mm->mmap_sem); + spin_unlock(&mm->page_table_lock); } EXPORT_SYMBOL_GPL(drop_cop); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 336807de550..5b63bd3da4a 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; +#ifdef CONFIG_PPC_MM_SLICES + if (slice_mm_new_context(mm)) + slice_set_user_psize(mm, mmu_virtual_psize); +#endif + return 0; } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index dd0a2589591..83eb5d5f53d 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -142,6 +142,8 @@ extern unsigned long mmu_mapin_ram(unsigned long top); #elif defined(CONFIG_PPC_FSL_BOOK3E) extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx); +extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys); #ifdef CONFIG_PPC32 extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 6f06ea53bca..261adbd3b55 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/mmzone.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/notifier.h> @@ -295,7 +295,10 @@ static int __init find_min_common_depth(void) struct device_node *root; const char *vec5; - root = of_find_node_by_path("/rtas"); + if (firmware_has_feature(FW_FEATURE_OPAL)) + root = of_find_node_by_path("/ibm,opal"); + else + root = of_find_node_by_path("/rtas"); if (!root) root = of_find_node_by_path("/"); @@ -324,12 +327,19 @@ static int __init find_min_common_depth(void) #define VEC5_AFFINITY_BYTE 5 #define VEC5_AFFINITY 0x80 - chosen = of_find_node_by_path("/chosen"); - if (chosen) { - vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); - if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { - dbg("Using form 1 affinity\n"); - form1_affinity = 1; + + if (firmware_has_feature(FW_FEATURE_OPAL)) + form1_affinity = 1; + else { + chosen = of_find_node_by_path("/chosen"); + if (chosen) { + vec5 = of_get_property(chosen, + "ibm,architecture-vec-5", NULL); + if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & + VEC5_AFFINITY)) { + dbg("Using form 1 affinity\n"); + form1_affinity = 1; + } } } @@ -689,8 +699,7 @@ static void __init parse_drconf_memory(struct device_node *memory) static int __init parse_numa_properties(void) { - struct device_node *cpu = NULL; - struct device_node *memory = NULL; + struct device_node *memory; int default_nid = 0; unsigned long i; @@ -712,6 +721,7 @@ static int __init parse_numa_properties(void) * each node to be onlined must have NODE_DATA etc backing it. */ for_each_present_cpu(i) { + struct device_node *cpu; int nid; cpu = of_get_cpu_node(i, NULL); @@ -730,8 +740,8 @@ static int __init parse_numa_properties(void) } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + + for_each_node_by_type(memory, "memory") { unsigned long start; unsigned long size; int nid; @@ -780,8 +790,9 @@ new_range: } /* - * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory - * property in the ibm,dynamic-reconfiguration-memory node. + * Now do the same thing for each MEMBLOCK listed in the + * ibm,dynamic-memory property in the + * ibm,dynamic-reconfiguration-memory node. */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) @@ -1167,10 +1178,10 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, */ int hot_add_node_scn_to_nid(unsigned long scn_addr) { - struct device_node *memory = NULL; + struct device_node *memory; int nid = -1; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + for_each_node_by_type(memory, "memory") { unsigned long start, size; int ranges; const unsigned int *memcell_buf; @@ -1194,11 +1205,12 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr) break; } - of_node_put(memory); if (nid >= 0) break; } + of_node_put(memory); + return nid; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index af40c8768a7..214130a4edc 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> @@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, entry = set_access_flags_filter(entry, vma, dirty); changed = !pte_same(*(ptep), entry); if (changed) { - if (!(vma->vm_flags & VM_HUGETLB)) + if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); flush_tlb_page_nohash(vma, address); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 6e595f6496d..ad36ede469c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -26,6 +26,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/export.h> #include <linux/types.h> #include <linux/mman.h> #include <linux/mm.h> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ba5194817f8..73709f7ce92 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -29,7 +29,7 @@ #include <linux/pagemap.h> #include <linux/err.h> #include <linux/spinlock.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/mman.h> #include <asm/mmu.h> #include <asm/spu.h> diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 27b863c1494..558e30cce33 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/highmem.h> #include <linux/pagemap.h> +#include <linux/export.h> #include <asm/tlbflush.h> #include <asm/tlb.h> @@ -177,3 +178,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, flush_range(vma->vm_mm, start, end); } EXPORT_SYMBOL(flush_tlb_range); + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index af089220941..dc4a5f385e4 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -30,6 +30,212 @@ #define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) +/********************************************************************** + * * + * TLB miss handling for Book3E with a bolted linear mapping * + * No virtual page table, no nested TLB misses * + * * + **********************************************************************/ + +.macro tlb_prolog_bolted addr + mtspr SPRN_SPRG_TLB_SCRATCH,r13 + mfspr r13,SPRN_SPRG_PACA + std r10,PACA_EXTLB+EX_TLB_R10(r13) + mfcr r10 + std r11,PACA_EXTLB+EX_TLB_R11(r13) + std r16,PACA_EXTLB+EX_TLB_R16(r13) + mfspr r16,\addr /* get faulting address */ + std r14,PACA_EXTLB+EX_TLB_R14(r13) + ld r14,PACAPGD(r13) + std r15,PACA_EXTLB+EX_TLB_R15(r13) + std r10,PACA_EXTLB+EX_TLB_CR(r13) + TLB_MISS_PROLOG_STATS_BOLTED +.endm + +.macro tlb_epilog_bolted + ld r14,PACA_EXTLB+EX_TLB_CR(r13) + ld r10,PACA_EXTLB+EX_TLB_R10(r13) + ld r11,PACA_EXTLB+EX_TLB_R11(r13) + mtcr r14 + ld r14,PACA_EXTLB+EX_TLB_R14(r13) + ld r15,PACA_EXTLB+EX_TLB_R15(r13) + TLB_MISS_RESTORE_STATS_BOLTED + ld r16,PACA_EXTLB+EX_TLB_R16(r13) + mfspr r13,SPRN_SPRG_TLB_SCRATCH +.endm + +/* Data TLB miss */ + START_EXCEPTION(data_tlb_miss_bolted) + tlb_prolog_bolted SPRN_DEAR + + /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + /* We pre-test some combination of permissions to avoid double + * faults: + * + * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE + * ESR_ST is 0x00800000 + * _PAGE_BAP_SW is 0x00000010 + * So the shift is >> 19. This tests for supervisor writeability. + * If the page happens to be supervisor writeable and not user + * writeable, we will take a new fault later, but that should be + * a rare enough case. + * + * We also move ESR_ST in _PAGE_DIRTY position + * _PAGE_DIRTY is 0x00001000 so the shift is >> 11 + * + * MAS1 is preset for all we need except for TID that needs to + * be cleared for kernel translations + */ + + mfspr r11,SPRN_ESR + + srdi r15,r16,60 /* get region */ + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- dtlb_miss_fault_bolted + + rlwinm r10,r11,32-19,27,27 + rlwimi r10,r11,32-16,19,19 + cmpwi r15,0 + ori r10,r10,_PAGE_PRESENT + oris r11,r10,_PAGE_ACCESSED@h + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_bolted + +tlb_miss_common_bolted: +/* + * This is the guts of the TLB miss handler for bolted-linear. + * We are entered with: + * + * r16 = faulting address + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = PTE permission mask + * r10 = crap (free to use) + */ + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 + cmpldi cr0,r14,0 + clrrdi r15,r15,3 + beq tlb_miss_fault_bolted + +BEGIN_MMU_FTR_SECTION + /* Set the TLB reservation and search for existing entry. Then load + * the entry. + */ + PPC_TLBSRX_DOT(0,r16) + ldx r14,r14,r15 + beq normal_tlb_miss_done +MMU_FTR_SECTION_ELSE + ldx r14,r14,r15 +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) + +#ifndef CONFIG_PPC_64K_PAGES + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 + clrrdi r15,r15,3 + + cmpldi cr0,r14,0 + beq tlb_miss_fault_bolted + + ldx r14,r14,r15 +#endif /* CONFIG_PPC_64K_PAGES */ + + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 + clrrdi r15,r15,3 + + cmpldi cr0,r14,0 + beq tlb_miss_fault_bolted + + ldx r14,r14,r15 + + rldicl r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3 + clrrdi r15,r15,3 + + cmpldi cr0,r14,0 + beq tlb_miss_fault_bolted + + ldx r14,r14,r15 + + /* Check if required permissions are met */ + andc. r15,r11,r14 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + bne- tlb_miss_fault_bolted + + /* Now we build the MAS: + * + * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG + * MAS 1 : Almost fully setup + * - PID already updated by caller if necessary + * - TSIZE need change if !base page size, not + * yet implemented for now + * MAS 2 : Defaults not useful, need to be redone + * MAS 3+7 : Needs to be done + */ + clrrdi r11,r16,12 /* Clear low crap in EA */ + clrldi r15,r15,12 /* Clear crap at the top */ + rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r11 + andi. r11,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r11,MAS3_SW|MAS3_UW + andc r15,r15,r11 +1: + mtspr SPRN_MAS7_MAS3,r15 + tlbwe + + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + tlb_epilog_bolted + rfi + +itlb_miss_kernel_bolted: + li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ + oris r11,r11,_PAGE_ACCESSED@h +tlb_miss_kernel_bolted: + mfspr r10,SPRN_MAS1 + ld r14,PACA_KERNELPGD(r13) + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ tlb_miss_common_bolted + +tlb_miss_fault_bolted: + /* We need to check if it was an instruction miss */ + andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX + bne itlb_miss_fault_bolted +dtlb_miss_fault_bolted: + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_data_storage_book3e +itlb_miss_fault_bolted: + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_instruction_storage_book3e + +/* Instruction TLB miss */ + START_EXCEPTION(instruction_tlb_miss_bolted) + tlb_prolog_bolted SPRN_SRR0 + + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + srdi r15,r16,60 /* get region */ + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne- itlb_miss_fault_bolted + + li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ + + /* We do the user/kernel test for the PID here along with the RW test + */ + + cmpldi cr0,r15,0 /* Check for user region */ + oris r11,r11,_PAGE_ACCESSED@h + beq tlb_miss_common_bolted + b itlb_miss_kernel_bolted /********************************************************************** * * @@ -347,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault /* Ok, we're all right, we can now create a kernel translation for * a 4K or 64K page from r16 -> r15. @@ -596,24 +802,24 @@ htw_tlb_miss: rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault /* Ok, we're all right, we can now create an indirect entry for * a 1M or 256M page. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 0bdad3aecc6..4e13d6f9023 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -28,6 +28,7 @@ */ #include <linux/kernel.h> +#include <linux/export.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/highmem.h> @@ -35,14 +36,50 @@ #include <linux/preempt.h> #include <linux/spinlock.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/code-patching.h> +#include <asm/hugetlb.h> #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3E +/* + * This struct lists the sw-supported page sizes. The hardawre MMU may support + * other sizes not listed here. The .ind field is only used on MMUs that have + * indirect page table entries. + */ +#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_FSL_BOOKE +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + [MMU_PAGE_4K] = { + .shift = 12, + .enc = BOOK3E_PAGESZ_4K, + }, + [MMU_PAGE_4M] = { + .shift = 22, + .enc = BOOK3E_PAGESZ_4M, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .enc = BOOK3E_PAGESZ_16M, + }, + [MMU_PAGE_64M] = { + .shift = 26, + .enc = BOOK3E_PAGESZ_64M, + }, + [MMU_PAGE_256M] = { + .shift = 28, + .enc = BOOK3E_PAGESZ_256M, + }, + [MMU_PAGE_1G] = { + .shift = 30, + .enc = BOOK3E_PAGESZ_1GB, + }, +}; +#else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -76,6 +113,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#endif /* CONFIG_FSL_BOOKE */ + static inline int mmu_get_tsize(int psize) { return mmu_psize_defs[psize].enc; @@ -86,7 +125,7 @@ static inline int mmu_get_tsize(int psize) /* This isn't used on !Book3E for now */ return 0; } -#endif +#endif /* CONFIG_PPC_BOOK3E_MMU */ /* The variables below are currently only used on 64-bit Book3E * though this will probably be made common with other nohash @@ -102,6 +141,12 @@ unsigned long linear_map_top; /* Top of linear mapping */ #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ +DEFINE_PER_CPU(int, next_tlbcam_idx); +EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); +#endif + /* * Base TLB flushing operations: * @@ -259,6 +304,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) + flush_hugetlb_page(vma, vmaddr); +#endif + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, mmu_get_tsize(mmu_virtual_psize), 0); } @@ -266,6 +316,17 @@ EXPORT_SYMBOL(flush_tlb_page); #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_47x +void __init early_init_mmu_47x(void) +{ +#ifdef CONFIG_SMP + unsigned long root = of_get_flat_dt_root(); + if (of_get_flat_dt_prop(root, "cooperative-partition", NULL)) + mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); +#endif /* CONFIG_SMP */ +} +#endif /* CONFIG_PPC_47x */ + /* * Flush kernel TLB entries in the given range */ @@ -443,14 +504,27 @@ static void setup_page_sizes(void) } } -static void setup_mmu_htw(void) +static void __patch_exception(int exc, unsigned long addr) { extern unsigned int interrupt_base_book3e; - extern unsigned int exc_data_tlb_miss_htw_book3e; - extern unsigned int exc_instruction_tlb_miss_htw_book3e; + unsigned int *ibase = &interrupt_base_book3e; + + /* Our exceptions vectors start with a NOP and -then- a branch + * to deal with single stepping from userspace which stops on + * the second instruction. Thus we need to patch the second + * instruction of the exception, not the first one + */ + + patch_branch(ibase + (exc / 4) + 1, addr, 0); +} - unsigned int *ibase = &interrupt_base_book3e; +#define patch_exception(exc, name) do { \ + extern unsigned int name; \ + __patch_exception((exc), (unsigned long)&name); \ +} while (0) +static void setup_mmu_htw(void) +{ /* Check if HW tablewalk is present, and if yes, enable it by: * * - patching the TLB miss handlers to branch to the @@ -462,19 +536,12 @@ static void setup_mmu_htw(void) if ((tlb0cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) { - /* Our exceptions vectors start with a NOP and -then- a branch - * to deal with single stepping from userspace which stops on - * the second instruction. Thus we need to patch the second - * instruction of the exception, not the first one - */ - patch_branch(ibase + (0x1c0 / 4) + 1, - (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); - patch_branch(ibase + (0x1e0 / 4) + 1, - (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); + patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); book3e_htw_enabled = 1; } - pr_info("MMU: Book3E Page Tables %s\n", - book3e_htw_enabled ? "Enabled" : "Disabled"); + pr_info("MMU: Book3E HW tablewalk %s\n", + book3e_htw_enabled ? "enabled" : "not supported"); } /* @@ -549,6 +616,9 @@ static void __early_init_mmu(int boot_cpu) /* limit memory so we dont have linear faults */ memblock_enforce_memory_limit(linear_map_top); memblock_analyze(); + + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); } #endif @@ -573,15 +643,37 @@ void __cpuinit early_init_mmu_secondary(void) void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* On Embedded 64-bit, we adjust the RMA size to match + /* On non-FSL Embedded 64-bit, we adjust the RMA size to match * the bolted TLB entry. We know for now that only 1G * entries are supported though that may eventually - * change. We crop it to the size of the first MEMBLOCK to + * change. + * + * on FSL Embedded 64-bit, we adjust the RMA size to match the + * first bolted TLB entry size. We still limit max to 1G even if + * the TLB could cover more. This is due to what the early init + * code is setup to do. + * + * We crop it to the size of the first MEMBLOCK to * avoid going over total available memory just in case... */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned long linear_sz; + linear_sz = calc_cam_sz(first_memblock_size, PAGE_OFFSET, + first_memblock_base); + ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); + } else +#endif + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); /* Finally limit subsequent allocations */ memblock_set_current_limit(first_memblock_base + ppc64_rma_size); } +#else /* ! CONFIG_PPC64 */ +void __init early_init_mmu(void) +{ +#ifdef CONFIG_PPC_47x + early_init_mmu_47x(); +#endif +} #endif /* CONFIG_PPC64 */ |