Diffstat (limited to 'arch/powerpc/mm')
27 files changed, 895 insertions, 300 deletions
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 6747eece84a..7b6c1075017 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -287,9 +287,7 @@ void __dma_free_coherent(size_t size, void *vaddr)  			pte_clear(&init_mm, addr, ptep);  			if (pfn_valid(pfn)) {  				struct page *page = pfn_to_page(pfn); - -				ClearPageReserved(page); -				__free_page(page); +				__free_reserved_page(page);  			}  		}  		addr += PAGE_SIZE; diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 07ba45b0f07..94cd728166d 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -52,6 +52,7 @@  #include <asm/smp.h>  #include <asm/machdep.h>  #include <asm/setup.h> +#include <asm/paca.h>  #include "mmu_decl.h" @@ -171,11 +172,10 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,  	return 1UL << camsize;  } -unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) +static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, +					unsigned long ram, int max_cam_idx)  {  	int i; -	unsigned long virt = PAGE_OFFSET; -	phys_addr_t phys = memstart_addr;  	unsigned long amount_mapped = 0;  	/* Calculate CAM values */ @@ -192,9 +192,23 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)  	}  	tlbcam_index = i; +#ifdef CONFIG_PPC64 +	get_paca()->tcd.esel_next = i; +	get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; +	get_paca()->tcd.esel_first = i; +#endif +  	return amount_mapped;  } +unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) +{ +	unsigned long virt = PAGE_OFFSET; +	phys_addr_t phys = memstart_addr; + +	return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx); +} +  #ifdef CONFIG_PPC32  #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) @@ -222,7 +236,9 @@ void __init adjust_total_lowmem(void)  	/* adjust lowmem size to __max_low_memory */  	ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); +	i = switch_to_as1();  	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); +	restore_to_as0(i, 0, 0, 1);  	pr_info("Memory CAM mapping: ");  	for (i = 0; i < tlbcam_index - 1; i++) @@ -241,4 +257,62 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,  	/* 64M mapped initially according to head_fsl_booke.S */  	memblock_set_current_limit(min_t(u64, limit, 0x04000000));  } + +#ifdef CONFIG_RELOCATABLE +int __initdata is_second_reloc; +notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) +{ +	unsigned long base = KERNELBASE; + +	kernstart_addr = start; +	if (is_second_reloc) { +		virt_phys_offset = PAGE_OFFSET - memstart_addr; +		return; +	} + +	/* +	 * Relocatable kernel support based on processing of dynamic +	 * relocation entries. 
Before we get the real memstart_addr, +	 * We will compute the virt_phys_offset like this: +	 * virt_phys_offset = stext.run - kernstart_addr +	 * +	 * stext.run = (KERNELBASE & ~0x3ffffff) + +	 *				(kernstart_addr & 0x3ffffff) +	 * When we relocate, we have : +	 * +	 *	(kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) +	 * +	 * hence: +	 *  virt_phys_offset = (KERNELBASE & ~0x3ffffff) - +	 *                              (kernstart_addr & ~0x3ffffff) +	 * +	 */ +	start &= ~0x3ffffff; +	base &= ~0x3ffffff; +	virt_phys_offset = base - start; +	early_get_first_memblock_info(__va(dt_ptr), NULL); +	/* +	 * We now get the memstart_addr, then we should check if this +	 * address is the same as what the PAGE_OFFSET map to now. If +	 * not we have to change the map of PAGE_OFFSET to memstart_addr +	 * and do a second relocation. +	 */ +	if (start != memstart_addr) { +		int n; +		long offset = start - memstart_addr; + +		is_second_reloc = 1; +		n = switch_to_as1(); +		/* map a 64M area for the second relocation */ +		if (memstart_addr > start) +			map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM); +		else +			map_mem_in_cams_addr(start, PAGE_OFFSET + offset, +					0x4000000, CONFIG_LOWMEM_CAM_NUM); +		restore_to_as0(n, offset, __va(dt_ptr), 1); +		/* We should never reach here */ +		panic("Relocation error"); +	} +} +#endif  #endif diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 6936547018b..d8746684f60 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -36,6 +36,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,  	do {  		pte_t pte = ACCESS_ONCE(*ptep);  		struct page *page; +		/* +		 * Similar to the PMD case, NUMA hinting must take slow path +		 */ +		if (pte_numa(pte)) +			return 0;  		if ((pte_val(pte) & mask) != result)  			return 0; @@ -75,6 +80,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,  		if (pmd_none(pmd) || pmd_trans_splitting(pmd))  			return 0;  		if (pmd_huge(pmd) || pmd_large(pmd)) { +			/* +			 * NUMA hinting faults need to be handled in the GUP +			 * slowpath for accounting purposes and so that they +			 * can be serialised against THP migration. +			 */ +			if (pmd_numa(pmd)) +				return 0; +  			if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,  					 write, pages, nr))  				return 0; @@ -123,6 +136,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,  	struct mm_struct *mm = current->mm;  	unsigned long addr, len, end;  	unsigned long next; +	unsigned long flags;  	pgd_t *pgdp;  	int nr = 0; @@ -156,7 +170,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,  	 * So long as we atomically load page table pointers versus teardown,  	 * we can follow the address down to the the page and take a ref on it.  	 
*/ -	local_irq_disable(); +	local_irq_save(flags);  	pgdp = pgd_offset(mm, addr);  	do { @@ -179,7 +193,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,  			break;  	} while (pgdp++, addr = next, addr != end); -	local_irq_enable(); +	local_irq_restore(flags);  	return nr;  } diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index d3cbda62857..057cbbb4c57 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -148,7 +148,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/  	andc	r0,r30,r0		/* r0 = pte & ~r0 */  	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */ -	ori	r3,r3,HPTE_R_C		/* Always add "C" bit for perf. */ +	/* +	 * Always add "C" bit for perf. Memory coherence is always enabled +	 */ +	ori	r3,r3,HPTE_R_C | HPTE_R_M  	/* We eventually do the icache sync here (maybe inline that  	 * code rather than call a C function...)  @@ -156,7 +159,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  BEGIN_FTR_SECTION  	mr	r4,r30  	mr	r5,r7 -	bl	.hash_page_do_lazy_icache +	bl	hash_page_do_lazy_icache  END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)  	/* At this point, r3 contains new PP bits, save them in @@ -198,7 +201,8 @@ htab_insert_pte:  	li	r8,MMU_PAGE_4K		/* page size */  	li	r9,MMU_PAGE_4K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(htab_call_hpte_insert1) +.globl htab_call_hpte_insert1 +htab_call_hpte_insert1:  	bl	.			/* Patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge	htab_pte_insert_ok	/* Insertion successful */ @@ -222,7 +226,8 @@ _GLOBAL(htab_call_hpte_insert1)  	li	r8,MMU_PAGE_4K		/* page size */  	li	r9,MMU_PAGE_4K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(htab_call_hpte_insert2) +.globl htab_call_hpte_insert2 +htab_call_hpte_insert2:  	bl	.			/* Patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge+	htab_pte_insert_ok	/* Insertion successful */ @@ -239,7 +244,8 @@ _GLOBAL(htab_call_hpte_insert2)  2:	and	r0,r5,r27  	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */	  	/* Call ppc_md.hpte_remove */ -_GLOBAL(htab_call_hpte_remove) +.globl htab_call_hpte_remove +htab_call_hpte_remove:  	bl	.			/* Patched by htab_finish_init() */  	/* Try all again */ @@ -293,7 +299,8 @@ htab_modify_pte:  	li	r7,MMU_PAGE_4K		/* actual page size */  	ld	r8,STK_PARAM(R9)(r1)	/* segment size */  	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */ -_GLOBAL(htab_call_hpte_updatepp) +.globl htab_call_hpte_updatepp +htab_call_hpte_updatepp:  	bl	.			/* Patched by htab_finish_init() */  	/* if we failed because typically the HPTE wasn't really here @@ -457,7 +464,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/  	andc	r0,r3,r0		/* r0 = pte & ~r0 */  	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */ -	ori	r3,r3,HPTE_R_C		/* Always add "C" bit for perf. */ +	/* +	 * Always add "C" bit for perf. Memory coherence is always enabled +	 */ +	ori	r3,r3,HPTE_R_C | HPTE_R_M  	/* We eventually do the icache sync here (maybe inline that  	 * code rather than call a C function...) 
@@ -465,7 +475,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  BEGIN_FTR_SECTION  	mr	r4,r30  	mr	r5,r7 -	bl	.hash_page_do_lazy_icache +	bl	hash_page_do_lazy_icache  END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)  	/* At this point, r3 contains new PP bits, save them in @@ -520,7 +530,8 @@ htab_special_pfn:  	li	r8,MMU_PAGE_4K		/* page size */  	li	r9,MMU_PAGE_4K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(htab_call_hpte_insert1) +.globl htab_call_hpte_insert1 +htab_call_hpte_insert1:  	bl	.			/* patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge	htab_pte_insert_ok	/* Insertion successful */ @@ -548,7 +559,8 @@ _GLOBAL(htab_call_hpte_insert1)  	li	r8,MMU_PAGE_4K		/* page size */  	li	r9,MMU_PAGE_4K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(htab_call_hpte_insert2) +.globl htab_call_hpte_insert2 +htab_call_hpte_insert2:  	bl	.			/* patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge+	htab_pte_insert_ok	/* Insertion successful */ @@ -565,7 +577,8 @@ _GLOBAL(htab_call_hpte_insert2)  2:	and	r0,r5,r27  	rldicr	r3,r0,3,63-3		/* r0 = (hash & mask) << 3 */  	/* Call ppc_md.hpte_remove */ -_GLOBAL(htab_call_hpte_remove) +.globl htab_call_hpte_remove +htab_call_hpte_remove:  	bl	.			/* patched by htab_finish_init() */  	/* Try all again */ @@ -582,7 +595,7 @@ htab_inval_old_hpte:  	li	r6,MMU_PAGE_64K		/* psize */  	ld	r7,STK_PARAM(R9)(r1)	/* ssize */  	ld	r8,STK_PARAM(R8)(r1)	/* local */ -	bl	.flush_hash_page +	bl	flush_hash_page  	/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */  	lis	r0,_PAGE_HPTE_SUB@h  	ori	r0,r0,_PAGE_HPTE_SUB@l @@ -654,7 +667,8 @@ htab_modify_pte:  	li	r7,MMU_PAGE_4K		/* actual page size */  	ld	r8,STK_PARAM(R9)(r1)	/* segment size */  	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */ -_GLOBAL(htab_call_hpte_updatepp) +.globl htab_call_hpte_updatepp +htab_call_hpte_updatepp:  	bl	.			/* patched by htab_finish_init() */  	/* if we failed because typically the HPTE wasn't really here @@ -795,7 +809,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/  	andc	r0,r30,r0		/* r0 = pte & ~r0 */  	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */ -	ori	r3,r3,HPTE_R_C		/* Always add "C" bit for perf. */ +	/* +	 * Always add "C" bit for perf. Memory coherence is always enabled +	 */ +	ori	r3,r3,HPTE_R_C | HPTE_R_M  	/* We eventually do the icache sync here (maybe inline that  	 * code rather than call a C function...) @@ -803,7 +820,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)  BEGIN_FTR_SECTION  	mr	r4,r30  	mr	r5,r7 -	bl	.hash_page_do_lazy_icache +	bl	hash_page_do_lazy_icache  END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)  	/* At this point, r3 contains new PP bits, save them in @@ -848,7 +865,8 @@ ht64_insert_pte:  	li	r8,MMU_PAGE_64K  	li	r9,MMU_PAGE_64K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(ht64_call_hpte_insert1) +.globl ht64_call_hpte_insert1 +ht64_call_hpte_insert1:  	bl	.			/* patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge	ht64_pte_insert_ok	/* Insertion successful */ @@ -872,7 +890,8 @@ _GLOBAL(ht64_call_hpte_insert1)  	li	r8,MMU_PAGE_64K  	li	r9,MMU_PAGE_64K		/* actual page size */  	ld	r10,STK_PARAM(R9)(r1)	/* segment size */ -_GLOBAL(ht64_call_hpte_insert2) +.globl ht64_call_hpte_insert2 +ht64_call_hpte_insert2:  	bl	.			
/* patched by htab_finish_init() */  	cmpdi	0,r3,0  	bge+	ht64_pte_insert_ok	/* Insertion successful */ @@ -889,7 +908,8 @@ _GLOBAL(ht64_call_hpte_insert2)  2:	and	r0,r5,r27  	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */  	/* Call ppc_md.hpte_remove */ -_GLOBAL(ht64_call_hpte_remove) +.globl ht64_call_hpte_remove +ht64_call_hpte_remove:  	bl	.			/* patched by htab_finish_init() */  	/* Try all again */ @@ -943,7 +963,8 @@ ht64_modify_pte:  	li	r7,MMU_PAGE_64K		/* actual page size */  	ld	r8,STK_PARAM(R9)(r1)	/* segment size */  	ld	r9,STK_PARAM(R8)(r1)	/* get "local" param */ -_GLOBAL(ht64_call_hpte_updatepp) +.globl ht64_call_hpte_updatepp +ht64_call_hpte_updatepp:  	bl	.			/* patched by htab_finish_init() */  	/* if we failed because typically the HPTE wasn't really here diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index c33d939120c..cf1d325eae8 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -35,7 +35,11 @@  #define DBG_LOW(fmt...)  #endif +#ifdef __BIG_ENDIAN__  #define HPTE_LOCK_BIT 3 +#else +#define HPTE_LOCK_BIT (56+3) +#endif  DEFINE_RAW_SPINLOCK(native_tlbie_lock); @@ -78,17 +82,14 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)  		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);  		va |= penc << 12;  		va |= ssize << 8; -		/* Add AVAL part */ -		if (psize != apsize) { -			/* -			 * MPSS, 64K base page size and 16MB parge page size -			 * We don't need all the bits, but rest of the bits -			 * must be ignored by the processor. -			 * vpn cover upto 65 bits of va. (0...65) and we need -			 * 58..64 bits of va. -			 */ -			va |= (vpn & 0xfe); -		} +		/* +		 * AVAL bits: +		 * We don't need all the bits, but rest of the bits +		 * must be ignored by the processor. +		 * vpn cover upto 65 bits of va. (0...65) and we need +		 * 58..64 bits of va. +		 */ +		va |= (vpn & 0xfe); /* AVAL */  		va |= 1; /* L */  		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)  			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) @@ -129,17 +130,14 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)  		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);  		va |= penc << 12;  		va |= ssize << 8; -		/* Add AVAL part */ -		if (psize != apsize) { -			/* -			 * MPSS, 64K base page size and 16MB parge page size -			 * We don't need all the bits, but rest of the bits -			 * must be ignored by the processor. -			 * vpn cover upto 65 bits of va. (0...65) and we need -			 * 58..64 bits of va. -			 */ -			va |= (vpn & 0xfe); -		} +		/* +		 * AVAL bits: +		 * We don't need all the bits, but rest of the bits +		 * must be ignored by the processor. +		 * vpn cover upto 65 bits of va. (0...65) and we need +		 * 58..64 bits of va. 
+		 */ +		va |= (vpn & 0xfe);  		va |= 1; /* L */  		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"  			     : : "r"(va) : "memory"); @@ -172,7 +170,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,  static inline void native_lock_hpte(struct hash_pte *hptep)  { -	unsigned long *word = &hptep->v; +	unsigned long *word = (unsigned long *)&hptep->v;  	while (1) {  		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) @@ -184,7 +182,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)  static inline void native_unlock_hpte(struct hash_pte *hptep)  { -	unsigned long *word = &hptep->v; +	unsigned long *word = (unsigned long *)&hptep->v;  	clear_bit_unlock(HPTE_LOCK_BIT, word);  } @@ -204,10 +202,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,  	}  	for (i = 0; i < HPTES_PER_GROUP; i++) { -		if (! (hptep->v & HPTE_V_VALID)) { +		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {  			/* retry with lock held */  			native_lock_hpte(hptep); -			if (! (hptep->v & HPTE_V_VALID)) +			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))  				break;  			native_unlock_hpte(hptep);  		} @@ -226,14 +224,14 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,  			i, hpte_v, hpte_r);  	} -	hptep->r = hpte_r; +	hptep->r = cpu_to_be64(hpte_r);  	/* Guarantee the second dword is visible before the valid bit */  	eieio();  	/*  	 * Now set the first dword including the valid bit  	 * NOTE: this also unlocks the hpte  	 */ -	hptep->v = hpte_v; +	hptep->v = cpu_to_be64(hpte_v);  	__asm__ __volatile__ ("ptesync" : : : "memory"); @@ -254,12 +252,12 @@ static long native_hpte_remove(unsigned long hpte_group)  	for (i = 0; i < HPTES_PER_GROUP; i++) {  		hptep = htab_address + hpte_group + slot_offset; -		hpte_v = hptep->v; +		hpte_v = be64_to_cpu(hptep->v);  		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {  			/* retry with lock held */  			native_lock_hpte(hptep); -			hpte_v = hptep->v; +			hpte_v = be64_to_cpu(hptep->v);  			if ((hpte_v & HPTE_V_VALID)  			    && !(hpte_v & HPTE_V_BOLTED))  				break; @@ -294,7 +292,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,  	native_lock_hpte(hptep); -	hpte_v = hptep->v; +	hpte_v = be64_to_cpu(hptep->v);  	/*  	 * We need to invalidate the TLB always because hpte_remove doesn't do  	 * a tlb invalidate. 
If a hash bucket gets full, we "evict" a more/less @@ -308,8 +306,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,  	} else {  		DBG_LOW(" -> hit\n");  		/* Update the HPTE */ -		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | -			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); +		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | +			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));  	}  	native_unlock_hpte(hptep); @@ -334,7 +332,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)  	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;  	for (i = 0; i < HPTES_PER_GROUP; i++) {  		hptep = htab_address + slot; -		hpte_v = hptep->v; +		hpte_v = be64_to_cpu(hptep->v);  		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))  			/* HPTE matches */ @@ -369,8 +367,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,  	hptep = htab_address + slot;  	/* Update the HPTE */ -	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | -		(newpp & (HPTE_R_PP | HPTE_R_N)); +	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & +			~(HPTE_R_PP | HPTE_R_N)) | +		(newpp & (HPTE_R_PP | HPTE_R_N)));  	/*  	 * Ensure it is out of the tlb too. Bolted entries base and  	 * actual page size will be same. @@ -392,7 +391,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,  	want_v = hpte_encode_avpn(vpn, bpsize, ssize);  	native_lock_hpte(hptep); -	hpte_v = hptep->v; +	hpte_v = be64_to_cpu(hptep->v);  	/*  	 * We need to invalidate the TLB always because hpte_remove doesn't do @@ -458,7 +457,7 @@ static void native_hugepage_invalidate(struct mm_struct *mm,  		hptep = htab_address + slot;  		want_v = hpte_encode_avpn(vpn, psize, ssize);  		native_lock_hpte(hptep); -		hpte_v = hptep->v; +		hpte_v = be64_to_cpu(hptep->v);  		/* Even if we miss, we need to invalidate the TLB */  		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -519,11 +518,12 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,  			int *psize, int *apsize, int *ssize, unsigned long *vpn)  {  	unsigned long avpn, pteg, vpi; -	unsigned long hpte_v = hpte->v; +	unsigned long hpte_v = be64_to_cpu(hpte->v); +	unsigned long hpte_r = be64_to_cpu(hpte->r);  	unsigned long vsid, seg_off;  	int size, a_size, shift;  	/* Look at the 8 bit LP value */ -	unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); +	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);  	if (!(hpte_v & HPTE_V_LARGE)) {  		size   = MMU_PAGE_4K; @@ -612,7 +612,7 @@ static void native_hpte_clear(void)  		 * running,  right?  and for crash dump, we probably  		 * don't want to wait for a maybe bad cpu.  		 
*/ -		hpte_v = hptep->v; +		hpte_v = be64_to_cpu(hptep->v);  		/*  		 * Call __tlbie() here rather than tlbie() since we @@ -664,7 +664,7 @@ static void native_flush_hash_range(unsigned long number, int local)  			hptep = htab_address + slot;  			want_v = hpte_encode_avpn(vpn, psize, ssize);  			native_lock_hpte(hptep); -			hpte_v = hptep->v; +			hpte_v = be64_to_cpu(hptep->v);  			if (!HPTE_V_COMPARE(hpte_v, want_v) ||  			    !(hpte_v & HPTE_V_VALID))  				native_unlock_hpte(hptep); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bde8b558975..88fdd9d2507 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -169,9 +169,10 @@ static unsigned long htab_convert_pte_flags(unsigned long pteflags)  	if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&  					 (pteflags & _PAGE_DIRTY)))  		rflags |= 1; - -	/* Always add C */ -	return rflags | HPTE_R_C; +	/* +	 * Always add "C" bit for perf. Memory coherence is always enabled +	 */ +	return rflags | HPTE_R_C | HPTE_R_M;  }  int htab_bolt_mapping(unsigned long vstart, unsigned long vend, @@ -206,6 +207,24 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,  		if (overlaps_kernel_text(vaddr, vaddr + step))  			tprot &= ~HPTE_R_N; +		/* Make kvm guest trampolines executable */ +		if (overlaps_kvm_tmp(vaddr, vaddr + step)) +			tprot &= ~HPTE_R_N; + +		/* +		 * If relocatable, check if it overlaps interrupt vectors that +		 * are copied down to real 0. For relocatable kernel +		 * (e.g. kdump case) we copy interrupt vectors down to real +		 * address 0. Mark that region as executable. This is +		 * because on p8 system with relocation on exception feature +		 * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence +		 * in order to execute the interrupt handlers in virtual +		 * mode the vector region need to be marked as executable. 
+		 */ +		if ((PHYSICAL_START > MEMORY_START) && +			overlaps_interrupt_vector_text(vaddr, vaddr + step)) +				tprot &= ~HPTE_R_N; +  		hash = hpt_hash(vpn, shift, ssize);  		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); @@ -250,20 +269,19 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,  					 const char *uname, int depth,  					 void *data)  { -	char *type = of_get_flat_dt_prop(node, "device_type", NULL); -	u32 *prop; -	unsigned long size = 0; +	const char *type = of_get_flat_dt_prop(node, "device_type", NULL); +	const __be32 *prop; +	int size = 0;  	/* We are scanning "cpu" nodes only */  	if (type == NULL || strcmp(type, "cpu") != 0)  		return 0; -	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", -					  &size); +	prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);  	if (prop == NULL)  		return 0;  	for (; size >= 4; size -= 4, ++prop) { -		if (prop[0] == 40) { +		if (be32_to_cpu(prop[0]) == 40) {  			DBG("1T segment support detected\n");  			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;  			return 1; @@ -306,24 +324,23 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,  					  const char *uname, int depth,  					  void *data)  { -	char *type = of_get_flat_dt_prop(node, "device_type", NULL); -	u32 *prop; -	unsigned long size = 0; +	const char *type = of_get_flat_dt_prop(node, "device_type", NULL); +	const __be32 *prop; +	int size = 0;  	/* We are scanning "cpu" nodes only */  	if (type == NULL || strcmp(type, "cpu") != 0)  		return 0; -	prop = (u32 *)of_get_flat_dt_prop(node, -					  "ibm,segment-page-sizes", &size); +	prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);  	if (prop != NULL) {  		pr_info("Page sizes from device-tree:\n");  		size /= 4;  		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);  		while(size > 0) { -			unsigned int base_shift = prop[0]; -			unsigned int slbenc = prop[1]; -			unsigned int lpnum = prop[2]; +			unsigned int base_shift = be32_to_cpu(prop[0]); +			unsigned int slbenc = be32_to_cpu(prop[1]); +			unsigned int lpnum = be32_to_cpu(prop[2]);  			struct mmu_psize_def *def;  			int idx, base_idx; @@ -356,8 +373,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,  				def->tlbiel = 0;  			while (size > 0 && lpnum) { -				unsigned int shift = prop[0]; -				int penc  = prop[1]; +				unsigned int shift = be32_to_cpu(prop[0]); +				int penc  = be32_to_cpu(prop[1]);  				prop += 2; size -= 2;  				lpnum--; @@ -389,9 +406,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,  static int __init htab_dt_scan_hugepage_blocks(unsigned long node,  					const char *uname, int depth,  					void *data) { -	char *type = of_get_flat_dt_prop(node, "device_type", NULL); -	unsigned long *addr_prop; -	u32 *page_count_prop; +	const char *type = of_get_flat_dt_prop(node, "device_type", NULL); +	const __be64 *addr_prop; +	const __be32 *page_count_prop;  	unsigned int expected_pages;  	long unsigned int phys_addr;  	long unsigned int block_size; @@ -405,12 +422,12 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,  	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);  	if (page_count_prop == NULL)  		return 0; -	expected_pages = (1 << page_count_prop[0]); +	expected_pages = (1 << be32_to_cpu(page_count_prop[0]));  	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);  	if (addr_prop == NULL)  		return 0; -	phys_addr = addr_prop[0]; -	block_size = addr_prop[1]; +	phys_addr = be64_to_cpu(addr_prop[0]); +	block_size = 
be64_to_cpu(addr_prop[1]);  	if (block_size != (16 * GB))  		return 0;  	printk(KERN_INFO "Huge page(16GB) memory: " @@ -432,6 +449,24 @@ static void mmu_psize_set_default_penc(void)  			mmu_psize_defs[bpsize].penc[apsize] = -1;  } +#ifdef CONFIG_PPC_64K_PAGES + +static bool might_have_hea(void) +{ +	/* +	 * The HEA ethernet adapter requires awareness of the +	 * GX bus. Without that awareness we can easily assume +	 * we will never see an HEA ethernet device. +	 */ +#ifdef CONFIG_IBMEBUS +	return !cpu_has_feature(CPU_FTR_ARCH_207S); +#else +	return false; +#endif +} + +#endif /* #ifdef CONFIG_PPC_64K_PAGES */ +  static void __init htab_init_page_sizes(void)  {  	int rc; @@ -486,10 +521,11 @@ static void __init htab_init_page_sizes(void)  			mmu_linear_psize = MMU_PAGE_64K;  		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {  			/* -			 * Don't use 64k pages for ioremap on pSeries, since -			 * that would stop us accessing the HEA ethernet. +			 * When running on pSeries using 64k pages for ioremap +			 * would stop us accessing the HEA ethernet. So if we +			 * have the chance of ever seeing one, stay at 4k.  			 */ -			if (!machine_is(pseries)) +			if (!might_have_hea() || !machine_is(pseries))  				mmu_io_psize = MMU_PAGE_64K;  		} else  			mmu_ci_restrictions = 1; @@ -533,17 +569,17 @@ static int __init htab_dt_scan_pftsize(unsigned long node,  				       const char *uname, int depth,  				       void *data)  { -	char *type = of_get_flat_dt_prop(node, "device_type", NULL); -	u32 *prop; +	const char *type = of_get_flat_dt_prop(node, "device_type", NULL); +	const __be32 *prop;  	/* We are scanning "cpu" nodes only */  	if (type == NULL || strcmp(type, "cpu") != 0)  		return 0; -	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL); +	prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);  	if (prop != NULL) {  		/* pft_size[0] is the NUMA CEC cookie */ -		ppc64_pft_size = prop[1]; +		ppc64_pft_size = be32_to_cpu(prop[1]);  		return 1;  	}  	return 0; @@ -590,47 +626,43 @@ int remove_section_mapping(unsigned long start, unsigned long end)  }  #endif /* CONFIG_MEMORY_HOTPLUG */ -#define FUNCTION_TEXT(A)	((*(unsigned long *)(A))) +extern u32 htab_call_hpte_insert1[]; +extern u32 htab_call_hpte_insert2[]; +extern u32 htab_call_hpte_remove[]; +extern u32 htab_call_hpte_updatepp[]; +extern u32 ht64_call_hpte_insert1[]; +extern u32 ht64_call_hpte_insert2[]; +extern u32 ht64_call_hpte_remove[]; +extern u32 ht64_call_hpte_updatepp[];  static void __init htab_finish_init(void)  { -	extern unsigned int *htab_call_hpte_insert1; -	extern unsigned int *htab_call_hpte_insert2; -	extern unsigned int *htab_call_hpte_remove; -	extern unsigned int *htab_call_hpte_updatepp; -  #ifdef CONFIG_PPC_HAS_HASH_64K -	extern unsigned int *ht64_call_hpte_insert1; -	extern unsigned int *ht64_call_hpte_insert2; -	extern unsigned int *ht64_call_hpte_remove; -	extern unsigned int *ht64_call_hpte_updatepp; -  	patch_branch(ht64_call_hpte_insert1, -		FUNCTION_TEXT(ppc_md.hpte_insert), +		ppc_function_entry(ppc_md.hpte_insert),  		BRANCH_SET_LINK);  	patch_branch(ht64_call_hpte_insert2, -		FUNCTION_TEXT(ppc_md.hpte_insert), +		ppc_function_entry(ppc_md.hpte_insert),  		BRANCH_SET_LINK);  	patch_branch(ht64_call_hpte_remove, -		FUNCTION_TEXT(ppc_md.hpte_remove), +		ppc_function_entry(ppc_md.hpte_remove),  		BRANCH_SET_LINK);  	patch_branch(ht64_call_hpte_updatepp, -		FUNCTION_TEXT(ppc_md.hpte_updatepp), +		ppc_function_entry(ppc_md.hpte_updatepp),  		BRANCH_SET_LINK); -  #endif /* CONFIG_PPC_HAS_HASH_64K */  	
patch_branch(htab_call_hpte_insert1, -		FUNCTION_TEXT(ppc_md.hpte_insert), +		ppc_function_entry(ppc_md.hpte_insert),  		BRANCH_SET_LINK);  	patch_branch(htab_call_hpte_insert2, -		FUNCTION_TEXT(ppc_md.hpte_insert), +		ppc_function_entry(ppc_md.hpte_insert),  		BRANCH_SET_LINK);  	patch_branch(htab_call_hpte_remove, -		FUNCTION_TEXT(ppc_md.hpte_remove), +		ppc_function_entry(ppc_md.hpte_remove),  		BRANCH_SET_LINK);  	patch_branch(htab_call_hpte_updatepp, -		FUNCTION_TEXT(ppc_md.hpte_updatepp), +		ppc_function_entry(ppc_md.hpte_updatepp),  		BRANCH_SET_LINK);  } @@ -947,6 +979,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access,  		trap, vsid, ssize, psize, lpsize, pte);  } +static void check_paca_psize(unsigned long ea, struct mm_struct *mm, +			     int psize, bool user_region) +{ +	if (user_region) { +		if (psize != get_paca_psize(ea)) { +			get_paca()->context = mm->context; +			slb_flush_and_rebolt(); +		} +	} else if (get_paca()->vmalloc_sllp != +		   mmu_psize_defs[mmu_vmalloc_psize].sllp) { +		get_paca()->vmalloc_sllp = +			mmu_psize_defs[mmu_vmalloc_psize].sllp; +		slb_vmalloc_update(); +	} +} +  /* Result code is:   *  0 - handled   *  1 - normal page fault @@ -1068,6 +1116,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)  			WARN_ON(1);  		}  #endif +		check_paca_psize(ea, mm, psize, user_region); +  		goto bail;  	} @@ -1108,17 +1158,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)  #endif  		}  	} -	if (user_region) { -		if (psize != get_paca_psize(ea)) { -			get_paca()->context = mm->context; -			slb_flush_and_rebolt(); -		} -	} else if (get_paca()->vmalloc_sllp != -		   mmu_psize_defs[mmu_vmalloc_psize].sllp) { -		get_paca()->vmalloc_sllp = -			mmu_psize_defs[mmu_vmalloc_psize].sllp; -		slb_vmalloc_update(); -	} + +	check_paca_psize(ea, mm, psize, user_region);  #endif /* CONFIG_PPC_64K_PAGES */  #ifdef CONFIG_PPC_HAS_HASH_64K diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 34de9e0cdc3..826893fcb3a 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -127,7 +127,11 @@ repeat:  		/* Add in WIMG bits */  		rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | -				      _PAGE_COHERENT | _PAGE_GUARDED)); +				      _PAGE_GUARDED)); +		/* +		 * enable the memory coherence always +		 */ +		rflags |= HPTE_R_M;  		/* Insert into the hash table, primary slot */  		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 3bc700655fc..5e4ee257390 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -8,6 +8,44 @@  #include <linux/mm.h>  #include <linux/hugetlb.h> +#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC64 +static inline int tlb1_next(void) +{ +	struct paca_struct *paca = get_paca(); +	struct tlb_core_data *tcd; +	int this, next; + +	tcd = paca->tcd_ptr; +	this = tcd->esel_next; + +	next = this + 1; +	if (next >= tcd->esel_max) +		next = tcd->esel_first; + +	tcd->esel_next = next; +	return this; +} +#else +static inline int tlb1_next(void) +{ +	int index, ncams; + +	ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + +	index = __get_cpu_var(next_tlbcam_idx); + +	/* Just round-robin the entries and wrap when we hit the end */ +	if (unlikely(index == ncams - 1)) +		__get_cpu_var(next_tlbcam_idx) = tlbcam_index; +	else +		__get_cpu_var(next_tlbcam_idx)++; + +	return index; +} +#endif /* !PPC64 
*/ +#endif /* FSL */ +  static inline int mmu_get_tsize(int psize)  {  	return mmu_psize_defs[psize].enc; @@ -47,7 +85,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,  	struct mm_struct *mm;  #ifdef CONFIG_PPC_FSL_BOOK3E -	int index, ncams; +	int index;  #endif  	if (unlikely(is_kernel_addr(ea))) @@ -77,18 +115,11 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,  	}  #ifdef CONFIG_PPC_FSL_BOOK3E -	ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; -  	/* We have to use the CAM(TLB1) on FSL parts for hugepages */ -	index = __get_cpu_var(next_tlbcam_idx); +	index = tlb1_next();  	mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); - -	/* Just round-robin the entries and wrap when we hit the end */ -	if (unlikely(index == ncams - 1)) -		__get_cpu_var(next_tlbcam_idx) = tlbcam_index; -	else -		__get_cpu_var(next_tlbcam_idx)++;  #endif +  	mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);  	mas2 = ea & ~((1UL << shift) - 1);  	mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; @@ -103,7 +134,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,  	if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {  		mtspr(SPRN_MAS7_MAS3, mas7_3);  	} else { -		mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); +		if (mmu_has_feature(MMU_FTR_BIG_PHYS)) +			mtspr(SPRN_MAS7, upper_32_bits(mas7_3));  		mtspr(SPRN_MAS3, lower_32_bits(mas7_3));  	} @@ -117,6 +149,5 @@ void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)  	struct hstate *hstate = hstate_file(vma->vm_file);  	unsigned long tsize = huge_page_shift(hstate) - 10; -	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0); - +	__flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);  } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0b7fb676101..a5bcf930119 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -99,6 +99,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,  		/* Add in WIMG bits */  		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |  				      _PAGE_COHERENT | _PAGE_GUARDED)); +		/* +		 * enable the memory coherence always +		 */ +		rflags |= HPTE_R_M;  		slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,  					     mmu_psize, ssize); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d67db4bd672..7e70ae968e5 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd)  	 */  	return ((pgd_val(pgd) & 0x3) != 0x0);  } - -int pmd_huge_support(void) -{ -	return 1; -}  #else  int pmd_huge(pmd_t pmd)  { @@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd)  {  	return 0;  } - -int pmd_huge_support(void) -{ -	return 0; -}  #endif  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) @@ -472,12 +462,13 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)  {  	struct hugepd_freelist **batchp; -	batchp = &__get_cpu_var(hugepd_freelist_cur); +	batchp = &get_cpu_var(hugepd_freelist_cur);  	if (atomic_read(&tlb->mm->mm_users) < 2 ||  	    cpumask_equal(mm_cpumask(tlb->mm),  			  cpumask_of(smp_processor_id()))) {  		kmem_cache_free(hugepte_cache, hugepte); +        put_cpu_var(hugepd_freelist_cur);  		return;  	} @@ -491,6 +482,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)  		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);  		*batchp = NULL;  	} +	
put_cpu_var(hugepd_freelist_cur);  }  #endif @@ -633,8 +625,6 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,  /*   * This function frees user-level page tables of a process. - * - * Must be called with pagetable lock held.   */  void hugetlb_free_pgd_range(struct mmu_gather *tlb,  			    unsigned long addr, unsigned long end, diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index d47d3dab487..cff59f1bec2 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -213,7 +213,12 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,  	 */  	BUG_ON(first_memblock_base != 0); +#ifdef CONFIG_PIN_TLB +	/* 8xx can only access 24MB at the moment */ +	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000)); +#else  	/* 8xx can only access 8MB at the moment */  	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +#endif  }  #endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d0cd9e4c683..e3734edffa6 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -300,5 +300,58 @@ void vmemmap_free(unsigned long start, unsigned long end)  {  } -#endif /* CONFIG_SPARSEMEM_VMEMMAP */ +void register_page_bootmem_memmap(unsigned long section_nr, +				  struct page *start_page, unsigned long size) +{ +} + +/* + * We do not have access to the sparsemem vmemmap, so we fallback to + * walking the list of sparsemem blocks which we already maintain for + * the sake of crashdump. In the long run, we might want to maintain + * a tree if performance of that linear walk becomes a problem. + * + * realmode_pfn_to_page functions can fail due to: + * 1) As real sparsemem blocks do not lay in RAM continously (they + * are in virtual address space which is not available in the real mode), + * the requested page struct can be split between blocks so get_page/put_page + * may fail. + * 2) When huge pages are used, the get_page/put_page API will fail + * in real mode as the linked addresses in the page struct are virtual + * too. 
+ */ +struct page *realmode_pfn_to_page(unsigned long pfn) +{ +	struct vmemmap_backing *vmem_back; +	struct page *page; +	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; +	unsigned long pg_va = (unsigned long) pfn_to_page(pfn); + +	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) { +		if (pg_va < vmem_back->virt_addr) +			continue; + +		/* Check that page struct is not split between real pages */ +		if ((pg_va + sizeof(struct page)) > +				(vmem_back->virt_addr + page_size)) +			return NULL; + +		page = (struct page *) (vmem_back->phys + pg_va - +				vmem_back->virt_addr); +		return page; +	} + +	return NULL; +} +EXPORT_SYMBOL_GPL(realmode_pfn_to_page); + +#elif defined(CONFIG_FLATMEM) + +struct page *realmode_pfn_to_page(unsigned long pfn) +{ +	struct page *page = pfn_to_page(pfn); +	return page; +} +EXPORT_SYMBOL_GPL(realmode_pfn_to_page); +#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1cf9c5b67f2..2c8e90f5789 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -139,9 +139,14 @@ int arch_remove_memory(u64 start, u64 size)  	unsigned long start_pfn = start >> PAGE_SHIFT;  	unsigned long nr_pages = size >> PAGE_SHIFT;  	struct zone *zone; +	int ret;  	zone = page_zone(pfn_to_page(start_pfn)); -	return __remove_pages(zone, start_pfn, nr_pages); +	ret = __remove_pages(zone, start_pfn, nr_pages); +	if (!ret && (ppc_md.remove_memory)) +		ret = ppc_md.remove_memory(start, size); + +	return ret;  }  #endif  #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -209,7 +214,7 @@ void __init do_init_bootmem(void)  	/* Place all memblock_regions in the same node and merge contiguous  	 * memblock_regions  	 */ -	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); +	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);  	/* Add all physical memory to the bootmem map, mark each area  	 * present. @@ -297,12 +302,27 @@ void __init paging_init(void)  }  #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ +static void __init register_page_bootmem_info(void) +{ +	int i; + +	for_each_online_node(i) +		register_page_bootmem_info_node(NODE_DATA(i)); +} +  void __init mem_init(void)  { +	/* +	 * book3s is limited to 16 page sizes due to encoding this in +	 * a 4-bit field for slices. +	 */ +	BUILD_BUG_ON(MMU_PAGE_COUNT > 16); +  #ifdef CONFIG_SWIOTLB  	swiotlb_init(0);  #endif +	register_page_bootmem_info();  	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);  	set_max_mapnr(max_pfn);  	free_all_bootmem(); @@ -498,7 +518,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,   * System memory should not be in /proc/iomem but various tools expect it   * (eg kdump).   
*/ -static int add_system_ram_resources(void) +static int __init add_system_ram_resources(void)  {  	struct memblock_region *reg; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index af3d78e1930..928ebe79668 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -410,17 +410,7 @@ void __init mmu_context_init(void)  	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {  		first_context = 1;  		last_context = 65535; -	} else -#ifdef CONFIG_PPC_BOOK3E_MMU -	if (mmu_has_feature(MMU_FTR_TYPE_3E)) { -		u32 mmucfg = mfspr(SPRN_MMUCFG); -		u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK) -				>> MMUCFG_PIDSIZE_SHIFT; -		first_context = 1; -		last_context = (1UL << (pid_bits + 1)) - 1; -	} else -#endif -	{ +	} else {  		first_context = 1;  		last_context = 255;  	} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 83eb5d5f53d..9615d82919b 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,  extern void MMU_init_hw(void);  extern unsigned long mmu_mapin_ram(unsigned long top);  extern void adjust_total_lowmem(void); +extern int switch_to_as1(void); +extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);  #endif  extern void loadcam_entry(unsigned int index); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index c916127f10c..3b181b22cd4 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -31,6 +31,8 @@  #include <asm/sparsemem.h>  #include <asm/prom.h>  #include <asm/smp.h> +#include <asm/cputhreads.h> +#include <asm/topology.h>  #include <asm/firmware.h>  #include <asm/paca.h>  #include <asm/hvcall.h> @@ -152,9 +154,22 @@ static void __init get_node_active_region(unsigned long pfn,  	}  } -static void map_cpu_to_node(int cpu, int node) +static void reset_numa_cpu_lookup_table(void) +{ +	unsigned int cpu; + +	for_each_possible_cpu(cpu) +		numa_cpu_lookup_table[cpu] = -1; +} + +static void update_numa_cpu_lookup_table(unsigned int cpu, int node)  {  	numa_cpu_lookup_table[cpu] = node; +} + +static void map_cpu_to_node(int cpu, int node) +{ +	update_numa_cpu_lookup_table(cpu, node);  	dbg("adding cpu %d to node %d\n", cpu, node); @@ -195,7 +210,7 @@ static const __be32 *of_get_usable_memory(struct device_node *memory)  	u32 len;  	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);  	if (!prop || len < sizeof(unsigned int)) -		return 0; +		return NULL;  	return prop;  } @@ -217,6 +232,7 @@ int __node_distance(int a, int b)  	return distance;  } +EXPORT_SYMBOL(__node_distance);  static void initialize_distance_lookup_table(int nid,  		const __be32 *associativity) @@ -522,11 +538,24 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,   */  static int numa_setup_cpu(unsigned long lcpu)  { -	int nid = 0; -	struct device_node *cpu = of_get_cpu_node(lcpu, NULL); +	int nid; +	struct device_node *cpu; + +	/* +	 * If a valid cpu-to-node mapping is already available, use it +	 * directly instead of querying the firmware, since it represents +	 * the most recent mapping notified to us by the platform (eg: VPHN). 
+	 */ +	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { +		map_cpu_to_node(lcpu, nid); +		return nid; +	} + +	cpu = of_get_cpu_node(lcpu, NULL);  	if (!cpu) {  		WARN_ON(1); +		nid = 0;  		goto out;  	} @@ -542,16 +571,38 @@ out:  	return nid;  } +static void verify_cpu_node_mapping(int cpu, int node) +{ +	int base, sibling, i; + +	/* Verify that all the threads in the core belong to the same node */ +	base = cpu_first_thread_sibling(cpu); + +	for (i = 0; i < threads_per_core; i++) { +		sibling = base + i; + +		if (sibling == cpu || cpu_is_offline(sibling)) +			continue; + +		if (cpu_to_node(sibling) != node) { +			WARN(1, "CPU thread siblings %d and %d don't belong" +				" to the same node!\n", cpu, sibling); +			break; +		} +	} +} +  static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,  			     void *hcpu)  {  	unsigned long lcpu = (unsigned long)hcpu; -	int ret = NOTIFY_DONE; +	int ret = NOTIFY_DONE, nid;  	switch (action) {  	case CPU_UP_PREPARE:  	case CPU_UP_PREPARE_FROZEN: -		numa_setup_cpu(lcpu); +		nid = numa_setup_cpu(lcpu); +		verify_cpu_node_mapping((int)lcpu, nid);  		ret = NOTIFY_OK;  		break;  #ifdef CONFIG_HOTPLUG_CPU @@ -670,7 +721,8 @@ static void __init parse_drconf_memory(struct device_node *memory)  			node_set_online(nid);  			sz = numa_enforce_memory_limit(base, size);  			if (sz) -				memblock_set_node(base, sz, nid); +				memblock_set_node(base, sz, +						  &memblock.memory, nid);  		} while (--ranges);  	}  } @@ -760,7 +812,7 @@ new_range:  				continue;  		} -		memblock_set_node(start, size, nid); +		memblock_set_node(start, size, &memblock.memory, nid);  		if (--ranges)  			goto new_range; @@ -797,7 +849,8 @@ static void __init setup_nonnuma(void)  		fake_numa_create_new_node(end_pfn, &nid);  		memblock_set_node(PFN_PHYS(start_pfn), -				  PFN_PHYS(end_pfn - start_pfn), nid); +				  PFN_PHYS(end_pfn - start_pfn), +				  &memblock.memory, nid);  		node_set_online(nid);  	}  } @@ -938,8 +991,7 @@ static void __init mark_reserved_regions_for_nid(int nid)  		unsigned long start_pfn = physbase >> PAGE_SHIFT;  		unsigned long end_pfn = PFN_UP(physbase + size);  		struct node_active_region node_ar; -		unsigned long node_end_pfn = node->node_start_pfn + -					     node->node_spanned_pages; +		unsigned long node_end_pfn = pgdat_end_pfn(node);  		/*  		 * Check to make sure that this memblock.reserved area is @@ -1068,6 +1120,7 @@ void __init do_init_bootmem(void)  	 */  	setup_node_to_cpumask_map(); +	reset_numa_cpu_lookup_table();  	register_cpu_notifier(&ppc64_numa_nb);  	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,  			  (void *)(unsigned long)boot_cpuid); @@ -1154,7 +1207,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,   * represented in the device tree as a node (i.e. memory@XXXX) for   * each memblock.   
*/ -int hot_add_node_scn_to_nid(unsigned long scn_addr) +static int hot_add_node_scn_to_nid(unsigned long scn_addr)  {  	struct device_node *memory;  	int nid = -1; @@ -1235,7 +1288,7 @@ static u64 hot_add_drconf_memory_max(void)          struct device_node *memory = NULL;          unsigned int drconf_cell_cnt = 0;          u64 lmb_size = 0; -	const __be32 *dm = 0; +	const __be32 *dm = NULL;          memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");          if (memory) { @@ -1446,6 +1499,33 @@ static int update_cpu_topology(void *data)  	return 0;  } +static int update_lookup_table(void *data) +{ +	struct topology_update_data *update; + +	if (!data) +		return -EINVAL; + +	/* +	 * Upon topology update, the numa-cpu lookup table needs to be updated +	 * for all threads in the core, including offline CPUs, to ensure that +	 * future hotplug operations respect the cpu-to-node associativity +	 * properly. +	 */ +	for (update = data; update; update = update->next) { +		int nid, base, j; + +		nid = update->new_nid; +		base = cpu_first_thread_sibling(update->cpu); + +		for (j = 0; j < threads_per_core; j++) { +			update_numa_cpu_lookup_table(base + j, nid); +		} +	} + +	return 0; +} +  /*   * Update the node maps and sysfs entries for each cpu whose home node   * has changed. Returns 1 when the topology has changed, and 0 otherwise. @@ -1512,8 +1592,30 @@ int arch_update_cpu_topology(void)  		cpu = cpu_last_thread_sibling(cpu);  	} +	/* +	 * In cases where we have nothing to update (because the updates list +	 * is too short or because the new topology is same as the old one), +	 * skip invoking update_cpu_topology() via stop-machine(). This is +	 * necessary (and not just a fast-path optimization) since stop-machine +	 * can end up electing a random CPU to run update_cpu_topology(), and +	 * thus trick us into setting up incorrect cpu-node mappings (since +	 * 'updates' is kzalloc()'ed). +	 * +	 * And for the similar reason, we will skip all the following updating. +	 */ +	if (!cpumask_weight(&updated_cpus)) +		goto out; +  	stop_machine(update_cpu_topology, &updates[0], &updated_cpus); +	/* +	 * Update the numa-cpu lookup table with the new mappings, even for +	 * offline CPUs. It is best to perform this update from the stop- +	 * machine context. 
+	 */ +	stop_machine(update_lookup_table, &updates[0], +					cpumask_of(raw_smp_processor_id())); +  	for (ud = &updates[0]; ud; ud = ud->next) {  		unregister_cpu_under_node(ud->cpu, ud->old_nid);  		register_cpu_under_node(ud->cpu, ud->new_nid); @@ -1525,6 +1627,7 @@ int arch_update_cpu_topology(void)  		changed = 1;  	} +out:  	kfree(updates);  	return changed;  } @@ -1535,7 +1638,7 @@ static void topology_work_fn(struct work_struct *work)  }  static DECLARE_WORK(topology_work, topology_work_fn); -void topology_schedule_update(void) +static void topology_schedule_update(void)  {  	schedule_work(&topology_work);  } @@ -1698,7 +1801,7 @@ static const struct file_operations topology_ops = {  static int topology_update_init(void)  {  	start_topology_update(); -	proc_create("powerpc/topology_updates", 644, NULL, &topology_ops); +	proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);  	return 0;  } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index edda589795c..c695943a513 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -24,7 +24,6 @@  #include <linux/kernel.h>  #include <linux/gfp.h>  #include <linux/mm.h> -#include <linux/init.h>  #include <linux/percpu.h>  #include <linux/hardirq.h>  #include <linux/hugetlb.h> @@ -32,8 +31,6 @@  #include <asm/tlbflush.h>  #include <asm/tlb.h> -#include "mmu_decl.h" -  static inline int is_exec_fault(void)  {  	return current->thread.regs && TRAP(current->thread.regs) == 0x400; @@ -72,7 +69,7 @@ struct page * maybe_pte_to_page(pte_t pte)   * support falls into the same category.   */ -static pte_t set_pte_filter(pte_t pte, unsigned long addr) +static pte_t set_pte_filter(pte_t pte)  {  	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);  	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || @@ -81,17 +78,6 @@ static pte_t set_pte_filter(pte_t pte, unsigned long addr)  		if (!pg)  			return pte;  		if (!test_bit(PG_arch_1, &pg->flags)) { -#ifdef CONFIG_8xx -			/* On 8xx, cache control instructions (particularly -			 * "dcbst" from flush_dcache_icache) fault as write -			 * operation if there is an unpopulated TLB entry -			 * for the address in question. To workaround that, -			 * we invalidate the TLB here, thus avoiding dcbst -			 * misbehaviour. -			 */ -			/* 8xx doesn't care about PID, size or ind args */ -			_tlbil_va(addr, 0, 0, 0); -#endif /* CONFIG_8xx */  			flush_dcache_icache_page(pg);  			set_bit(PG_arch_1, &pg->flags);  		} @@ -111,7 +97,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,   * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so   * instead we "filter out" the exec permission for non clean pages.   */ -static pte_t set_pte_filter(pte_t pte, unsigned long addr) +static pte_t set_pte_filter(pte_t pte)  {  	struct page *pg; @@ -187,13 +173,13 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,  		pte_t pte)  {  #ifdef CONFIG_DEBUG_VM -	WARN_ON(pte_present(*ptep)); +	WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);  #endif  	/* Note: mm->context.id might not yet have been assigned as  	 * this context might not have been activated yet when this  	 * is called.  	 
*/ -	pte = set_pte_filter(pte, addr); +	pte = set_pte_filter(pte);  	/* Perform the setting of the PTE */  	__set_pte_at(mm, addr, ptep, pte, 0); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6c856fb8c15..343a87fa78b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -121,7 +121,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)  	ptepage = alloc_pages(flags, 0);  	if (!ptepage)  		return NULL; -	pgtable_page_ctor(ptepage); +	if (!pgtable_page_ctor(ptepage)) { +		__free_page(ptepage); +		return NULL; +	}  	return ptepage;  } @@ -296,6 +299,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)  		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,  						     __pgprot(flags)));  	} +	smp_wmb();  	return err;  } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 536eec72c0f..f6ce1f111f5 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@  #include <linux/swap.h>  #include <linux/stddef.h>  #include <linux/vmalloc.h> -#include <linux/init.h>  #include <linux/bootmem.h>  #include <linux/memblock.h>  #include <linux/slab.h> @@ -153,6 +152,18 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags)  		}  #endif /* !CONFIG_PPC_MMU_NOHASH */  	} + +#ifdef CONFIG_PPC_BOOK3E_64 +	/* +	 * With hardware tablewalk, a sync is needed to ensure that +	 * subsequent accesses see the PTE we just wrote.  Unlike userspace +	 * mappings, we can't tolerate spurious faults, so make sure +	 * the new PTE will be seen the first time. +	 */ +	mb(); +#else +	smp_wmb(); +#endif  	return 0;  } @@ -378,6 +389,10 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)  				       __GFP_REPEAT | __GFP_ZERO);  	if (!page)  		return NULL; +	if (!kernel && !pgtable_page_ctor(page)) { +		__free_page(page); +		return NULL; +	}  	ret = page_address(page);  	spin_lock(&mm->page_table_lock); @@ -392,9 +407,6 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)  	}  	spin_unlock(&mm->page_table_lock); -	if (!kernel) -		pgtable_page_ctor(page); -  	return (pte_t *)ret;  } @@ -498,7 +510,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,  }  unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, -				  pmd_t *pmdp, unsigned long clr) +				  pmd_t *pmdp, unsigned long clr, +				  unsigned long set)  {  	unsigned long old, tmp; @@ -514,14 +527,15 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,  		andi.	%1,%0,%6\n\  		bne-	1b \n\  		andc	%1,%0,%4 \n\ +		or	%1,%1,%7\n\  		stdcx.	%1,0,%3 \n\  		bne-	1b"  	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp) -	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY) +	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)  	: "cc" );  #else  	old = pmd_val(*pmdp); -	*pmdp = __pmd(old & ~clr); +	*pmdp = __pmd((old & ~clr) | set);  #endif  	if (old & _PAGE_HASHPTE)  		hpte_do_hugepage_flush(mm, addr, pmdp); @@ -633,6 +647,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,  		if (old & _PAGE_HASHPTE)  			hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);  	} +	/* +	 * This ensures that generic code that rely on IRQ disabling +	 * to prevent a parallel THP split work as expected. 
+	 */ +	kick_all_cpus_sync();  }  /* @@ -686,7 +705,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,  		pmd_t *pmdp, pmd_t pmd)  {  #ifdef CONFIG_DEBUG_VM -	WARN_ON(!pmd_none(*pmdp)); +	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);  	assert_spin_locked(&mm->page_table_lock);  	WARN_ON(!pmd_trans_huge(pmd));  #endif @@ -696,7 +715,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,  void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,  		     pmd_t *pmdp)  { -	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT); +	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);  }  /* @@ -823,7 +842,7 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,  	unsigned long old;  	pgtable_t *pgtable_slot; -	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL); +	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);  	old_pmd = __pmd(old);  	/*  	 * We have pmd == none and we are holding page_table_lock. diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 9d1d33cd2be..0399a670295 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -97,7 +97,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,  static void __slb_flush_and_rebolt(void)  {  	/* If you change this make sure you change SLB_NUM_BOLTED -	 * appropriately too. */ +	 * and PR KVM appropriately too. */  	unsigned long linear_llp, vmalloc_llp, lflags, vflags;  	unsigned long ksp_esid_data, ksp_vsid_data; @@ -256,10 +256,14 @@ static inline void patch_slb_encoding(unsigned int *insn_addr,  	patch_instruction(insn_addr, insn);  } +extern u32 slb_compare_rr_to_size[]; +extern u32 slb_miss_kernel_load_linear[]; +extern u32 slb_miss_kernel_load_io[]; +extern u32 slb_compare_rr_to_size[]; +extern u32 slb_miss_kernel_load_vmemmap[]; +  void slb_set_size(u16 size)  { -	extern unsigned int *slb_compare_rr_to_size; -  	if (mmu_slb_size == size)  		return; @@ -272,11 +276,7 @@ void slb_initialize(void)  	unsigned long linear_llp, vmalloc_llp, io_llp;  	unsigned long lflags, vflags;  	static int slb_encoding_inited; -	extern unsigned int *slb_miss_kernel_load_linear; -	extern unsigned int *slb_miss_kernel_load_io; -	extern unsigned int *slb_compare_rr_to_size;  #ifdef CONFIG_SPARSEMEM_VMEMMAP -	extern unsigned int *slb_miss_kernel_load_vmemmap;  	unsigned long vmemmap_llp;  #endif diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 17aa6dfceb3..736d18b3cef 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -35,7 +35,7 @@ _GLOBAL(slb_allocate_realmode)  	 * check for bad kernel/user address  	 * (ea & ~REGION_MASK) >= PGTABLE_RANGE  	 */ -	rldicr. r9,r3,4,(63 - 46 - 4) +	rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)  	bne-	8f  	srdi	r9,r3,60		/* get region */ @@ -59,7 +59,8 @@ _GLOBAL(slb_allocate_realmode)  	/* Linear mapping encoding bits, the "li" instruction below will  	 * be patched by the kernel at boot  	 */ -_GLOBAL(slb_miss_kernel_load_linear) +.globl slb_miss_kernel_load_linear +slb_miss_kernel_load_linear:  	li	r11,0  	/*  	 * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 @@ -79,7 +80,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)  	/* Check virtual memmap region. 
To be patches at kernel boot */  	cmpldi	cr0,r9,0xf  	bne	1f -_GLOBAL(slb_miss_kernel_load_vmemmap) +.globl slb_miss_kernel_load_vmemmap +slb_miss_kernel_load_vmemmap:  	li	r11,0  	b	6f  1: @@ -95,7 +97,8 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)  	b	6f  5:  	/* IO mapping */ -	_GLOBAL(slb_miss_kernel_load_io) +.globl slb_miss_kernel_load_io +slb_miss_kernel_load_io:  	li	r11,0  6:  	/* @@ -250,7 +253,8 @@ slb_finish_load:  7:	ld	r10,PACASTABRR(r13)  	addi	r10,r10,1  	/* This gets soft patched on boot. */ -_GLOBAL(slb_compare_rr_to_size) +.globl slb_compare_rr_to_size +slb_compare_rr_to_size:  	cmpldi	r10,0  	blt+	4f diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 3e99c149271..b0c75cc15ef 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr,  		slice = GET_HIGH_SLICE_INDEX(addr);  		*boundary_addr = (slice + end) ?  			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; -		return !!(available.high_slices & (1u << slice)); +		return !!(available.high_slices & (1ul << slice));  	}  } @@ -408,7 +408,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,  	if (fixed && (addr & ((1ul << pshift) - 1)))  		return -EINVAL;  	if (fixed && addr > (mm->task_size - len)) -		return -EINVAL; +		return -ENOMEM;  	/* If hint, make sure it matches our alignment restrictions */  	if (!fixed && addr) { diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index a770df2dae7..6c0b1f5f8d2 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -78,7 +78,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,  	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);  	arch_enter_lazy_mmu_mode();  	for (; npages > 0; --npages) { -		pte_update(mm, addr, pte, 0, 0); +		pte_update(mm, addr, pte, 0, 0, 0);  		addr += PAGE_SIZE;  		++pte;  	} diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 36e44b4260e..c99f6510a0b 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -23,7 +23,6 @@  #include <linux/kernel.h>  #include <linux/mm.h> -#include <linux/init.h>  #include <linux/percpu.h>  #include <linux/hardirq.h>  #include <asm/pgalloc.h> diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index b4113bf8635..356e8b41fb0 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -39,37 +39,49 @@   *                                                                    *   **********************************************************************/ +/* + * Note that, unlike non-bolted handlers, TLB_EXFRAME is not + * modified by the TLB miss handlers themselves, since the TLB miss + * handler code will not itself cause a recursive TLB miss. + * + * TLB_EXFRAME will be modified when crit/mc/debug exceptions are + * entered/exited. 
+ */  .macro tlb_prolog_bolted intnum addr -	mtspr	SPRN_SPRG_GEN_SCRATCH,r13 +	mtspr	SPRN_SPRG_GEN_SCRATCH,r12 +	mfspr	r12,SPRN_SPRG_TLB_EXFRAME +	std	r13,EX_TLB_R13(r12) +	std	r10,EX_TLB_R10(r12)  	mfspr	r13,SPRN_SPRG_PACA -	std	r10,PACA_EXTLB+EX_TLB_R10(r13) +  	mfcr	r10 -	std	r11,PACA_EXTLB+EX_TLB_R11(r13) +	std	r11,EX_TLB_R11(r12)  #ifdef CONFIG_KVM_BOOKE_HV  BEGIN_FTR_SECTION  	mfspr	r11, SPRN_SRR1  END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)  #endif  	DO_KVM	\intnum, SPRN_SRR1 -	std	r16,PACA_EXTLB+EX_TLB_R16(r13) +	std	r16,EX_TLB_R16(r12)  	mfspr	r16,\addr		/* get faulting address */ -	std	r14,PACA_EXTLB+EX_TLB_R14(r13) +	std	r14,EX_TLB_R14(r12)  	ld	r14,PACAPGD(r13) -	std	r15,PACA_EXTLB+EX_TLB_R15(r13) -	std	r10,PACA_EXTLB+EX_TLB_CR(r13) -	TLB_MISS_PROLOG_STATS_BOLTED +	std	r15,EX_TLB_R15(r12) +	std	r10,EX_TLB_CR(r12) +	TLB_MISS_PROLOG_STATS  .endm  .macro tlb_epilog_bolted -	ld	r14,PACA_EXTLB+EX_TLB_CR(r13) -	ld	r10,PACA_EXTLB+EX_TLB_R10(r13) -	ld	r11,PACA_EXTLB+EX_TLB_R11(r13) +	ld	r14,EX_TLB_CR(r12) +	ld	r10,EX_TLB_R10(r12) +	ld	r11,EX_TLB_R11(r12) +	ld	r13,EX_TLB_R13(r12)  	mtcr	r14 -	ld	r14,PACA_EXTLB+EX_TLB_R14(r13) -	ld	r15,PACA_EXTLB+EX_TLB_R15(r13) -	TLB_MISS_RESTORE_STATS_BOLTED -	ld	r16,PACA_EXTLB+EX_TLB_R16(r13) -	mfspr	r13,SPRN_SPRG_GEN_SCRATCH +	ld	r14,EX_TLB_R14(r12) +	ld	r15,EX_TLB_R15(r12) +	TLB_MISS_RESTORE_STATS +	ld	r16,EX_TLB_R16(r12) +	mfspr	r12,SPRN_SPRG_GEN_SCRATCH  .endm  /* Data TLB miss */ @@ -136,7 +148,7 @@ BEGIN_MMU_FTR_SECTION  	 */  	PPC_TLBSRX_DOT(0,R16)  	ldx	r14,r14,r15		/* grab pgd entry */ -	beq	normal_tlb_miss_done	/* tlb exists already, bail */ +	beq	tlb_miss_done_bolted	/* tlb exists already, bail */  MMU_FTR_SECTION_ELSE  	ldx	r14,r14,r15		/* grab pgd entry */  ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) @@ -192,6 +204,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)  	mtspr	SPRN_MAS7_MAS3,r15  	tlbwe +tlb_miss_done_bolted:  	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)  	tlb_epilog_bolted  	rfi @@ -239,6 +252,183 @@ itlb_miss_fault_bolted:  	beq	tlb_miss_common_bolted  	b	itlb_miss_kernel_bolted +#ifdef CONFIG_PPC_FSL_BOOK3E +/* + * TLB miss handling for e6500 and derivatives, using hardware tablewalk. + * + * Linear mapping is bolted: no virtual page table or nested TLB misses + * Indirect entries in TLB1, hardware loads resulting direct entries + *    into TLB0 + * No HES or NV hint on TLB1, so we need to do software round-robin + * No tlbsrx. so we need a spinlock, and we have to deal + *    with MAS-damage caused by tlbsx + * 4K pages only + */ + +	START_EXCEPTION(instruction_tlb_miss_e6500) +	tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 + +	ld	r11,PACA_TCD_PTR(r13) +	srdi.	r15,r16,60		/* get region */ +	ori	r16,r16,1 + +	TLB_MISS_STATS_SAVE_INFO_BOLTED +	bne	tlb_miss_kernel_e6500	/* user/kernel test */ + +	b	tlb_miss_common_e6500 + +	START_EXCEPTION(data_tlb_miss_e6500) +	tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR + +	ld	r11,PACA_TCD_PTR(r13) +	srdi.	r15,r16,60		/* get region */ +	rldicr	r16,r16,0,62 + +	TLB_MISS_STATS_SAVE_INFO_BOLTED +	bne	tlb_miss_kernel_e6500	/* user vs kernel check */ + +/* + * This is the guts of the TLB miss handler for e6500 and derivatives. 
+ * We are entered with: + * + * r16 = page of faulting address (low bit 0 if data, 1 if instruction) + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = tlb_per_core ptr + * r10 = cpu number + */ +tlb_miss_common_e6500: +	/* +	 * Search if we already have an indirect entry for that virtual +	 * address, and if we do, bail out. +	 * +	 * MAS6:IND should be already set based on MAS4 +	 */ +1:	lbarx	r15,0,r11 +	lhz	r10,PACAPACAINDEX(r13) +	cmpdi	r15,0 +	cmpdi	cr1,r15,1	/* set cr1.eq = 0 for non-recursive */ +	bne	2f +	stbcx.	r10,0,r11 +	bne	1b +3: +	.subsection 1 +2:	cmpd	cr1,r15,r10	/* recursive lock due to mcheck/crit/etc? */ +	beq	cr1,3b		/* unlock will happen if cr1.eq = 0 */ +	lbz	r15,0(r11) +	cmpdi	r15,0 +	bne	2b +	b	1b +	.previous + +	mfspr	r15,SPRN_MAS2 + +	tlbsx	0,r16 +	mfspr	r10,SPRN_MAS1 +	andis.	r10,r10,MAS1_VALID@h +	bne	tlb_miss_done_e6500 + +	/* Undo MAS-damage from the tlbsx */ +	mfspr	r10,SPRN_MAS1 +	oris	r10,r10,MAS1_VALID@h +	mtspr	SPRN_MAS1,r10 +	mtspr	SPRN_MAS2,r15 + +	/* Now, we need to walk the page tables. First check if we are in +	 * range. +	 */ +	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 +	bne-	tlb_miss_fault_e6500 + +	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 +	cmpldi	cr0,r14,0 +	clrrdi	r15,r15,3 +	beq-	tlb_miss_fault_e6500 /* No PGDIR, bail */ +	ldx	r14,r14,r15		/* grab pgd entry */ + +	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 +	clrrdi	r15,r15,3 +	cmpdi	cr0,r14,0 +	bge	tlb_miss_fault_e6500	/* Bad pgd entry or hugepage; bail */ +	ldx	r14,r14,r15		/* grab pud entry */ + +	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 +	clrrdi	r15,r15,3 +	cmpdi	cr0,r14,0 +	bge	tlb_miss_fault_e6500 +	ldx	r14,r14,r15		/* Grab pmd entry */ + +	mfspr	r10,SPRN_MAS0 +	cmpdi	cr0,r14,0 +	bge	tlb_miss_fault_e6500 + +	/* Now we build the MAS for a 2M indirect page: +	 * +	 * MAS 0   :	ESEL needs to be filled by software round-robin +	 * MAS 1   :	Fully set up +	 *               - PID already updated by caller if necessary +	 *               - TSIZE for now is base ind page size always +	 *               - TID already cleared if necessary +	 * MAS 2   :	Default not 2M-aligned, need to be redone +	 * MAS 3+7 :	Needs to be done +	 */ + +	ori	r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) +	mtspr	SPRN_MAS7_MAS3,r14 + +	clrrdi	r15,r16,21		/* make EA 2M-aligned */ +	mtspr	SPRN_MAS2,r15 + +	lbz	r15,TCD_ESEL_NEXT(r11) +	lbz	r16,TCD_ESEL_MAX(r11) +	lbz	r14,TCD_ESEL_FIRST(r11) +	rlwimi	r10,r15,16,0x00ff0000	/* insert esel_next into MAS0 */ +	addi	r15,r15,1		/* increment esel_next */ +	mtspr	SPRN_MAS0,r10 +	cmpw	r15,r16 +	iseleq	r15,r14,r15		/* if next == last use first */ +	stb	r15,TCD_ESEL_NEXT(r11) + +	tlbwe + +tlb_miss_done_e6500: +	.macro	tlb_unlock_e6500 +	beq	cr1,1f		/* no unlock if lock was recursively grabbed */ +	li	r15,0 +	isync +	stb	r15,0(r11) +1: +	.endm + +	tlb_unlock_e6500 +	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) +	tlb_epilog_bolted +	rfi + +tlb_miss_kernel_e6500: +	mfspr	r10,SPRN_MAS1 +	ld	r14,PACA_KERNELPGD(r13) +	cmpldi	cr0,r15,8		/* Check for vmalloc region */ +	rlwinm	r10,r10,0,16,1		/* Clear TID */ +	mtspr	SPRN_MAS1,r10 +	beq+	tlb_miss_common_e6500 + +tlb_miss_fault_e6500: +	tlb_unlock_e6500 +	/* We need to check if it was an instruction miss */ +	andi.	
r16,r16,1 +	bne	itlb_miss_fault_e6500 +dtlb_miss_fault_e6500: +	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) +	tlb_epilog_bolted +	b	exc_data_storage_book3e +itlb_miss_fault_e6500: +	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) +	tlb_epilog_bolted +	b	exc_instruction_storage_book3e +#endif /* CONFIG_PPC_FSL_BOOK3E */ +  /**********************************************************************   *                                                                    *   * TLB miss handling for Book3E with TLB reservation and HES support  * @@ -918,7 +1108,8 @@ tlb_load_linear:  	ld	r11,PACATOC(r13)  	ld	r11,linear_map_top@got(r11)  	ld	r10,0(r11) -	cmpld	cr0,r10,r16 +	tovirt(10,10) +	cmpld	cr0,r16,r10  	bge	tlb_load_linear_fault  	/* MAS1 need whole new setup. */ diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 41cd68dee68..92cb18d52ea 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -43,6 +43,7 @@  #include <asm/tlb.h>  #include <asm/code-patching.h>  #include <asm/hugetlb.h> +#include <asm/paca.h>  #include "mmu_decl.h" @@ -58,6 +59,10 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {  		.shift	= 12,  		.enc	= BOOK3E_PAGESZ_4K,  	}, +	[MMU_PAGE_2M] = { +		.shift	= 21, +		.enc	= BOOK3E_PAGESZ_2M, +	},  	[MMU_PAGE_4M] = {  		.shift	= 22,  		.enc	= BOOK3E_PAGESZ_4M, @@ -136,9 +141,18 @@ static inline int mmu_get_tsize(int psize)  int mmu_linear_psize;		/* Page size used for the linear mapping */  int mmu_pte_psize;		/* Page size used for PTE pages */  int mmu_vmemmap_psize;		/* Page size used for the virtual mem map */ -int book3e_htw_enabled;		/* Is HW tablewalk enabled ? */ +int book3e_htw_mode;		/* HW tablewalk?  Value is PPC_HTW_* */  unsigned long linear_map_top;	/* Top of linear mapping */ + +/* + * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug + * exceptions.  This is used for bolted and e6500 TLB miss handlers which + * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, + * this is set to zero. 
+ */ +int extlb_level_exc; +  #endif /* CONFIG_PPC64 */  #ifdef CONFIG_PPC_FSL_BOOK3E @@ -305,7 +319,7 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,  void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)  {  #ifdef CONFIG_HUGETLB_PAGE -	if (is_vm_hugetlb_page(vma)) +	if (vma && is_vm_hugetlb_page(vma))  		flush_hugetlb_page(vma, vmaddr);  #endif @@ -377,7 +391,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)  {  	int tsize = mmu_psize_defs[mmu_pte_psize].enc; -	if (book3e_htw_enabled) { +	if (book3e_htw_mode != PPC_HTW_NONE) {  		unsigned long start = address & PMD_MASK;  		unsigned long end = address + PMD_SIZE;  		unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; @@ -430,7 +444,7 @@ static void setup_page_sizes(void)  			def = &mmu_psize_defs[psize];  			shift = def->shift; -			if (shift == 0) +			if (shift == 0 || shift & 1)  				continue;  			/* adjust to be in terms of 4^shift Kb */ @@ -440,21 +454,40 @@ static void setup_page_sizes(void)  				def->flags |= MMU_PAGE_SIZE_DIRECT;  		} -		goto no_indirect; +		goto out;  	}  	if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { -		u32 tlb1ps = mfspr(SPRN_TLB1PS); +		u32 tlb1cfg, tlb1ps; + +		tlb0cfg = mfspr(SPRN_TLB0CFG); +		tlb1cfg = mfspr(SPRN_TLB1CFG); +		tlb1ps = mfspr(SPRN_TLB1PS); +		eptcfg = mfspr(SPRN_EPTCFG); + +		if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) +			book3e_htw_mode = PPC_HTW_E6500; + +		/* +		 * We expect 4K subpage size and unrestricted indirect size. +		 * The lack of a restriction on indirect size is a Freescale +		 * extension, indicated by PSn = 0 but SPSn != 0. +		 */ +		if (eptcfg != 2) +			book3e_htw_mode = PPC_HTW_NONE;  		for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {  			struct mmu_psize_def *def = &mmu_psize_defs[psize];  			if (tlb1ps & (1U << (def->shift - 10))) {  				def->flags |= MMU_PAGE_SIZE_DIRECT; + +				if (book3e_htw_mode && psize == MMU_PAGE_2M) +					def->flags |= MMU_PAGE_SIZE_INDIRECT;  			}  		} -		goto no_indirect; +		goto out;  	}  #endif @@ -471,8 +504,11 @@ static void setup_page_sizes(void)  	}  	/* Indirect page sizes supported ? */ -	if ((tlb0cfg & TLBnCFG_IND) == 0) -		goto no_indirect; +	if ((tlb0cfg & TLBnCFG_IND) == 0 || +	    (tlb0cfg & TLBnCFG_PT) == 0) +		goto out; + +	book3e_htw_mode = PPC_HTW_IBM;  	/* Now, we only deal with one IND page size for each  	 * direct size. Hopefully all implementations today are @@ -497,8 +533,8 @@ static void setup_page_sizes(void)  				def->ind = ps + 10;  		}  	} - no_indirect: +out:  	/* Cleanup array and print summary */  	pr_info("MMU: Supported page sizes\n");  	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { @@ -518,44 +554,28 @@ static void setup_page_sizes(void)  	}  } -static void __patch_exception(int exc, unsigned long addr) -{ -	extern unsigned int interrupt_base_book3e; - 	unsigned int *ibase = &interrupt_base_book3e; -  -	/* Our exceptions vectors start with a NOP and -then- a branch -	 * to deal with single stepping from userspace which stops on -	 * the second instruction. 
Thus we need to patch the second -	 * instruction of the exception, not the first one -	 */ - -	patch_branch(ibase + (exc / 4) + 1, addr, 0); -} - -#define patch_exception(exc, name) do { \ -	extern unsigned int name; \ -	__patch_exception((exc), (unsigned long)&name); \ -} while (0) -  static void setup_mmu_htw(void)  { -	/* Check if HW tablewalk is present, and if yes, enable it by: -	 * -	 * - patching the TLB miss handlers to branch to the -	 *   one dedicates to it -	 * -	 * - setting the global book3e_htw_enabled -       	 */ -	unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); +	/* +	 * If we want to use HW tablewalk, enable it by patching the TLB miss +	 * handlers to branch to the one dedicated to it. +	 */ -	if ((tlb0cfg & TLBnCFG_IND) && -	    (tlb0cfg & TLBnCFG_PT)) { +	switch (book3e_htw_mode) { +	case PPC_HTW_IBM:  		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);  		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); -		book3e_htw_enabled = 1; +		break; +#ifdef CONFIG_PPC_FSL_BOOK3E +	case PPC_HTW_E6500: +		extlb_level_exc = EX_TLB_SIZE; +		patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); +		patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); +		break; +#endif  	}  	pr_info("MMU: Book3E HW tablewalk %s\n", -		book3e_htw_enabled ? "enabled" : "not supported"); +		book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");  }  /* @@ -576,8 +596,13 @@ static void __early_init_mmu(int boot_cpu)  	/* XXX This should be decided at runtime based on supported  	 * page sizes in the TLB, but for now let's assume 16M is  	 * always there and a good fit (which it probably is) +	 * +	 * Freescale booke only supports 4K pages in TLB0, so use that.  	 */ -	mmu_vmemmap_psize = MMU_PAGE_16M; +	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) +		mmu_vmemmap_psize = MMU_PAGE_4K; +	else +		mmu_vmemmap_psize = MMU_PAGE_16M;  	/* XXX This code only checks for TLB 0 capabilities and doesn't  	 *     check what page size combos are supported by the HW. 
It @@ -595,8 +620,16 @@ static void __early_init_mmu(int boot_cpu)  	/* Set MAS4 based on page table setting */  	mas4 = 0x4 << MAS4_WIMGED_SHIFT; -	if (book3e_htw_enabled) { -		mas4 |= mas4 | MAS4_INDD; +	switch (book3e_htw_mode) { +	case PPC_HTW_E6500: +		mas4 |= MAS4_INDD; +		mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; +		mas4 |= MAS4_TLBSELD(1); +		mmu_pte_psize = MMU_PAGE_2M; +		break; + +	case PPC_HTW_IBM: +		mas4 |= MAS4_INDD;  #ifdef CONFIG_PPC_64K_PAGES  		mas4 |=	BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;  		mmu_pte_psize = MMU_PAGE_256M; @@ -604,13 +637,16 @@ static void __early_init_mmu(int boot_cpu)  		mas4 |=	BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;  		mmu_pte_psize = MMU_PAGE_1M;  #endif -	} else { +		break; + +	case PPC_HTW_NONE:  #ifdef CONFIG_PPC_64K_PAGES  		mas4 |=	BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;  #else  		mas4 |=	BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;  #endif  		mmu_pte_psize = mmu_virtual_psize; +		break;  	}  	mtspr(SPRN_MAS4, mas4); @@ -630,8 +666,12 @@ static void __early_init_mmu(int boot_cpu)  		/* limit memory so we dont have linear faults */  		memblock_enforce_memory_limit(linear_map_top); -		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); -		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); +		if (book3e_htw_mode == PPC_HTW_NONE) { +			extlb_level_exc = EX_TLB_SIZE; +			patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); +			patch_exception(0x1e0, +				exc_instruction_tlb_miss_bolted_book3e); +		}  	}  #endif diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 626ad081639..43ff3c797fb 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -402,7 +402,9 @@ _GLOBAL(set_context)   * Load TLBCAM[index] entry in to the L2 CAM MMU   */  _GLOBAL(loadcam_entry) -	LOAD_REG_ADDR(r4, TLBCAM) +	mflr	r5 +	LOAD_REG_ADDR_PIC(r4, TLBCAM) +	mtlr	r5  	mulli	r5,r3,TLBCAM_SIZE  	add	r3,r5,r4  	lwz	r4,TLBCAM_MAS0(r3)  | 
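Note on the pmd_hugepage_update() hunk above: the function gains a "set" argument alongside "clr", which is why existing callers such as pmdp_invalidate() and pmdp_get_and_clear() simply pass a trailing 0. The following is a minimal C sketch of the semantics the ldarx/andc/or/stdcx. loop implements, written with GCC atomic builtins purely for illustration. The name pmd_update_sketch and the use of a compare-and-swap loop are not from the kernel, and the real assembly additionally spins while _PAGE_BUSY is set, which this sketch omits.

	/*
	 * Illustrative sketch only: atomically apply "clear these bits,
	 * then set those bits" to a raw PMD word and return the previous
	 * value, mirroring what the ldarx/andc/or/stdcx. sequence in
	 * pmd_hugepage_update() does.
	 */
	static unsigned long pmd_update_sketch(unsigned long *pmdp,
					       unsigned long clr,
					       unsigned long set)
	{
		unsigned long old = __atomic_load_n(pmdp, __ATOMIC_RELAXED);
		unsigned long new;

		do {
			/* The real code also waits for _PAGE_BUSY to clear
			 * before retrying; omitted in this sketch. */
			new = (old & ~clr) | set;
		} while (!__atomic_compare_exchange_n(pmdp, &old, new, false,
						      __ATOMIC_RELAXED,
						      __ATOMIC_RELAXED));
		return old;
	}

With set == 0 this degenerates to the previous clear-only behaviour, which matches the non-LL/SC fallback in the hunk: *pmdp = __pmd((old & ~clr) | set).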

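Note on the e6500 handler added in tlb_low_64e.S: because TLB1 on e6500 provides no HES or NV hint, tlb_miss_common_e6500 picks the victim slot for the new indirect entry by software round-robin over the per-core TCD_ESEL_* bytes. Below is a rough C rendering of just that selection step; the struct and function names (tcd_sketch, pick_esel) are invented for illustration, and the byte-sized fields simply mirror the lbz/stb accesses in the assembly.

	/* Illustrative sketch of the esel_next/esel_max/esel_first
	 * round-robin used by tlb_miss_common_e6500. */
	struct tcd_sketch {
		unsigned char esel_next;   /* TLB1 index used for the next write */
		unsigned char esel_max;    /* wrap bound for the round-robin */
		unsigned char esel_first;  /* index to wrap back to */
	};

	static unsigned int pick_esel(struct tcd_sketch *tcd)
	{
		unsigned int esel = tcd->esel_next;

		/* Advance, wrapping back to esel_first when esel_max is
		 * reached, as the cmpw/iseleq pair does before the stb. */
		tcd->esel_next = (esel + 1 == tcd->esel_max)
				 ? tcd->esel_first
				 : (unsigned char)(esel + 1);
		return esel;  /* inserted into MAS0[ESEL] via rlwimi */
	}

In the handler the chosen index is merged into the MAS0 value read earlier and the 2M indirect entry is then written with tlbwe.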