Diffstat (limited to 'arch/mips/mm')
35 files changed, 3420 insertions, 1510 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index d679c772d08..7f4f93ab22b 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -3,34 +3,24 @@  #  obj-y				+= cache.o dma-default.o extable.o fault.o \ -				   init.o tlbex.o tlbex-fault.o uasm.o page.o +				   gup.o init.o mmap.o page.o page-funcs.o \ +				   tlbex.o tlbex-fault.o tlb-funcs.o uasm-mips.o  obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o  obj-$(CONFIG_64BIT)		+= pgtable-64.o  obj-$(CONFIG_HIGHMEM)		+= highmem.o  obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o -obj-$(CONFIG_CPU_LOONGSON2)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_MIPS32)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_MIPS64)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_NEVADA)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R10000)	+= c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o  obj-$(CONFIG_CPU_R3000)		+= c-r3k.o tlb-r3k.o -obj-$(CONFIG_CPU_R4300)		+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R4X00)		+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R5000)		+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R5432)		+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_R5500)		+= c-r4k.o cex-gen.o tlb-r4k.o  obj-$(CONFIG_CPU_R8000)		+= c-r4k.o cex-gen.o tlb-r8k.o -obj-$(CONFIG_CPU_RM7000)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_RM9000)	+= c-r4k.o cex-gen.o tlb-r4k.o  obj-$(CONFIG_CPU_SB1)		+= c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o  obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o tlb-r3k.o -obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o tlb-r4k.o -obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= c-octeon.o cex-oct.o tlb-r4k.o +obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o  obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o -obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o -obj-$(CONFIG_RM7000_CPU_SCACHE)	+= sc-rm7k.o +obj-$(CONFIG_R5000_CPU_SCACHE)	+= sc-r5k.o +obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o  obj-$(CONFIG_MIPS_CPU_SCACHE)	+= sc-mips.o + +obj-$(CONFIG_SYS_SUPPORTS_MICROMIPS) += uasm-micromips.o diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 16c4d256b76..05b1d7cf951 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -5,7 +5,7 @@   *   * Copyright (C) 2005-2007 Cavium Networks   */ -#include <linux/init.h> +#include <linux/export.h>  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/smp.h> @@ -18,16 +18,18 @@  #include <asm/bootinfo.h>  #include <asm/cacheops.h>  #include <asm/cpu-features.h> +#include <asm/cpu-type.h>  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/r4kcache.h> -#include <asm/system.h> +#include <asm/traps.h>  #include <asm/mmu_context.h>  #include <asm/war.h>  #include <asm/octeon/octeon.h>  unsigned long long cache_err_dcache[NR_CPUS]; +EXPORT_SYMBOL_GPL(cache_err_dcache);  /**   * Octeon automatically flushes the dcache on tlb changes, so @@ -81,9 +83,9 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)  	if (vma)  		mask = *mm_cpumask(vma->vm_mm);  	else -		mask = cpu_online_map; -	cpu_clear(cpu, mask); -	for_each_cpu_mask(cpu, mask) +		mask = *cpu_online_mask; +	cpumask_clear_cpu(cpu, &mask); +	for_each_cpu(cpu, &mask)  		octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH);  	preempt_enable(); @@ -104,7 +106,7 @@ static void octeon_flush_icache_all(void)   * Called to flush all memory associated with a memory   * context.   
* - * @mm:     Memory context to flush + * @mm:	    Memory context to flush   */  static void octeon_flush_cache_mm(struct mm_struct *mm)  { @@ -135,8 +137,10 @@ static void octeon_flush_cache_sigtramp(unsigned long addr)  {  	struct vm_area_struct *vma; +	down_read(¤t->mm->mmap_sem);  	vma = find_vma(current->mm, addr);  	octeon_flush_icache_all_cores(vma); +	up_read(¤t->mm->mmap_sem);  } @@ -169,20 +173,25 @@ static void octeon_flush_cache_page(struct vm_area_struct *vma,  		octeon_flush_icache_all_cores(vma);  } +static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ +	BUG(); +}  /**   * Probe Octeon's caches   *   */ -static void __cpuinit probe_octeon(void) +static void probe_octeon(void)  {  	unsigned long icache_size;  	unsigned long dcache_size;  	unsigned int config1;  	struct cpuinfo_mips *c = ¤t_cpu_data; +	int cputype = current_cpu_type();  	config1 = read_c0_config1(); -	switch (c->cputype) { +	switch (cputype) {  	case CPU_CAVIUM_OCTEON:  	case CPU_CAVIUM_OCTEON_PLUS:  		c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -193,7 +202,7 @@ static void __cpuinit probe_octeon(void)  			c->icache.sets * c->icache.ways * c->icache.linesz;  		c->icache.waybit = ffs(icache_size / c->icache.ways) - 1;  		c->dcache.linesz = 128; -		if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) +		if (cputype == CPU_CAVIUM_OCTEON_PLUS)  			c->dcache.sets = 2; /* CN5XXX has two Dcache sets */  		else  			c->dcache.sets = 1; /* CN3XXX has one Dcache set */ @@ -218,8 +227,22 @@ static void __cpuinit probe_octeon(void)  		c->options |= MIPS_CPU_PREFETCH;  		break; +	case CPU_CAVIUM_OCTEON3: +		c->icache.linesz = 128; +		c->icache.sets = 16; +		c->icache.ways = 39; +		c->icache.flags |= MIPS_CACHE_VTAG; +		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + +		c->dcache.linesz = 128; +		c->dcache.ways = 32; +		c->dcache.sets = 8; +		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; +		c->options |= MIPS_CPU_PREFETCH; +		break; +  	default: -		panic("Unsupported Cavium Networks CPU type\n"); +		panic("Unsupported Cavium Networks CPU type");  		break;  	} @@ -245,19 +268,18 @@ static void __cpuinit probe_octeon(void)  	}  } +static void  octeon_cache_error_setup(void) +{ +	extern char except_vec2_octeon; +	set_handler(0x100, &except_vec2_octeon, 0x80); +}  /**   * Setup the Octeon cache flush routines   *   */ -void __cpuinit octeon_cache_init(void) +void octeon_cache_init(void)  { -	extern unsigned long ebase; -	extern char except_vec2_octeon; - -	memcpy((void *)(ebase + 0x100), &except_vec2_octeon, 0x80); -	octeon_flush_cache_sigtramp(ebase + 0x100); -  	probe_octeon();  	shm_align_mask = PAGE_SIZE - 1; @@ -273,43 +295,67 @@ void __cpuinit octeon_cache_init(void)  	flush_icache_range		= octeon_flush_icache_range;  	local_flush_icache_range	= local_octeon_flush_icache_range; +	__flush_kernel_vmap_range	= octeon_flush_kernel_vmap_range; +  	build_clear_page();  	build_copy_page(); + +	board_cache_error_setup = octeon_cache_error_setup;  } -/** +/*   * Handle a cache error exception   */ +static RAW_NOTIFIER_HEAD(co_cache_error_chain); -static void  cache_parity_error_octeon(int non_recoverable) +int register_co_cache_error_notifier(struct notifier_block *nb)  { -	unsigned long coreid = cvmx_get_core_num(); -	uint64_t icache_err = read_octeon_c0_icacheerr(); - -	pr_err("Cache error exception:\n"); -	pr_err("cp0_errorepc == %lx\n", read_c0_errorepc()); -	if (icache_err & 1) { -		pr_err("CacheErr (Icache) == %llx\n", -		       (unsigned long long)icache_err); -		
write_octeon_c0_icacheerr(0); -	} -	if (cache_err_dcache[coreid] & 1) { -		pr_err("CacheErr (Dcache) == %llx\n", -		       (unsigned long long)cache_err_dcache[coreid]); -		cache_err_dcache[coreid] = 0; -	} +	return raw_notifier_chain_register(&co_cache_error_chain, nb); +} +EXPORT_SYMBOL_GPL(register_co_cache_error_notifier); -	if (non_recoverable) -		panic("Can't handle cache error: nested exception"); +int unregister_co_cache_error_notifier(struct notifier_block *nb) +{ +	return raw_notifier_chain_unregister(&co_cache_error_chain, nb);  } +EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier); -/** +static void co_cache_error_call_notifiers(unsigned long val) +{ +	int rv = raw_notifier_call_chain(&co_cache_error_chain, val, NULL); +	if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { +		u64 dcache_err; +		unsigned long coreid = cvmx_get_core_num(); +		u64 icache_err = read_octeon_c0_icacheerr(); + +		if (val) { +			dcache_err = cache_err_dcache[coreid]; +			cache_err_dcache[coreid] = 0; +		} else { +			dcache_err = read_octeon_c0_dcacheerr(); +		} + +		pr_err("Core%lu: Cache error exception:\n", coreid); +		pr_err("cp0_errorepc == %lx\n", read_c0_errorepc()); +		if (icache_err & 1) { +			pr_err("CacheErr (Icache) == %llx\n", +			       (unsigned long long)icache_err); +			write_octeon_c0_icacheerr(0); +		} +		if (dcache_err & 1) { +			pr_err("CacheErr (Dcache) == %llx\n", +			       (unsigned long long)dcache_err); +		} +	} +} + +/*   * Called when the the exception is recoverable   */  asmlinkage void cache_parity_error_octeon_recoverable(void)  { -	cache_parity_error_octeon(0); +	co_cache_error_call_notifiers(0);  }  /** @@ -318,5 +364,6 @@ asmlinkage void cache_parity_error_octeon_recoverable(void)  asmlinkage void cache_parity_error_octeon_non_recoverable(void)  { -	cache_parity_error_octeon(1); +	co_cache_error_call_notifiers(1); +	panic("Can't handle cache error: nested exception");  } diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 54e5f7b9f44..135ec313c1f 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -1,7 +1,7 @@  /*   * r2300.c: R2000 and R3000 specific mmu/cache code.   * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   *   * with a lot of changes to make this thing work for R3000s   * Tx39XX R4k style caches added. HK @@ -9,7 +9,6 @@   * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov   * Copyright (C) 2001, 2004, 2007  Maciej W. 
Rozycki   */ -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/smp.h> @@ -18,7 +17,6 @@  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/mmu_context.h> -#include <asm/system.h>  #include <asm/isadep.h>  #include <asm/io.h>  #include <asm/bootinfo.h> @@ -27,7 +25,7 @@  static unsigned long icache_size, dcache_size;		/* Size in bytes */  static unsigned long icache_lsize, dcache_lsize;	/* Size in bytes */ -unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags) +unsigned long r3k_cache_size(unsigned long ca_flags)  {  	unsigned long flags, status, dummy, size;  	volatile unsigned long *p; @@ -62,7 +60,7 @@ unsigned long __cpuinit r3k_cache_size(unsigned long ca_flags)  	return size * sizeof(*p);  } -unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags) +unsigned long r3k_cache_lsize(unsigned long ca_flags)  {  	unsigned long flags, status, lsize, i;  	volatile unsigned long *p; @@ -91,7 +89,7 @@ unsigned long __cpuinit r3k_cache_lsize(unsigned long ca_flags)  	return lsize * sizeof(*p);  } -static void __cpuinit r3k_probe_cache(void) +static void r3k_probe_cache(void)  {  	dcache_size = r3k_cache_size(ST0_ISC);  	if (dcache_size) @@ -120,7 +118,7 @@ static void r3k_flush_icache_range(unsigned long start, unsigned long end)  	write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);  	for (i = 0; i < size; i += 0x080) { -		asm( 	"sb\t$0, 0x000(%0)\n\t" +		asm(	"sb\t$0, 0x000(%0)\n\t"  			"sb\t$0, 0x004(%0)\n\t"  			"sb\t$0, 0x008(%0)\n\t"  			"sb\t$0, 0x00c(%0)\n\t" @@ -177,7 +175,7 @@ static void r3k_flush_dcache_range(unsigned long start, unsigned long end)  	write_c0_status((ST0_ISC|flags)&~ST0_IEC);  	for (i = 0; i < size; i += 0x080) { -		asm( 	"sb\t$0, 0x000(%0)\n\t" +		asm(	"sb\t$0, 0x000(%0)\n\t"  			"sb\t$0, 0x004(%0)\n\t"  			"sb\t$0, 0x008(%0)\n\t"  			"sb\t$0, 0x00c(%0)\n\t" @@ -286,19 +284,24 @@ static void r3k_flush_cache_sigtramp(unsigned long addr)  	write_c0_status(flags&~ST0_IEC);  	/* Fill the TLB to avoid an exception with caches isolated. */ -	asm( 	"lw\t$0, 0x000(%0)\n\t" +	asm(	"lw\t$0, 0x000(%0)\n\t"  		"lw\t$0, 0x004(%0)\n\t"  		: : "r" (addr) );  	write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); -	asm( 	"sb\t$0, 0x000(%0)\n\t" +	asm(	"sb\t$0, 0x000(%0)\n\t"  		"sb\t$0, 0x004(%0)\n\t"  		: : "r" (addr) );  	write_c0_status(flags);  } +static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ +	BUG(); +} +  static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)  {  	/* Catch bad driver code */ @@ -308,7 +311,7 @@ static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)  	r3k_flush_dcache_range(start, start + size);  } -void __cpuinit r3k_cache_init(void) +void r3k_cache_init(void)  {  	extern void build_clear_page(void);  	extern void build_copy_page(void); @@ -323,6 +326,8 @@ void __cpuinit r3k_cache_init(void)  	flush_icache_range = r3k_flush_icache_range;  	local_flush_icache_range = r3k_flush_icache_range; +	__flush_kernel_vmap_range = r3k_flush_kernel_vmap_range; +  	flush_cache_sigtramp = r3k_flush_cache_sigtramp;  	local_flush_data_cache_page = local_r3k_flush_data_cache_page;  	flush_data_cache_page = r3k_flush_data_cache_page; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index b4923a75cb4..f2e8302fa70 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -3,15 +3,17 @@   * License.  See the file "COPYING" in the main directory of this archive   * for more details.   * - * Copyright (C) 1996 David S. 
Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org)   * Copyright (C) 1999, 2000 Silicon Graphics, Inc.   */ +#include <linux/cpu_pm.h>  #include <linux/hardirq.h>  #include <linux/init.h>  #include <linux/highmem.h>  #include <linux/kernel.h>  #include <linux/linkage.h> +#include <linux/preempt.h>  #include <linux/sched.h>  #include <linux/smp.h>  #include <linux/mm.h> @@ -24,16 +26,17 @@  #include <asm/cacheops.h>  #include <asm/cpu.h>  #include <asm/cpu-features.h> +#include <asm/cpu-type.h>  #include <asm/io.h>  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/r4kcache.h>  #include <asm/sections.h> -#include <asm/system.h>  #include <asm/mmu_context.h>  #include <asm/war.h>  #include <asm/cacheflush.h> /* for run_uncached() */ - +#include <asm/traps.h> +#include <asm/dma-coherence.h>  /*   * Special Variant of smp_call_function for use by cache functions: @@ -48,14 +51,14 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)  {  	preempt_disable(); -#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC) +#ifndef CONFIG_MIPS_MT_SMP  	smp_call_function(func, info, 1);  #endif  	func(info);  	preempt_enable();  } -#if defined(CONFIG_MIPS_CMP) +#if defined(CONFIG_MIPS_CMP) || defined(CONFIG_MIPS_CPS)  #define cpu_has_safe_index_cacheops 0  #else  #define cpu_has_safe_index_cacheops 1 @@ -103,27 +106,64 @@ static inline void r4k_blast_dcache_page_dc32(unsigned long addr)  static inline void r4k_blast_dcache_page_dc64(unsigned long addr)  { -	R4600_HIT_CACHEOP_WAR_IMPL;  	blast_dcache64_page(addr);  } -static void __cpuinit r4k_blast_dcache_page_setup(void) +static inline void r4k_blast_dcache_page_dc128(unsigned long addr) +{ +	blast_dcache128_page(addr); +} + +static void r4k_blast_dcache_page_setup(void)  {  	unsigned long  dc_lsize = cpu_dcache_line_size(); -	if (dc_lsize == 0) +	switch (dc_lsize) { +	case 0:  		r4k_blast_dcache_page = (void *)cache_noop; -	else if (dc_lsize == 16) +		break; +	case 16:  		r4k_blast_dcache_page = blast_dcache16_page; -	else if (dc_lsize == 32) +		break; +	case 32:  		r4k_blast_dcache_page = r4k_blast_dcache_page_dc32; -	else if (dc_lsize == 64) +		break; +	case 64:  		r4k_blast_dcache_page = r4k_blast_dcache_page_dc64; +		break; +	case 128: +		r4k_blast_dcache_page = r4k_blast_dcache_page_dc128; +		break; +	default: +		break; +	} +} + +#ifndef CONFIG_EVA +#define r4k_blast_dcache_user_page  r4k_blast_dcache_page +#else + +static void (*r4k_blast_dcache_user_page)(unsigned long addr); + +static void r4k_blast_dcache_user_page_setup(void) +{ +	unsigned long  dc_lsize = cpu_dcache_line_size(); + +	if (dc_lsize == 0) +		r4k_blast_dcache_user_page = (void *)cache_noop; +	else if (dc_lsize == 16) +		r4k_blast_dcache_user_page = blast_dcache16_user_page; +	else if (dc_lsize == 32) +		r4k_blast_dcache_user_page = blast_dcache32_user_page; +	else if (dc_lsize == 64) +		r4k_blast_dcache_user_page = blast_dcache64_user_page;  } +#endif +  static void (* r4k_blast_dcache_page_indexed)(unsigned long addr); -static void __cpuinit r4k_blast_dcache_page_indexed_setup(void) +static void r4k_blast_dcache_page_indexed_setup(void)  {  	unsigned long dc_lsize = cpu_dcache_line_size(); @@ -135,11 +175,14 @@ static void __cpuinit r4k_blast_dcache_page_indexed_setup(void)  		r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed;  	else if (dc_lsize == 64)  		r4k_blast_dcache_page_indexed = 
blast_dcache64_page_indexed; +	else if (dc_lsize == 128) +		r4k_blast_dcache_page_indexed = blast_dcache128_page_indexed;  } -static void (* r4k_blast_dcache)(void); +void (* r4k_blast_dcache)(void); +EXPORT_SYMBOL(r4k_blast_dcache); -static void __cpuinit r4k_blast_dcache_setup(void) +static void r4k_blast_dcache_setup(void)  {  	unsigned long dc_lsize = cpu_dcache_line_size(); @@ -151,6 +194,8 @@ static void __cpuinit r4k_blast_dcache_setup(void)  		r4k_blast_dcache = blast_dcache32;  	else if (dc_lsize == 64)  		r4k_blast_dcache = blast_dcache64; +	else if (dc_lsize == 128) +		r4k_blast_dcache = blast_dcache128;  }  /* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */ @@ -161,7 +206,7 @@ static void __cpuinit r4k_blast_dcache_setup(void)  		"1:\n\t" \  		)  #define CACHE32_UNROLL32_ALIGN	JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */ -#define CACHE32_UNROLL32_ALIGN2	JUMP_TO_ALIGN(11) +#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11)  static inline void blast_r4600_v1_icache32(void)  { @@ -178,7 +223,7 @@ static inline void tx49_blast_icache32(void)  	unsigned long end = start + current_cpu_data.icache.waysize;  	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;  	unsigned long ws_end = current_cpu_data.icache.ways << -	                       current_cpu_data.icache.waybit; +			       current_cpu_data.icache.waybit;  	unsigned long ws, addr;  	CACHE32_UNROLL32_ALIGN2; @@ -209,7 +254,7 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page)  	unsigned long end = start + PAGE_SIZE;  	unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit;  	unsigned long ws_end = current_cpu_data.icache.ways << -	                       current_cpu_data.icache.waybit; +			       current_cpu_data.icache.waybit;  	unsigned long ws, addr;  	CACHE32_UNROLL32_ALIGN2; @@ -226,7 +271,7 @@ static inline void tx49_blast_icache32_page_indexed(unsigned long page)  static void (* r4k_blast_icache_page)(unsigned long addr); -static void __cpuinit r4k_blast_icache_page_setup(void) +static void r4k_blast_icache_page_setup(void)  {  	unsigned long ic_lsize = cpu_icache_line_size(); @@ -234,16 +279,41 @@ static void __cpuinit r4k_blast_icache_page_setup(void)  		r4k_blast_icache_page = (void *)cache_noop;  	else if (ic_lsize == 16)  		r4k_blast_icache_page = blast_icache16_page; +	else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2) +		r4k_blast_icache_page = loongson2_blast_icache32_page;  	else if (ic_lsize == 32)  		r4k_blast_icache_page = blast_icache32_page;  	else if (ic_lsize == 64)  		r4k_blast_icache_page = blast_icache64_page; +	else if (ic_lsize == 128) +		r4k_blast_icache_page = blast_icache128_page; +} + +#ifndef CONFIG_EVA +#define r4k_blast_icache_user_page  r4k_blast_icache_page +#else + +static void (*r4k_blast_icache_user_page)(unsigned long addr); + +static void __cpuinit r4k_blast_icache_user_page_setup(void) +{ +	unsigned long ic_lsize = cpu_icache_line_size(); + +	if (ic_lsize == 0) +		r4k_blast_icache_user_page = (void *)cache_noop; +	else if (ic_lsize == 16) +		r4k_blast_icache_user_page = blast_icache16_user_page; +	else if (ic_lsize == 32) +		r4k_blast_icache_user_page = blast_icache32_user_page; +	else if (ic_lsize == 64) +		r4k_blast_icache_user_page = blast_icache64_user_page;  } +#endif  static void (* r4k_blast_icache_page_indexed)(unsigned long addr); -static void __cpuinit r4k_blast_icache_page_indexed_setup(void) +static void r4k_blast_icache_page_indexed_setup(void)  {  	unsigned long ic_lsize = cpu_icache_line_size(); @@ -258,6 +328,9 
@@ static void __cpuinit r4k_blast_icache_page_indexed_setup(void)  		else if (TX49XX_ICACHE_INDEX_INV_WAR)  			r4k_blast_icache_page_indexed =  				tx49_blast_icache32_page_indexed; +		else if (current_cpu_type() == CPU_LOONGSON2) +			r4k_blast_icache_page_indexed = +				loongson2_blast_icache32_page_indexed;  		else  			r4k_blast_icache_page_indexed =  				blast_icache32_page_indexed; @@ -265,9 +338,10 @@ static void __cpuinit r4k_blast_icache_page_indexed_setup(void)  		r4k_blast_icache_page_indexed = blast_icache64_page_indexed;  } -static void (* r4k_blast_icache)(void); +void (* r4k_blast_icache)(void); +EXPORT_SYMBOL(r4k_blast_icache); -static void __cpuinit r4k_blast_icache_setup(void) +static void r4k_blast_icache_setup(void)  {  	unsigned long ic_lsize = cpu_icache_line_size(); @@ -280,15 +354,19 @@ static void __cpuinit r4k_blast_icache_setup(void)  			r4k_blast_icache = blast_r4600_v1_icache32;  		else if (TX49XX_ICACHE_INDEX_INV_WAR)  			r4k_blast_icache = tx49_blast_icache32; +		else if (current_cpu_type() == CPU_LOONGSON2) +			r4k_blast_icache = loongson2_blast_icache32;  		else  			r4k_blast_icache = blast_icache32;  	} else if (ic_lsize == 64)  		r4k_blast_icache = blast_icache64; +	else if (ic_lsize == 128) +		r4k_blast_icache = blast_icache128;  }  static void (* r4k_blast_scache_page)(unsigned long addr); -static void __cpuinit r4k_blast_scache_page_setup(void) +static void r4k_blast_scache_page_setup(void)  {  	unsigned long sc_lsize = cpu_scache_line_size(); @@ -306,7 +384,7 @@ static void __cpuinit r4k_blast_scache_page_setup(void)  static void (* r4k_blast_scache_page_indexed)(unsigned long addr); -static void __cpuinit r4k_blast_scache_page_indexed_setup(void) +static void r4k_blast_scache_page_indexed_setup(void)  {  	unsigned long sc_lsize = cpu_scache_line_size(); @@ -324,7 +402,7 @@ static void __cpuinit r4k_blast_scache_page_indexed_setup(void)  static void (* r4k_blast_scache)(void); -static void __cpuinit r4k_blast_scache_setup(void) +static void r4k_blast_scache_setup(void)  {  	unsigned long sc_lsize = cpu_scache_line_size(); @@ -342,14 +420,9 @@ static void __cpuinit r4k_blast_scache_setup(void)  static inline void local_r4k___flush_cache_all(void * args)  { -#if defined(CONFIG_CPU_LOONGSON2) -	r4k_blast_scache(); -	return; -#endif -	r4k_blast_dcache(); -	r4k_blast_icache(); -  	switch (current_cpu_type()) { +	case CPU_LOONGSON2: +	case CPU_LOONGSON3:  	case CPU_R4000SC:  	case CPU_R4000MC:  	case CPU_R4400SC: @@ -357,7 +430,18 @@ static inline void local_r4k___flush_cache_all(void * args)  	case CPU_R10000:  	case CPU_R12000:  	case CPU_R14000: +		/* +		 * These caches are inclusive caches, that is, if something +		 * is not cached in the S-cache, we know it also won't be +		 * in one of the primary caches. +		 */  		r4k_blast_scache(); +		break; + +	default: +		r4k_blast_dcache(); +		r4k_blast_icache(); +		break;  	}  } @@ -368,7 +452,7 @@ static void r4k___flush_cache_all(void)  static inline int has_valid_asid(const struct mm_struct *mm)  { -#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC) +#ifdef CONFIG_MIPS_MT_SMP  	int i;  	for_each_online_cpu(i) @@ -498,12 +582,13 @@ static inline void local_r4k_flush_cache_page(void *args)  		if (map_coherent)  			vaddr = kmap_coherent(page, addr);  		else -			vaddr = kmap_atomic(page, KM_USER0); +			vaddr = kmap_atomic(page);  		addr = (unsigned long)vaddr;  	}  	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { -		r4k_blast_dcache_page(addr); +		vaddr ? 
r4k_blast_dcache_page(addr) : +			r4k_blast_dcache_user_page(addr);  		if (exec && !cpu_icache_snoops_remote_store)  			r4k_blast_scache_page(addr);  	} @@ -514,14 +599,15 @@ static inline void local_r4k_flush_cache_page(void *args)  			if (cpu_context(cpu, mm) != 0)  				drop_mmu_context(mm, cpu);  		} else -			r4k_blast_icache_page(addr); +			vaddr ? r4k_blast_icache_page(addr) : +				r4k_blast_icache_user_page(addr);  	}  	if (vaddr) {  		if (map_coherent)  			kunmap_coherent();  		else -			kunmap_atomic(vaddr, KM_USER0); +			kunmap_atomic(vaddr);  	}  } @@ -568,8 +654,28 @@ static inline void local_r4k_flush_icache_range(unsigned long start, unsigned lo  	if (end - start > icache_size)  		r4k_blast_icache(); -	else -		protected_blast_icache_range(start, end); +	else { +		switch (boot_cpu_type()) { +		case CPU_LOONGSON2: +			protected_loongson2_blast_icache_range(start, end); +			break; + +		default: +			protected_blast_icache_range(start, end); +			break; +		} +	} +#ifdef CONFIG_EVA +	/* +	 * Due to all possible segment mappings, there might cache aliases +	 * caused by the bootloader being in non-EVA mode, and the CPU switching +	 * to EVA during early kernel init. It's best to flush the scache +	 * to avoid having secondary cores fetching stale data and lead to +	 * kernel crashes. +	 */ +	bc_wback_inv(start, (end - start)); +	__sync(); +#endif  }  static inline void local_r4k_flush_icache_range_ipi(void *args) @@ -592,18 +698,21 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end)  	instruction_hazard();  } -#ifdef CONFIG_DMA_NONCOHERENT +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)  static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)  {  	/* Catch bad driver code */  	BUG_ON(size == 0); +	preempt_disable();  	if (cpu_has_inclusive_pcaches) {  		if (size >= scache_size)  			r4k_blast_scache();  		else  			blast_scache_range(addr, addr + size); +		preempt_enable(); +		__sync();  		return;  	} @@ -618,8 +727,10 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)  		R4600_HIT_CACHEOP_WAR_IMPL;  		blast_dcache_range(addr, addr + size);  	} +	preempt_enable();  	bc_wback_inv(addr, size); +	__sync();  }  static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) @@ -627,44 +738,38 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)  	/* Catch bad driver code */  	BUG_ON(size == 0); +	preempt_disable();  	if (cpu_has_inclusive_pcaches) {  		if (size >= scache_size)  			r4k_blast_scache();  		else { -			unsigned long lsize = cpu_scache_line_size(); -			unsigned long almask = ~(lsize - 1); -  			/*  			 * There is no clearly documented alignment requirement  			 * for the cache instruction on MIPS processors and  			 * some processors, among them the RM5200 and RM7000  			 * QED processors will throw an address error for cache -			 * hit ops with insufficient alignment.  Solved by +			 * hit ops with insufficient alignment.	 Solved by  			 * aligning the address to cache line size.  			 
*/ -			cache_op(Hit_Writeback_Inv_SD, addr & almask); -			cache_op(Hit_Writeback_Inv_SD, -				 (addr + size - 1) & almask);  			blast_inv_scache_range(addr, addr + size);  		} +		preempt_enable(); +		__sync();  		return;  	}  	if (cpu_has_safe_index_cacheops && size >= dcache_size) {  		r4k_blast_dcache();  	} else { -		unsigned long lsize = cpu_dcache_line_size(); -		unsigned long almask = ~(lsize - 1); -  		R4600_HIT_CACHEOP_WAR_IMPL; -		cache_op(Hit_Writeback_Inv_D, addr & almask); -		cache_op(Hit_Writeback_Inv_D, (addr + size - 1)  & almask);  		blast_inv_dcache_range(addr, addr + size);  	} +	preempt_enable();  	bc_inv(addr, size); +	__sync();  } -#endif /* CONFIG_DMA_NONCOHERENT */ +#endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */  /*   * While we're protected against bad userland addresses we don't care @@ -718,6 +823,39 @@ static void r4k_flush_icache_all(void)  		r4k_blast_icache();  } +struct flush_kernel_vmap_range_args { +	unsigned long	vaddr; +	int		size; +}; + +static inline void local_r4k_flush_kernel_vmap_range(void *args) +{ +	struct flush_kernel_vmap_range_args *vmra = args; +	unsigned long vaddr = vmra->vaddr; +	int size = vmra->size; + +	/* +	 * Aliases only affect the primary caches so don't bother with +	 * S-caches or T-caches. +	 */ +	if (cpu_has_safe_index_cacheops && size >= dcache_size) +		r4k_blast_dcache(); +	else { +		R4600_HIT_CACHEOP_WAR_IMPL; +		blast_dcache_range(vaddr, vaddr + size); +	} +} + +static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ +	struct flush_kernel_vmap_range_args args; + +	args.vaddr = (unsigned long) vaddr; +	args.size = size; + +	r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args); +} +  static inline void rm7k_erratum31(void)  {  	const unsigned long ic_lsize = 32; @@ -750,11 +888,40 @@ static inline void rm7k_erratum31(void)  	}  } -static char *way_string[] __cpuinitdata = { NULL, "direct mapped", "2-way", +static inline void alias_74k_erratum(struct cpuinfo_mips *c) +{ +	unsigned int imp = c->processor_id & PRID_IMP_MASK; +	unsigned int rev = c->processor_id & PRID_REV_MASK; + +	/* +	 * Early versions of the 74K do not update the cache tags on a +	 * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG +	 * aliases. In this case it is better to treat the cache as always +	 * having aliases. +	 */ +	switch (imp) { +	case PRID_IMP_74K: +		if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) +			c->dcache.flags |= MIPS_CACHE_VTAG; +		if (rev == PRID_REV_ENCODE_332(2, 4, 0)) +			write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); +		break; +	case PRID_IMP_1074K: +		if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { +			c->dcache.flags |= MIPS_CACHE_VTAG; +			write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); +		} +		break; +	default: +		BUG(); +	} +} + +static char *way_string[] = { NULL, "direct mapped", "2-way",  	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way"  }; -static void __cpuinit probe_pcache(void) +static void probe_pcache(void)  {  	struct cpuinfo_mips *c = ¤t_cpu_data;  	unsigned int config = read_c0_config(); @@ -762,7 +929,7 @@ static void __cpuinit probe_pcache(void)  	unsigned long config1;  	unsigned int lsize; -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_R4600:			/* QED style two way caches? 
*/  	case CPU_R4700:  	case CPU_R5000: @@ -820,7 +987,7 @@ static void __cpuinit probe_pcache(void)  		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));  		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);  		c->icache.ways = 1; -		c->icache.waybit = 0; 	/* doesn't matter */ +		c->icache.waybit = 0;	/* doesn't matter */  		dcache_size = 1 << (12 + ((config & CONF_DC) >> 6));  		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); @@ -879,7 +1046,7 @@ static void __cpuinit probe_pcache(void)  		icache_size = 1 << (10 + ((config & CONF_IC) >> 9));  		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);  		c->icache.ways = 1; -		c->icache.waybit = 0; 	/* doesn't matter */ +		c->icache.waybit = 0;	/* doesn't matter */  		dcache_size = 1 << (10 + ((config & CONF_DC) >> 6));  		c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); @@ -892,7 +1059,6 @@ static void __cpuinit probe_pcache(void)  	case CPU_RM7000:  		rm7k_erratum31(); -	case CPU_RM9000:  		icache_size = 1 << (12 + ((config & CONF_IC) >> 9));  		c->icache.linesz = 16 << ((config & CONF_IB) >> 5);  		c->icache.ways = 4; @@ -903,9 +1069,7 @@ static void __cpuinit probe_pcache(void)  		c->dcache.ways = 4;  		c->dcache.waybit = __ffs(dcache_size / c->dcache.ways); -#if !defined(CONFIG_SMP) || !defined(RM9000_CDEX_SMP_WAR)  		c->options |= MIPS_CPU_CACHE_CDEX_P; -#endif  		c->options |= MIPS_CPU_PREFETCH;  		break; @@ -927,6 +1091,48 @@ static void __cpuinit probe_pcache(void)  		c->dcache.waybit = 0;  		break; +	case CPU_LOONGSON3: +		config1 = read_c0_config1(); +		lsize = (config1 >> 19) & 7; +		if (lsize) +			c->icache.linesz = 2 << lsize; +		else +			c->icache.linesz = 0; +		c->icache.sets = 64 << ((config1 >> 22) & 7); +		c->icache.ways = 1 + ((config1 >> 16) & 7); +		icache_size = c->icache.sets * +					  c->icache.ways * +					  c->icache.linesz; +		c->icache.waybit = 0; + +		lsize = (config1 >> 10) & 7; +		if (lsize) +			c->dcache.linesz = 2 << lsize; +		else +			c->dcache.linesz = 0; +		c->dcache.sets = 64 << ((config1 >> 13) & 7); +		c->dcache.ways = 1 + ((config1 >> 7) & 7); +		dcache_size = c->dcache.sets * +					  c->dcache.ways * +					  c->dcache.linesz; +		c->dcache.waybit = 0; +		break; + +	case CPU_CAVIUM_OCTEON3: +		/* For now lie about the number of ways. */ +		c->icache.linesz = 128; +		c->icache.sets = 16; +		c->icache.ways = 8; +		c->icache.flags |= MIPS_CACHE_VTAG; +		icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + +		c->dcache.linesz = 128; +		c->dcache.ways = 8; +		c->dcache.sets = 8; +		dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; +		c->options |= MIPS_CPU_PREFETCH; +		break; +  	default:  		if (!(config & MIPS_CONF_M))  			panic("Don't know how to probe P-caches on this cpu."); @@ -937,16 +1143,20 @@ static void __cpuinit probe_pcache(void)  		 */  		config1 = read_c0_config1(); -		if ((lsize = ((config1 >> 19) & 7))) -			c->icache.linesz = 2 << lsize; -		else -			c->icache.linesz = lsize; -		c->icache.sets = 64 << ((config1 >> 22) & 7); +		lsize = (config1 >> 19) & 7; + +		/* IL == 7 is reserved */ +		if (lsize == 7) +			panic("Invalid icache line size"); + +		c->icache.linesz = lsize ? 
2 << lsize : 0; + +		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);  		c->icache.ways = 1 + ((config1 >> 16) & 7);  		icache_size = c->icache.sets * -		              c->icache.ways * -		              c->icache.linesz; +			      c->icache.ways * +			      c->icache.linesz;  		c->icache.waybit = __ffs(icache_size/c->icache.ways);  		if (config & 0x8)		/* VI bit */ @@ -957,16 +1167,20 @@ static void __cpuinit probe_pcache(void)  		 */  		c->dcache.flags = 0; -		if ((lsize = ((config1 >> 10) & 7))) -			c->dcache.linesz = 2 << lsize; -		else -			c->dcache.linesz= lsize; -		c->dcache.sets = 64 << ((config1 >> 13) & 7); +		lsize = (config1 >> 10) & 7; + +		/* DL == 7 is reserved */ +		if (lsize == 7) +			panic("Invalid dcache line size"); + +		c->dcache.linesz = lsize ? 2 << lsize : 0; + +		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);  		c->dcache.ways = 1 + ((config1 >> 7) & 7);  		dcache_size = c->dcache.sets * -		              c->dcache.ways * -		              c->dcache.linesz; +			      c->dcache.ways * +			      c->dcache.linesz;  		c->dcache.waybit = __ffs(dcache_size/c->dcache.ways);  		c->options |= MIPS_CPU_PREFETCH; @@ -975,13 +1189,14 @@ static void __cpuinit probe_pcache(void)  	/*  	 * Processor configuration sanity check for the R4000SC erratum -	 * #5.  With page sizes larger than 32kB there is no possibility +	 * #5.	With page sizes larger than 32kB there is no possibility  	 * to get a VCE exception anymore so we don't care about this  	 * misconfiguration.  The case is rather theoretical anyway;  	 * presumably no vendor is shipping his hardware in the "bad"  	 * configuration.  	 */ -	if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && +	if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && +	    (prid & PRID_REV_MASK) < PRID_REV_R4400 &&  	    !(config & CONF_SC) && c->icache.linesz != 16 &&  	    PAGE_SIZE <= 0x8000)  		panic("Improper R4000SC processor configuration detected"); @@ -1001,11 +1216,12 @@ static void __cpuinit probe_pcache(void)  	 * normally they'd suffer from aliases but magic in the hardware deals  	 * with that for us so we don't need to take care ourselves.  	 */ -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_20KC:  	case CPU_25KF:  	case CPU_SB1:  	case CPU_SB1A: +	case CPU_XLR:  		c->dcache.flags |= MIPS_CACHE_PINDEX;  		break; @@ -1014,13 +1230,27 @@ static void __cpuinit probe_pcache(void)  	case CPU_R14000:  		break; +	case CPU_M14KC: +	case CPU_M14KEC:  	case CPU_24K:  	case CPU_34K:  	case CPU_74K:  	case CPU_1004K: -		if ((read_c0_config7() & (1 << 16))) { -			/* effectively physically indexed dcache, -			   thus no virtual aliases. */ +	case CPU_1074K: +	case CPU_INTERAPTIV: +	case CPU_P5600: +	case CPU_PROAPTIV: +	case CPU_M5150: +		if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) +			alias_74k_erratum(c); +		if (!(read_c0_config7() & MIPS_CONF7_IAR) && +		    (c->icache.waysize > PAGE_SIZE)) +			c->icache.flags |= MIPS_CACHE_ALIASES; +		if (read_c0_config7() & MIPS_CONF7_AR) { +			/* +			 * Effectively physically indexed dcache, +			 * thus no virtual aliases. 
+			*/  			c->dcache.flags |= MIPS_CACHE_PINDEX;  			break;  		} @@ -1029,7 +1259,7 @@ static void __cpuinit probe_pcache(void)  			c->dcache.flags |= MIPS_CACHE_ALIASES;  	} -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_20KC:  		/*  		 * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1041,15 +1271,14 @@ static void __cpuinit probe_pcache(void)  	case CPU_ALCHEMY:  		c->icache.flags |= MIPS_CACHE_IC_F_DC;  		break; -	} -#ifdef  CONFIG_CPU_LOONGSON2 -	/* -	 * LOONGSON2 has 4 way icache, but when using indexed cache op, -	 * one op will act on all 4 ways -	 */ -	c->icache.ways = 1; -#endif +	case CPU_LOONGSON2: +		/* +		 * LOONGSON2 has 4 way icache, but when using indexed cache op, +		 * one op will act on all 4 ways +		 */ +		c->icache.ways = 1; +	}  	printk("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n",  	       icache_size >> 10, @@ -1070,12 +1299,11 @@ static void __cpuinit probe_pcache(void)   * executes in KSEG1 space or else you will crash and burn badly.  You have   * been warned.   */ -static int __cpuinit probe_scache(void) +static int probe_scache(void)  {  	unsigned long flags, addr, begin, end, pow2;  	unsigned int config = read_c0_config();  	struct cpuinfo_mips *c = ¤t_cpu_data; -	int tmp;  	if (config & CONF_SC)  		return 0; @@ -1108,7 +1336,6 @@ static int __cpuinit probe_scache(void)  	/* Now search for the wrap around point. */  	pow2 = (128 * 1024); -	tmp = 0;  	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {  		cache_op(Index_Load_Tag_SD, addr);  		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ @@ -1127,7 +1354,6 @@ static int __cpuinit probe_scache(void)  	return 1;  } -#if defined(CONFIG_CPU_LOONGSON2)  static void __init loongson2_sc_init(void)  {  	struct cpuinfo_mips *c = ¤t_cpu_data; @@ -1143,13 +1369,39 @@ static void __init loongson2_sc_init(void)  	c->options |= MIPS_CPU_INCLUSIVE_CACHES;  } -#endif + +static void __init loongson3_sc_init(void) +{ +	struct cpuinfo_mips *c = ¤t_cpu_data; +	unsigned int config2, lsize; + +	config2 = read_c0_config2(); +	lsize = (config2 >> 4) & 15; +	if (lsize) +		c->scache.linesz = 2 << lsize; +	else +		c->scache.linesz = 0; +	c->scache.sets = 64 << ((config2 >> 8) & 15); +	c->scache.ways = 1 + (config2 & 15); + +	scache_size = c->scache.sets * +				  c->scache.ways * +				  c->scache.linesz; +	/* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */ +	scache_size *= 4; +	c->scache.waybit = 0; +	pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", +	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); +	if (scache_size) +		c->options |= MIPS_CPU_INCLUSIVE_CACHES; +	return; +}  extern int r5k_sc_init(void);  extern int rm7k_sc_init(void);  extern int mips_sc_init(void); -static void __cpuinit setup_scache(void) +static void setup_scache(void)  {  	struct cpuinfo_mips *c = ¤t_cpu_data;  	unsigned int config = read_c0_config(); @@ -1160,7 +1412,7 @@ static void __cpuinit setup_scache(void)  	 * processors don't have a S-cache that would be relevant to the  	 * Linux memory management.  	 
*/ -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_R4000SC:  	case CPU_R4000MC:  	case CPU_R4400SC: @@ -1185,26 +1437,30 @@ static void __cpuinit setup_scache(void)  #ifdef CONFIG_R5000_CPU_SCACHE  		r5k_sc_init();  #endif -                return; +		return;  	case CPU_RM7000: -	case CPU_RM9000:  #ifdef CONFIG_RM7000_CPU_SCACHE  		rm7k_sc_init();  #endif  		return; -#if defined(CONFIG_CPU_LOONGSON2)  	case CPU_LOONGSON2:  		loongson2_sc_init();  		return; -#endif + +	case CPU_LOONGSON3: +		loongson3_sc_init(); +		return; + +	case CPU_CAVIUM_OCTEON3: +	case CPU_XLP: +		/* don't need to worry about L2, fully coherent */ +		return;  	default: -		if (c->isa_level == MIPS_CPU_ISA_M32R1 || -		    c->isa_level == MIPS_CPU_ISA_M32R2 || -		    c->isa_level == MIPS_CPU_ISA_M64R1 || -		    c->isa_level == MIPS_CPU_ISA_M64R2) { +		if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | +				    MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) {  #ifdef CONFIG_MIPS_CPU_SCACHE  			if (mips_sc_init ()) {  				scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; @@ -1282,18 +1538,18 @@ static void nxp_pr4450_fixup_config(void)  	NXP_BARRIER();  } -static int __cpuinitdata cca = -1; +static int cca = -1;  static int __init cca_setup(char *str)  {  	get_option(&str, &cca); -	return 1; +	return 0;  } -__setup("cca=", cca_setup); +early_param("cca", cca_setup); -static void __cpuinit coherency_setup(void) +static void coherency_setup(void)  {  	if (cca < 0 || cca > 7)  		cca = read_c0_config() & CONF_CM_CMASK; @@ -1333,29 +1589,12 @@ static void __cpuinit coherency_setup(void)  	}  } -#if defined(CONFIG_DMA_NONCOHERENT) - -static int __cpuinitdata coherentio; - -static int __init setcoherentio(char *str) +static void r4k_cache_error_setup(void)  { -	coherentio = 1; - -	return 1; -} - -__setup("coherentio", setcoherentio); -#endif - -void __cpuinit r4k_cache_init(void) -{ -	extern void build_clear_page(void); -	extern void build_copy_page(void);  	extern char __weak except_vec2_generic;  	extern char __weak except_vec2_sb1; -	struct cpuinfo_mips *c = ¤t_cpu_data; -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_SB1:  	case CPU_SB1A:  		set_uncached_handler(0x100, &except_vec2_sb1, 0x80); @@ -1365,6 +1604,13 @@ void __cpuinit r4k_cache_init(void)  		set_uncached_handler(0x100, &except_vec2_generic, 0x80);  		break;  	} +} + +void r4k_cache_init(void) +{ +	extern void build_clear_page(void); +	extern void build_copy_page(void); +	struct cpuinfo_mips *c = ¤t_cpu_data;  	probe_pcache();  	setup_scache(); @@ -1378,6 +1624,10 @@ void __cpuinit r4k_cache_init(void)  	r4k_blast_scache_page_setup();  	r4k_blast_scache_page_indexed_setup();  	r4k_blast_scache_setup(); +#ifdef CONFIG_EVA +	r4k_blast_dcache_user_page_setup(); +	r4k_blast_icache_user_page_setup(); +#endif  	/*  	 * Some MIPS32 and MIPS64 processors have physically indexed caches. 
@@ -1400,6 +1650,8 @@ void __cpuinit r4k_cache_init(void)  	flush_cache_page	= r4k_flush_cache_page;  	flush_cache_range	= r4k_flush_cache_range; +	__flush_kernel_vmap_range = r4k_flush_kernel_vmap_range; +  	flush_cache_sigtramp	= r4k_flush_cache_sigtramp;  	flush_icache_all	= r4k_flush_icache_all;  	local_flush_data_cache_page	= local_r4k_flush_data_cache_page; @@ -1407,7 +1659,7 @@ void __cpuinit r4k_cache_init(void)  	flush_icache_range	= r4k_flush_icache_range;  	local_flush_icache_range	= local_r4k_flush_icache_range; -#if defined(CONFIG_DMA_NONCOHERENT) +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)  	if (coherentio) {  		_dma_cache_wback_inv	= (void *)cache_noop;  		_dma_cache_wback	= (void *)cache_noop; @@ -1421,8 +1673,37 @@ void __cpuinit r4k_cache_init(void)  	build_clear_page();  	build_copy_page(); -#if !defined(CONFIG_MIPS_CMP) + +	/* +	 * We want to run CMP kernels on core with and without coherent +	 * caches. Therefore, do not use CONFIG_MIPS_CMP to decide whether +	 * or not to flush caches. +	 */  	local_r4k___flush_cache_all(NULL); -#endif +  	coherency_setup(); +	board_cache_error_setup = r4k_cache_error_setup; +} + +static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd, +			       void *v) +{ +	switch (cmd) { +	case CPU_PM_ENTER_FAILED: +	case CPU_PM_EXIT: +		coherency_setup(); +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block r4k_cache_pm_notifier_block = { +	.notifier_call = r4k_cache_pm_notifier, +}; + +int __init r4k_cache_init_pm(void) +{ +	return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block);  } +arch_initcall(r4k_cache_init_pm); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index 6515b441871..8d909dbbf37 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -1,7 +1,7 @@  /*   * r2300.c: R2000 and R3000 specific mmu/cache code.   * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   *   * with a lot of changes to make this thing work for R3000s   * Tx39XX R4k style caches added. 
HK @@ -18,7 +18,6 @@  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/mmu_context.h> -#include <asm/system.h>  #include <asm/isadep.h>  #include <asm/io.h>  #include <asm/bootinfo.h> @@ -34,9 +33,9 @@ extern int r3k_have_wired_reg;	/* in r3k-tlb.c */  /* This sequence is required to ensure icache is disabled immediately */  #define TX39_STOP_STREAMING() \  __asm__ __volatile__( \ -	".set    push\n\t" \ -	".set    noreorder\n\t" \ -	"b       1f\n\t" \ +	".set	 push\n\t" \ +	".set	 noreorder\n\t" \ +	"b	 1f\n\t" \  	"nop\n\t" \  	"1:\n\t" \  	".set pop" \ @@ -253,6 +252,11 @@ static void tx39_flush_icache_range(unsigned long start, unsigned long end)  	}  } +static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ +	BUG(); +} +  static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size)  {  	unsigned long end; @@ -340,7 +344,7 @@ static __init void tx39_probe_cache(void)  	}  } -void __cpuinit tx39_cache_init(void) +void tx39_cache_init(void)  {  	extern void build_clear_page(void);  	extern void build_copy_page(void); @@ -357,7 +361,7 @@ void __cpuinit tx39_cache_init(void)  		/* TX39/H core (writethru direct-map cache) */  		__flush_cache_vmap	= tx39__flush_cache_vmap;  		__flush_cache_vunmap	= tx39__flush_cache_vunmap; -		flush_cache_all	= tx39h_flush_icache_all; +		flush_cache_all = tx39h_flush_icache_all;  		__flush_cache_all	= tx39h_flush_icache_all;  		flush_cache_mm		= (void *) tx39h_flush_icache_all;  		flush_cache_range	= (void *) tx39h_flush_icache_all; @@ -394,6 +398,8 @@ void __cpuinit tx39_cache_init(void)  		flush_icache_range = tx39_flush_icache_range;  		local_flush_icache_range = tx39_flush_icache_range; +		__flush_kernel_vmap_range = tx39_flush_kernel_vmap_range; +  		flush_cache_sigtramp = tx39_flush_cache_sigtramp;  		local_flush_data_cache_page = local_tx39_flush_data_cache_page;  		flush_data_cache_page = tx39_flush_data_cache_page; @@ -403,8 +409,8 @@ void __cpuinit tx39_cache_init(void)  		_dma_cache_inv = tx39_dma_cache_inv;  		shm_align_mask = max_t(unsigned long, -		                       (dcache_size / current_cpu_data.dcache.ways) - 1, -		                       PAGE_SIZE - 1); +				       (dcache_size / current_cpu_data.dcache.ways) - 1, +				       PAGE_SIZE - 1);  		break;  	} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 12af739048f..f7b91d3a371 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -8,7 +8,6 @@   */  #include <linux/fs.h>  #include <linux/fcntl.h> -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/linkage.h>  #include <linux/module.h> @@ -30,11 +29,17 @@ void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,  void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,  	unsigned long pfn);  void (*flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(flush_icache_range);  void (*local_flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(local_flush_icache_range);  void (*__flush_cache_vmap)(void);  void (*__flush_cache_vunmap)(void); +void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size); +EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range); +void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size); +  /* MIPS specific cache operations */  void (*flush_cache_sigtramp)(unsigned long addr);  void (*local_flush_data_cache_page)(void * addr); @@ -43,8 +48,9 @@ void (*flush_icache_all)(void);  EXPORT_SYMBOL_GPL(local_flush_data_cache_page);  
EXPORT_SYMBOL(flush_data_cache_page); +EXPORT_SYMBOL(flush_icache_all); -#ifdef CONFIG_DMA_NONCOHERENT +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)  /* DMA cache operations. */  void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); @@ -53,7 +59,7 @@ void (*_dma_cache_inv)(unsigned long start, unsigned long size);  EXPORT_SYMBOL(_dma_cache_wback_inv); -#endif /* CONFIG_DMA_NONCOHERENT */ +#endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */  /*   * We could optimize the case where the cache argument is not BCACHE but @@ -137,7 +143,7 @@ EXPORT_SYMBOL(_page_cachable_default);  static inline void setup_protection_map(void)  { -	if (kernel_uses_smartmips_rixi) { +	if (cpu_has_rixi) {  		protection_map[0]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);  		protection_map[1]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);  		protection_map[2]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); @@ -176,7 +182,7 @@ static inline void setup_protection_map(void)  	}  } -void __cpuinit cpu_cache_init(void) +void cpu_cache_init(void)  {  	if (cpu_has_3k_cache) {  		extern void __weak r3k_cache_init(void); diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index 3571090ba17..ee5c1ff861a 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c @@ -27,7 +27,7 @@  /*   * We'd like to dump the L2_ECC_TAG register on errors, but errata make - * that unsafe... So for now we don't.  (BCM1250/BCM112x erratum SOC-48.) + * that unsafe... So for now we don't.	(BCM1250/BCM112x erratum SOC-48.)   */  #undef DUMP_L2_ECC_TAG_ON_ERROR @@ -48,7 +48,7 @@  #define CP0_CERRI_EXTERNAL     (1 << 26)  #define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL)) -#define CP0_CERRI_DATA         (CP0_CERRI_DATA_PARITY) +#define CP0_CERRI_DATA	       (CP0_CERRI_DATA_PARITY)  #define CP0_CERRD_MULTIPLE     (1 << 31)  #define CP0_CERRD_TAG_STATE    (1 << 30) @@ -56,8 +56,8 @@  #define CP0_CERRD_DATA_SBE     (1 << 28)  #define CP0_CERRD_DATA_DBE     (1 << 27)  #define CP0_CERRD_EXTERNAL     (1 << 26) -#define CP0_CERRD_LOAD         (1 << 25) -#define CP0_CERRD_STORE        (1 << 24) +#define CP0_CERRD_LOAD	       (1 << 25) +#define CP0_CERRD_STORE	       (1 << 24)  #define CP0_CERRD_FILLWB       (1 << 23)  #define CP0_CERRD_COHERENCY    (1 << 22)  #define CP0_CERRD_DUPTAG       (1 << 21) @@ -69,10 +69,10 @@     (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG)  #define CP0_CERRD_TYPES \     (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL) -#define CP0_CERRD_DATA         (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) +#define CP0_CERRD_DATA	       (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) -static uint32_t	extract_ic(unsigned short addr, int data); -static uint32_t	extract_dc(unsigned short addr, int data); +static uint32_t extract_ic(unsigned short addr, int data); +static uint32_t extract_dc(unsigned short addr, int data);  static inline void breakout_errctl(unsigned int val)  { @@ -182,11 +182,7 @@ asmlinkage void sb1_cache_error(void)  #ifdef CONFIG_SIBYTE_BW_TRACE  	/* Freeze the trace buffer now */ -#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) -	csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); -#else  	csr_out32(M_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); -#endif  	printk("Trace buffer frozen\n");  #endif @@ 
-209,11 +205,11 @@ asmlinkage void sb1_cache_error(void)  	  "=r" (dpahi), "=r" (dpalo), "=r" (eepc));  	cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo; -	printk(" c0_errorepc ==   %08x\n", eepc); -	printk(" c0_errctl   ==   %08x", errctl); +	printk(" c0_errorepc ==	  %08x\n", eepc); +	printk(" c0_errctl   ==	  %08x", errctl);  	breakout_errctl(errctl);  	if (errctl & CP0_ERRCTL_ICACHE) { -		printk(" c0_cerr_i   ==   %08x", cerr_i); +		printk(" c0_cerr_i   ==	  %08x", cerr_i);  		breakout_cerri(cerr_i);  		if (CP0_CERRI_IDX_VALID(cerr_i)) {  			/* Check index of EPC, allowing for delay slot */ @@ -229,7 +225,7 @@ asmlinkage void sb1_cache_error(void)  		}  	}  	if (errctl & CP0_ERRCTL_DCACHE) { -		printk(" c0_cerr_d   ==   %08x", cerr_d); +		printk(" c0_cerr_d   ==	  %08x", cerr_d);  		breakout_cerrd(cerr_d);  		if (CP0_CERRD_DPA_VALID(cerr_d)) {  			printk(" c0_cerr_dpa == %010llx\n", cerr_dpa); @@ -256,7 +252,7 @@ asmlinkage void sb1_cache_error(void)  	/*  	 * Calling panic() when a fatal cache error occurs scrambles the  	 * state of the system (and the cache), making it difficult to -	 * investigate after the fact.  However, if you just stall the CPU, +	 * investigate after the fact.	However, if you just stall the CPU,  	 * the other CPU may keep on running, which is typically very  	 * undesirable.  	 */ @@ -411,7 +407,7 @@ static uint32_t extract_ic(unsigned short addr, int data)  				"	dmfc0  $1, $28, 1\n\t"  				"	dsrl32 %1, $1, 0 \n\t"  				"	sll    %2, $1, 0 \n\t" -				"	.set	pop         \n" +				"	.set	pop	    \n"  				: "=r" (datahi), "=r" (insta), "=r" (instb)  				: "r" ((way << 13) | addr | (offset << 3)));  				predecode = (datahi >> 8) & 0xff; @@ -441,8 +437,8 @@ static uint8_t dc_ecc(uint64_t dword)  {  	uint64_t t;  	uint32_t w; -	uint8_t  p; -	int      i; +	uint8_t	 p; +	int	 i;  	p = 0;  	for (i = 7; i >= 0; i--) diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S index e743622fd24..45dff5cd4b8 100644 --- a/arch/mips/mm/cex-gen.S +++ b/arch/mips/mm/cex-gen.S @@ -14,17 +14,17 @@  #include <asm/stackframe.h>  /* - * Game over.  Go to the button.  Press gently.  Swear where allowed by + * Game over.  Go to the button.  Press gently.	 Swear where allowed by   * legislation.   */  	LEAF(except_vec2_generic)  	.set	noreorder  	.set	noat -	.set    mips0 +	.set	mips0  	/*  	 * This is a very bad place to be.  Our cache error  	 * detection has triggered.  If we have write-back data -	 * in the cache, we may not be able to recover.  As a +	 * in the cache, we may not be able to recover.	 As a  	 * first-order desperate measure, turn off KSEG0 cacheing.  	 
*/  	mfc0	k0,CP0_CONFIG diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S index 3db8553fcd3..9029092aa74 100644 --- a/arch/mips/mm/cex-oct.S +++ b/arch/mips/mm/cex-oct.S @@ -18,7 +18,7 @@   */  	LEAF(except_vec2_octeon) -	.set    push +	.set	push  	.set	mips64r2  	.set	noreorder  	.set	noat @@ -27,19 +27,19 @@  	/* due to an errata we need to read the COP0 CacheErr (Dcache)  	 * before any cache/DRAM access	 */ -	rdhwr   k0, $0        /* get core_id */ -	PTR_LA  k1, cache_err_dcache -	sll     k0, k0, 3 +	rdhwr	k0, $0	      /* get core_id */ +	PTR_LA	k1, cache_err_dcache +	sll	k0, k0, 3  	PTR_ADDU k1, k0, k1    /* k1 = &cache_err_dcache[core_id] */ -	dmfc0   k0, CP0_CACHEERR, 1 -	sd      k0, (k1) -	dmtc0   $0, CP0_CACHEERR, 1 +	dmfc0	k0, CP0_CACHEERR, 1 +	sd	k0, (k1) +	dmtc0	$0, CP0_CACHEERR, 1 -        /* check whether this is a nested exception */ -	mfc0    k1, CP0_STATUS -	andi    k1, k1, ST0_EXL -	beqz    k1, 1f +	/* check whether this is a nested exception */ +	mfc0	k1, CP0_STATUS +	andi	k1, k1, ST0_EXL +	beqz	k1, 1f  	 nop  	j	cache_parity_error_octeon_non_recoverable  	 nop @@ -48,22 +48,22 @@  1:	j	handle_cache_err  	 nop -	.set    pop +	.set	pop  	END(except_vec2_octeon)   /* We need to jump to handle_cache_err so that the previous handler    * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX -  * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached).  */ +  * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached).	*/  	LEAF(handle_cache_err) -	.set    push -        .set    noreorder -        .set    noat +	.set	push +	.set	noreorder +	.set	noat  	SAVE_ALL  	KMODE -	jal     cache_parity_error_octeon_recoverable +	jal	cache_parity_error_octeon_recoverable  	nop -	j       ret_from_exception +	j	ret_from_exception  	nop  	.set pop diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S index 2d08268bb70..5d5f29681a2 100644 --- a/arch/mips/mm/cex-sb1.S +++ b/arch/mips/mm/cex-sb1.S @@ -15,7 +15,6 @@   * along with this program; if not, write to the Free Software   * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.   */ -#include <linux/init.h>  #include <asm/asm.h>  #include <asm/regdef.h> @@ -24,9 +23,9 @@  #include <asm/cacheops.h>  #include <asm/sibyte/board.h> -#define C0_ERRCTL     $26             /* CP0: Error info */ -#define C0_CERR_I     $27             /* CP0: Icache error */ -#define C0_CERR_D     $27,1           /* CP0: Dcache error */ +#define C0_ERRCTL     $26	      /* CP0: Error info */ +#define C0_CERR_I     $27	      /* CP0: Icache error */ +#define C0_CERR_D     $27,1	      /* CP0: Dcache error */  	/*  	 * Based on SiByte sample software cache-err/cerr.S @@ -49,8 +48,6 @@  	 * (0x170-0x17f) are used to preserve k0, k1, and ra.  	 */ -	__CPUINIT -  LEAF(except_vec2_sb1)  	/*  	 * If this error is recoverable, we need to exit the handler @@ -79,7 +76,7 @@ LEAF(except_vec2_sb1)  recovered_dcache:  	/*  	 * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA). -	 * Ought to log the occurence of this recovered dcache error. +	 * Ought to log the occurrence of this recovered dcache error.  	 */  	b	recovered  	 mtc0	$0,C0_CERR_D @@ -88,7 +85,7 @@ attempt_recovery:  	/*  	 * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31.  Any  	 * Dcache errors we can recover from will take more extensive -	 * processing.  For now, they are considered "unrecoverable". +	 * processing.	For now, they are considered "unrecoverable".  	 
* Note that 'DC' becoming set (outside of ERL mode) will  	 * cause 'IC' to clear; so if there's an Icache error, we'll  	 * only find out about it if we recover from this error and @@ -142,8 +139,6 @@ unrecoverable:  END(except_vec2_sb1) -	__FINIT -  	LEAF(handle_vec2_sb1)  	mfc0	k0,CP0_CONFIG  	li	k1,~CONF_CM_CMASK diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 4fc1a0fbe00..44b6dff5aba 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -4,7 +4,7 @@   * for more details.   *   * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001, 06  Ralf Baechle <ralf@linux-mips.org> + * Copyright (C) 2000, 2001, 06	 Ralf Baechle <ralf@linux-mips.org>   * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.   */ @@ -15,31 +15,58 @@  #include <linux/scatterlist.h>  #include <linux/string.h>  #include <linux/gfp.h> +#include <linux/highmem.h>  #include <asm/cache.h> +#include <asm/cpu-type.h>  #include <asm/io.h>  #include <dma-coherence.h> -static inline unsigned long dma_addr_to_virt(struct device *dev, -	dma_addr_t dma_addr) +#ifdef CONFIG_DMA_MAYBE_COHERENT +int coherentio = 0;	/* User defined DMA coherency from command line. */ +EXPORT_SYMBOL_GPL(coherentio); +int hw_coherentio = 0;	/* Actual hardware supported DMA coherency setting. */ + +static int __init setcoherentio(char *str)  { -	unsigned long addr = plat_dma_addr_to_phys(dev, dma_addr); +	coherentio = 1; +	pr_info("Hardware DMA cache coherency (command line)\n"); +	return 0; +} +early_param("coherentio", setcoherentio); -	return (unsigned long)phys_to_virt(addr); +static int __init setnocoherentio(char *str) +{ +	coherentio = 0; +	pr_info("Software DMA cache coherency (command line)\n"); +	return 0; +} +early_param("nocoherentio", setnocoherentio); +#endif + +static inline struct page *dma_addr_to_page(struct device *dev, +	dma_addr_t dma_addr) +{ +	return pfn_to_page( +		plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT);  }  /* + * The affected CPUs below in 'cpu_needs_post_dma_flush()' can + * speculatively fill random cachelines with stale data at any time, + * requiring an extra flush post-DMA. + *   * Warning on the terminology - Linux calls an uncached area coherent;   * MIPS terminology calls memory areas with hardware maintained coherency   * coherent.   
*/ - -static inline int cpu_is_noncoherent_r10000(struct device *dev) +static inline int cpu_needs_post_dma_flush(struct device *dev)  {  	return !plat_device_is_coherent(dev) && -	       (current_cpu_type() == CPU_R10000 || -	       current_cpu_type() == CPU_R12000); +	       (boot_cpu_type() == CPU_R10000 || +		boot_cpu_type() == CPU_R12000 || +		boot_cpu_type() == CPU_BMIPS5000);  }  static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) @@ -98,7 +125,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,  EXPORT_SYMBOL(dma_alloc_noncoherent);  static void *mips_dma_alloc_coherent(struct device *dev, size_t size, -	dma_addr_t * dma_handle, gfp_t gfp) +	dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs)  {  	void *ret; @@ -115,7 +142,8 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,  		if (!plat_device_is_coherent(dev)) {  			dma_cache_wback_inv((unsigned long) ret, size); -			ret = UNCAC_ADDR(ret); +			if (!hw_coherentio) +				ret = UNCAC_ADDR(ret);  		}  	} @@ -132,7 +160,7 @@ void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,  EXPORT_SYMBOL(dma_free_noncoherent);  static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, -	dma_addr_t dma_handle) +	dma_addr_t dma_handle, struct dma_attrs *attrs)  {  	unsigned long addr = (unsigned long) vaddr;  	int order = get_order(size); @@ -142,26 +170,26 @@ static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,  	plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); -	if (!plat_device_is_coherent(dev)) +	if (!plat_device_is_coherent(dev) && !hw_coherentio)  		addr = CAC_ADDR(addr);  	free_pages(addr, get_order(size));  } -static inline void __dma_sync(unsigned long addr, size_t size, +static inline void __dma_sync_virtual(void *addr, size_t size,  	enum dma_data_direction direction)  {  	switch (direction) {  	case DMA_TO_DEVICE: -		dma_cache_wback(addr, size); +		dma_cache_wback((unsigned long)addr, size);  		break;  	case DMA_FROM_DEVICE: -		dma_cache_inv(addr, size); +		dma_cache_inv((unsigned long)addr, size);  		break;  	case DMA_BIDIRECTIONAL: -		dma_cache_wback_inv(addr, size); +		dma_cache_wback_inv((unsigned long)addr, size);  		break;  	default: @@ -169,12 +197,49 @@ static inline void __dma_sync(unsigned long addr, size_t size,  	}  } +/* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. 
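
For context, a sketch (not part of this patch) of how the per-page sync below is reached from a driver's point of view: the generic streaming-DMA calls funnel into the .map_page/.unmap_page ops in this file, which call __dma_sync() when the device is not cache-coherent. Names such as dev, buf and len are placeholders.

        dma_addr_t handle;

        handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, handle))
                return -ENOMEM;
        /* ... device writes into buf via DMA ... */
        dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
        /*
         * On the speculative CPUs listed in cpu_needs_post_dma_flush()
         * the unmap step performs the post-DMA cache invalidate through
         * __dma_sync() below.
         */
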
+ */ +static inline void __dma_sync(struct page *page, +	unsigned long offset, size_t size, enum dma_data_direction direction) +{ +	size_t left = size; + +	do { +		size_t len = left; + +		if (PageHighMem(page)) { +			void *addr; + +			if (offset + len > PAGE_SIZE) { +				if (offset >= PAGE_SIZE) { +					page += offset >> PAGE_SHIFT; +					offset &= ~PAGE_MASK; +				} +				len = PAGE_SIZE - offset; +			} + +			addr = kmap_atomic(page); +			__dma_sync_virtual(addr + offset, len, direction); +			kunmap_atomic(addr); +		} else +			__dma_sync_virtual(page_address(page) + offset, +					   size, direction); +		offset = 0; +		page++; +		left -= len; +	} while (left); +} +  static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,  	size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)  { -	if (cpu_is_noncoherent_r10000(dev)) -		__dma_sync(dma_addr_to_virt(dev, dma_addr), size, -		           direction); +	if (cpu_needs_post_dma_flush(dev)) +		__dma_sync(dma_addr_to_page(dev, dma_addr), +			   dma_addr & ~PAGE_MASK, size, direction);  	plat_unmap_dma_mem(dev, dma_addr, size, direction);  } @@ -185,13 +250,14 @@ static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg,  	int i;  	for (i = 0; i < nents; i++, sg++) { -		unsigned long addr; - -		addr = (unsigned long) sg_virt(sg); -		if (!plat_device_is_coherent(dev) && addr) -			__dma_sync(addr, sg->length, direction); -		sg->dma_address = plat_map_dma_mem(dev, -				                   (void *)addr, sg->length); +		if (!plat_device_is_coherent(dev)) +			__dma_sync(sg_page(sg), sg->offset, sg->length, +				   direction); +#ifdef CONFIG_NEED_SG_DMA_LENGTH +		sg->dma_length = sg->length; +#endif +		sg->dma_address = plat_map_dma_mem_page(dev, sg_page(sg)) + +				  sg->offset;  	}  	return nents; @@ -201,30 +267,23 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page,  	unsigned long offset, size_t size, enum dma_data_direction direction,  	struct dma_attrs *attrs)  { -	unsigned long addr; - -	addr = (unsigned long) page_address(page) + offset; -  	if (!plat_device_is_coherent(dev)) -		__dma_sync(addr, size, direction); +		__dma_sync(page, offset, size, direction); -	return plat_map_dma_mem(dev, (void *)addr, size); +	return plat_map_dma_mem_page(dev, page) + offset;  }  static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg,  	int nhwentries, enum dma_data_direction direction,  	struct dma_attrs *attrs)  { -	unsigned long addr;  	int i;  	for (i = 0; i < nhwentries; i++, sg++) {  		if (!plat_device_is_coherent(dev) && -		    direction != DMA_TO_DEVICE) { -			addr = (unsigned long) sg_virt(sg); -			if (addr) -				__dma_sync(addr, sg->length, direction); -		} +		    direction != DMA_TO_DEVICE) +			__dma_sync(sg_page(sg), sg->offset, sg->length, +				   direction);  		plat_unmap_dma_mem(dev, sg->dma_address, sg->length, direction);  	}  } @@ -232,24 +291,17 @@ static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg,  static void mips_dma_sync_single_for_cpu(struct device *dev,  	dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)  { -	if (cpu_is_noncoherent_r10000(dev)) { -		unsigned long addr; - -		addr = dma_addr_to_virt(dev, dma_handle); -		__dma_sync(addr, size, direction); -	} +	if (cpu_needs_post_dma_flush(dev)) +		__dma_sync(dma_addr_to_page(dev, dma_handle), +			   dma_handle & ~PAGE_MASK, size, direction);  }  static void mips_dma_sync_single_for_device(struct device *dev,  	dma_addr_t dma_handle, size_t size, enum 
dma_data_direction direction)  { -	plat_extra_sync_for_device(dev); -	if (!plat_device_is_coherent(dev)) { -		unsigned long addr; - -		addr = dma_addr_to_virt(dev, dma_handle); -		__dma_sync(addr, size, direction); -	} +	if (!plat_device_is_coherent(dev)) +		__dma_sync(dma_addr_to_page(dev, dma_handle), +			   dma_handle & ~PAGE_MASK, size, direction);  }  static void mips_dma_sync_sg_for_cpu(struct device *dev, @@ -257,12 +309,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev,  {  	int i; -	/* Make sure that gcc doesn't leave the empty loop body.  */ -	for (i = 0; i < nelems; i++, sg++) { -		if (cpu_is_noncoherent_r10000(dev)) -			__dma_sync((unsigned long)page_address(sg_page(sg)), -			           sg->length, direction); -	} +	if (cpu_needs_post_dma_flush(dev)) +		for (i = 0; i < nelems; i++, sg++) +			__dma_sync(sg_page(sg), sg->offset, sg->length, +				   direction);  }  static void mips_dma_sync_sg_for_device(struct device *dev, @@ -270,17 +320,15 @@ static void mips_dma_sync_sg_for_device(struct device *dev,  {  	int i; -	/* Make sure that gcc doesn't leave the empty loop body.  */ -	for (i = 0; i < nelems; i++, sg++) { -		if (!plat_device_is_coherent(dev)) -			__dma_sync((unsigned long)page_address(sg_page(sg)), -			           sg->length, direction); -	} +	if (!plat_device_is_coherent(dev)) +		for (i = 0; i < nelems; i++, sg++) +			__dma_sync(sg_page(sg), sg->offset, sg->length, +				   direction);  }  int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)  { -	return plat_dma_mapping_error(dev, dma_addr); +	return 0;  }  int mips_dma_supported(struct device *dev, u64 mask) @@ -288,19 +336,20 @@ int mips_dma_supported(struct device *dev, u64 mask)  	return plat_dma_supported(dev, mask);  } -void mips_dma_cache_sync(struct device *dev, void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size,  			 enum dma_data_direction direction)  {  	BUG_ON(direction == DMA_NONE); -	plat_extra_sync_for_device(dev);  	if (!plat_device_is_coherent(dev)) -		__dma_sync((unsigned long)vaddr, size, direction); +		__dma_sync_virtual(vaddr, size, direction);  } +EXPORT_SYMBOL(dma_cache_sync); +  static struct dma_map_ops mips_default_dma_map_ops = { -	.alloc_coherent = mips_dma_alloc_coherent, -	.free_coherent = mips_dma_free_coherent, +	.alloc = mips_dma_alloc_coherent, +	.free = mips_dma_free_coherent,  	.map_page = mips_dma_map_page,  	.unmap_page = mips_dma_unmap_page,  	.map_sg = mips_dma_map_sg, diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 137ee76a004..becc42bb184 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -5,6 +5,7 @@   *   * Copyright (C) 1995 - 2000 by Ralf Baechle   */ +#include <linux/context_tracking.h>  #include <linux/signal.h>  #include <linux/sched.h>  #include <linux/interrupt.h> @@ -22,7 +23,6 @@  #include <asm/branch.h>  #include <asm/mmu_context.h> -#include <asm/system.h>  #include <asm/uaccess.h>  #include <asm/ptrace.h>  #include <asm/highmem.h>		/* For VMALLOC_END */ @@ -33,8 +33,8 @@   * and the problem, and then passes it off to one of the appropriate   * routines.   
*/ -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long write, -			      unsigned long address) +static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, +	unsigned long address)  {  	struct vm_area_struct * vma = NULL;  	struct task_struct *tsk = current; @@ -42,6 +42,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ  	const int field = sizeof(unsigned long) * 2;  	siginfo_t info;  	int fault; +	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;  #if 0  	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), @@ -51,7 +52,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ  #ifdef CONFIG_KPROBES  	/* -	 * This is to notify the fault handler of the kprobes.  The +	 * This is to notify the fault handler of the kprobes.	The  	 * exception code is redundant as it is also carried in REGS,  	 * but we pass it anyhow.  	 */ @@ -91,6 +92,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ  	if (in_atomic() || !mm)  		goto bad_area_nosemaphore; +	if (user_mode(regs)) +		flags |= FAULT_FLAG_USER; +retry:  	down_read(&mm->mmap_sem);  	vma = find_vma(mm, address);  	if (!vma) @@ -111,8 +115,9 @@ good_area:  	if (write) {  		if (!(vma->vm_flags & VM_WRITE))  			goto bad_area; +		flags |= FAULT_FLAG_WRITE;  	} else { -		if (kernel_uses_smartmips_rixi) { +		if (cpu_has_rixi) {  			if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {  #if 0  				pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n", @@ -144,8 +149,12 @@ good_area:  	 * make sure we exit gracefully rather than endlessly redo  	 * the fault.  	 */ -	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	fault = handle_mm_fault(mm, vma, address, flags); + +	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) +		return; + +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	if (unlikely(fault & VM_FAULT_ERROR)) {  		if (fault & VM_FAULT_OOM)  			goto out_of_memory; @@ -153,14 +162,28 @@ good_area:  			goto do_sigbus;  		BUG();  	} -	if (fault & VM_FAULT_MAJOR) { -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, -				1, 0, regs, address); -		tsk->maj_flt++; -	} else { -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, -				1, 0, regs, address); -		tsk->min_flt++; +	if (flags & FAULT_FLAG_ALLOW_RETRY) { +		if (fault & VM_FAULT_MAJOR) { +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, +						  regs, address); +			tsk->maj_flt++; +		} else { +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, +						  regs, address); +			tsk->min_flt++; +		} +		if (fault & VM_FAULT_RETRY) { +			flags &= ~FAULT_FLAG_ALLOW_RETRY; +			flags |= FAULT_FLAG_TRIED; + +			/* +			 * No need to up_read(&mm->mmap_sem) as we would +			 * have already released it in __lock_page_or_retry +			 * in mm/filemap.c. +			 */ + +			goto retry; +		}  	}  	up_read(&mm->mmap_sem); @@ -196,7 +219,7 @@ bad_area_nosemaphore:  	}  no_context: -	/* Are we prepared to handle this kernel fault?  */ +	/* Are we prepared to handle this kernel fault?	 */  	if (fixup_exception(regs)) {  		current->thread.cp0_baduaddr = address;  		return; @@ -220,6 +243,8 @@ out_of_memory:  	 * (which will retry the fault, or kill us if we got oom-killed).  	 
*/  	up_read(&mm->mmap_sem); +	if (!user_mode(regs)) +		goto no_context;  	pagefault_out_of_memory();  	return; @@ -292,3 +317,13 @@ vmalloc_fault:  	}  #endif  } + +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, +	unsigned long write, unsigned long address) +{ +	enum ctx_state prev_state; + +	prev_state = exception_enter(); +	__do_page_fault(regs, write, address); +	exception_exit(prev_state); +} diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c new file mode 100644 index 00000000000..06ce17c2a90 --- /dev/null +++ b/arch/mips/mm/gup.c @@ -0,0 +1,318 @@ +/* + * Lockless get_user_pages_fast for MIPS + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + * Copyright (C) 2011 Ralf Baechle + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/highmem.h> +#include <linux/swap.h> +#include <linux/hugetlb.h> + +#include <asm/cpu-features.h> +#include <asm/pgtable.h> + +static inline pte_t gup_get_pte(pte_t *ptep) +{ +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) +	pte_t pte; + +retry: +	pte.pte_low = ptep->pte_low; +	smp_rmb(); +	pte.pte_high = ptep->pte_high; +	smp_rmb(); +	if (unlikely(pte.pte_low != ptep->pte_low)) +		goto retry; + +	return pte; +#else +	return ACCESS_ONCE(*ptep); +#endif +} + +static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, +			int write, struct page **pages, int *nr) +{ +	pte_t *ptep = pte_offset_map(&pmd, addr); +	do { +		pte_t pte = gup_get_pte(ptep); +		struct page *page; + +		if (!pte_present(pte) || +		    pte_special(pte) || (write && !pte_write(pte))) { +			pte_unmap(ptep); +			return 0; +		} +		VM_BUG_ON(!pfn_valid(pte_pfn(pte))); +		page = pte_page(pte); +		get_page(page); +		SetPageReferenced(page); +		pages[*nr] = page; +		(*nr)++; + +	} while (ptep++, addr += PAGE_SIZE, addr != end); + +	pte_unmap(ptep - 1); +	return 1; +} + +static inline void get_head_page_multiple(struct page *page, int nr) +{ +	VM_BUG_ON(page != compound_head(page)); +	VM_BUG_ON(page_count(page) == 0); +	atomic_add(nr, &page->_count); +	SetPageReferenced(page); +} + +static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, +			int write, struct page **pages, int *nr) +{ +	pte_t pte = *(pte_t *)&pmd; +	struct page *head, *page; +	int refs; + +	if (write && !pte_write(pte)) +		return 0; +	/* hugepages are never "special" */ +	VM_BUG_ON(pte_special(pte)); +	VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + +	refs = 0; +	head = pte_page(pte); +	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); +	do { +		VM_BUG_ON(compound_head(page) != head); +		pages[*nr] = page; +		if (PageTail(page)) +			get_huge_page_tail(page); +		(*nr)++; +		page++; +		refs++; +	} while (addr += PAGE_SIZE, addr != end); + +	get_head_page_multiple(head, refs); +	return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, +			int write, struct page **pages, int *nr) +{ +	unsigned long next; +	pmd_t *pmdp; + +	pmdp = pmd_offset(&pud, addr); +	do { +		pmd_t pmd = *pmdp; + +		next = pmd_addr_end(addr, end); +		/* +		 * The pmd_trans_splitting() check below explains why +		 * pmdp_splitting_flush has to flush the tlb, to stop +		 * this gup-fast code from running while we set the +		 * splitting bit in the pmd. Returning zero will take +		 * the slow path that will call wait_split_huge_page() +		 * if the pmd is still in splitting state. 
gup-fast +		 * can't because it has irq disabled and +		 * wait_split_huge_page() would never return as the +		 * tlb flush IPI wouldn't run. +		 */ +		if (pmd_none(pmd) || pmd_trans_splitting(pmd)) +			return 0; +		if (unlikely(pmd_huge(pmd))) { +			if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) +				return 0; +		} else { +			if (!gup_pte_range(pmd, addr, next, write, pages,nr)) +				return 0; +		} +	} while (pmdp++, addr = next, addr != end); + +	return 1; +} + +static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, +			int write, struct page **pages, int *nr) +{ +	pte_t pte = *(pte_t *)&pud; +	struct page *head, *page; +	int refs; + +	if (write && !pte_write(pte)) +		return 0; +	/* hugepages are never "special" */ +	VM_BUG_ON(pte_special(pte)); +	VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + +	refs = 0; +	head = pte_page(pte); +	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); +	do { +		VM_BUG_ON(compound_head(page) != head); +		pages[*nr] = page; +		if (PageTail(page)) +			get_huge_page_tail(page); +		(*nr)++; +		page++; +		refs++; +	} while (addr += PAGE_SIZE, addr != end); + +	get_head_page_multiple(head, refs); +	return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, +			int write, struct page **pages, int *nr) +{ +	unsigned long next; +	pud_t *pudp; + +	pudp = pud_offset(&pgd, addr); +	do { +		pud_t pud = *pudp; + +		next = pud_addr_end(addr, end); +		if (pud_none(pud)) +			return 0; +		if (unlikely(pud_huge(pud))) { +			if (!gup_huge_pud(pud, addr, next, write, pages,nr)) +				return 0; +		} else { +			if (!gup_pmd_range(pud, addr, next, write, pages,nr)) +				return 0; +		} +	} while (pudp++, addr = next, addr != end); + +	return 1; +} + +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, +			  struct page **pages) +{ +	struct mm_struct *mm = current->mm; +	unsigned long addr, len, end; +	unsigned long next; +	unsigned long flags; +	pgd_t *pgdp; +	int nr = 0; + +	start &= PAGE_MASK; +	addr = start; +	len = (unsigned long) nr_pages << PAGE_SHIFT; +	end = start + len; +	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, +					(void __user *)start, len))) +		return 0; + +	/* +	 * XXX: batch / limit 'nr', to avoid large irq off latency +	 * needs some instrumenting to determine the common sizes used by +	 * important workloads (eg. DB2), and whether limiting the batch +	 * size will decrease performance. +	 * +	 * It seems like we're in the clear for the moment. Direct-IO is +	 * the main guy that batches up lots of get_user_pages, and even +	 * they are limited to 64-at-a-time which is not so many. +	 */ +	/* +	 * This doesn't prevent pagetable teardown, but does prevent +	 * the pagetables and pages from being freed. +	 * +	 * So long as we atomically load page table pointers versus teardown, +	 * we can follow the address down to the page and take a ref on it. 
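
A caller-side sketch (illustrative only, not from this patch) of the fast-GUP entry points defined in this file; uaddr and the page count are placeholders, and each pinned page must eventually be released with put_page():

        struct page *pages[16];
        int i, got;

        got = get_user_pages_fast(uaddr, 16, 1, pages); /* 1 == intend to write */
        for (i = 0; i < got; i++) {
                /* ... touch page contents, set up DMA, etc. ... */
                put_page(pages[i]);
        }
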
+	 */ +	local_irq_save(flags); +	pgdp = pgd_offset(mm, addr); +	do { +		pgd_t pgd = *pgdp; + +		next = pgd_addr_end(addr, end); +		if (pgd_none(pgd)) +			break; +		if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) +			break; +	} while (pgdp++, addr = next, addr != end); +	local_irq_restore(flags); + +	return nr; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start:	starting user address + * @nr_pages:	number of pages from start to pin + * @write:	whether pages will be written to + * @pages:	array that receives pointers to the pages pinned. + *		Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, +			struct page **pages) +{ +	struct mm_struct *mm = current->mm; +	unsigned long addr, len, end; +	unsigned long next; +	pgd_t *pgdp; +	int ret, nr = 0; + +	start &= PAGE_MASK; +	addr = start; +	len = (unsigned long) nr_pages << PAGE_SHIFT; + +	end = start + len; +	if (end < start || cpu_has_dc_aliases) +		goto slow_irqon; + +	/* XXX: batch / limit 'nr' */ +	local_irq_disable(); +	pgdp = pgd_offset(mm, addr); +	do { +		pgd_t pgd = *pgdp; + +		next = pgd_addr_end(addr, end); +		if (pgd_none(pgd)) +			goto slow; +		if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) +			goto slow; +	} while (pgdp++, addr = next, addr != end); +	local_irq_enable(); + +	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); +	return nr; +slow: +	local_irq_enable(); + +slow_irqon: +	/* Try to get the remaining pages with get_user_pages */ +	start += nr << PAGE_SHIFT; +	pages += nr; + +	down_read(&mm->mmap_sem); +	ret = get_user_pages(current, mm, start, +				(end - start) >> PAGE_SHIFT, +				write, 0, pages, NULL); +	up_read(&mm->mmap_sem); + +	/* Have to be a bit careful with return values */ +	if (nr > 0) { +		if (ret < 0) +			ret = nr; +		else +			ret += nr; +	} +	return ret; +} diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 3634c7ea06a..da815d29523 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -1,3 +1,4 @@ +#include <linux/compiler.h>  #include <linux/module.h>  #include <linux/highmem.h>  #include <linux/sched.h> @@ -41,7 +42,7 @@ EXPORT_SYMBOL(kunmap);   * kmaps are appropriate for short, tight code paths only.   */ -void *__kmap_atomic(struct page *page) +void *kmap_atomic(struct page *page)  {  	unsigned long vaddr;  	int idx, type; @@ -62,12 +63,12 @@ void *__kmap_atomic(struct page *page)  	return (void*) vaddr;  } -EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(kmap_atomic);  void __kunmap_atomic(void *kvaddr)  {  	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; -	int type; +	int type __maybe_unused;  	if (vaddr < FIXADDR_START) { // FIXME  		pagefault_enable(); diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index a7fee0dfb7a..4ec8ee10d37 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -11,7 +11,6 @@   * Copyright (C) 2008, 2009 Cavium Networks, Inc.   
*/ -#include <linux/init.h>  #include <linux/fs.h>  #include <linux/mm.h>  #include <linux/hugetlb.h> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2efcbd24c82..6e4413330e3 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -29,6 +29,7 @@  #include <linux/pfn.h>  #include <linux/hardirq.h>  #include <linux/gfp.h> +#include <linux/kcore.h>  #include <asm/asm-offsets.h>  #include <asm/bootinfo.h> @@ -43,32 +44,9 @@  #include <asm/tlb.h>  #include <asm/fixmap.h> -/* Atomicity and interruptability */ -#ifdef CONFIG_MIPS_MT_SMTC - -#include <asm/mipsmtregs.h> - -#define ENTER_CRITICAL(flags) \ -	{ \ -	unsigned int mvpflags; \ -	local_irq_save(flags);\ -	mvpflags = dvpe() -#define EXIT_CRITICAL(flags) \ -	evpe(mvpflags); \ -	local_irq_restore(flags); \ -	} -#else - -#define ENTER_CRITICAL(flags) local_irq_save(flags) -#define EXIT_CRITICAL(flags) local_irq_restore(flags) - -#endif /* CONFIG_MIPS_MT_SMTC */ - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -  /*   * We have up to 8 empty zeroed pages so we can map one of the right colour - * when needed.  This is necessary only on R4000 / R4400 SC and MC versions + * when needed.	 This is necessary only on R4000 / R4400 SC and MC versions   * where we have to avoid VCED / VECI exceptions for good performance at   * any price.  Since page is never written to after the initialization we   * don't have to care about aliases on other CPUs. @@ -79,10 +57,9 @@ EXPORT_SYMBOL_GPL(empty_zero_page);  /*   * Not static inline because used by IP27 special magic initialization code   */ -unsigned long setup_zero_pages(void) +void setup_zero_pages(void)  { -	unsigned int order; -	unsigned long size; +	unsigned int order, i;  	struct page *page;  	if (cpu_has_vce) @@ -96,32 +73,13 @@ unsigned long setup_zero_pages(void)  	page = virt_to_page((void *)empty_zero_page);  	split_page(page, order); -	while (page < virt_to_page((void *)(empty_zero_page + (PAGE_SIZE << order)))) { -		SetPageReserved(page); -		page++; -	} +	for (i = 0; i < (1 << order); i++, page++) +		mark_page_reserved(page); -	size = PAGE_SIZE << order; -	zero_page_mask = (size - 1) & PAGE_MASK; - -	return 1UL << order; +	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;  } -#ifdef CONFIG_MIPS_MT_SMTC -static pte_t *kmap_coherent_pte; -static void __init kmap_coherent_init(void) -{ -	unsigned long vaddr; - -	/* cache the first coherent kmap pte */ -	vaddr = __fix_to_virt(FIX_CMAP_BEGIN); -	kmap_coherent_pte = kmap_get_fixmap_pte(vaddr); -} -#else -static inline void kmap_coherent_init(void) {} -#endif - -void *kmap_coherent(struct page *page, unsigned long addr) +static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot)  {  	enum fixed_addresses idx;  	unsigned long vaddr, flags, entrylo; @@ -131,62 +89,50 @@ void *kmap_coherent(struct page *page, unsigned long addr)  	BUG_ON(Page_dcache_dirty(page)); -	inc_preempt_count(); +	pagefault_disable();  	idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); -#ifdef CONFIG_MIPS_MT_SMTC -	idx += FIX_N_COLOURS * smp_processor_id() + -		(in_interrupt() ? (FIX_N_COLOURS * NR_CPUS) : 0); -#else  	idx += in_interrupt() ? 
FIX_N_COLOURS : 0; -#endif  	vaddr = __fix_to_virt(FIX_CMAP_END - idx); -	pte = mk_pte(page, PAGE_KERNEL); +	pte = mk_pte(page, prot);  #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)  	entrylo = pte.pte_high;  #else  	entrylo = pte_to_entrylo(pte_val(pte));  #endif -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	old_ctx = read_c0_entryhi();  	write_c0_entryhi(vaddr & (PAGE_MASK << 1));  	write_c0_entrylo0(entrylo);  	write_c0_entrylo1(entrylo); -#ifdef CONFIG_MIPS_MT_SMTC -	set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte); -	/* preload TLB instead of local_flush_tlb_one() */ -	mtc0_tlbw_hazard(); -	tlb_probe(); -	tlb_probe_hazard(); -	tlbidx = read_c0_index(); -	mtc0_tlbw_hazard(); -	if (tlbidx < 0) -		tlb_write_random(); -	else -		tlb_write_indexed(); -#else  	tlbidx = read_c0_wired();  	write_c0_wired(tlbidx + 1);  	write_c0_index(tlbidx);  	mtc0_tlbw_hazard();  	tlb_write_indexed(); -#endif  	tlbw_use_hazard();  	write_c0_entryhi(old_ctx); -	EXIT_CRITICAL(flags); +	local_irq_restore(flags);  	return (void*) vaddr;  } -#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) +void *kmap_coherent(struct page *page, unsigned long addr) +{ +	return __kmap_pgprot(page, addr, PAGE_KERNEL); +} + +void *kmap_noncoherent(struct page *page, unsigned long addr) +{ +	return __kmap_pgprot(page, addr, PAGE_KERNEL_NC); +}  void kunmap_coherent(void)  { -#ifndef CONFIG_MIPS_MT_SMTC  	unsigned int wired;  	unsigned long flags, old_ctx; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	old_ctx = read_c0_entryhi();  	wired = read_c0_wired() - 1;  	write_c0_wired(wired); @@ -198,10 +144,8 @@ void kunmap_coherent(void)  	tlb_write_indexed();  	tlbw_use_hazard();  	write_c0_entryhi(old_ctx); -	EXIT_CRITICAL(flags); -#endif -	dec_preempt_count(); -	preempt_check_resched(); +	local_irq_restore(flags); +	pagefault_enable();  }  void copy_user_highpage(struct page *to, struct page *from, @@ -209,21 +153,21 @@ void copy_user_highpage(struct page *to, struct page *from,  {  	void *vfrom, *vto; -	vto = kmap_atomic(to, KM_USER1); +	vto = kmap_atomic(to);  	if (cpu_has_dc_aliases &&  	    page_mapped(from) && !Page_dcache_dirty(from)) {  		vfrom = kmap_coherent(from, vaddr);  		copy_page(vto, vfrom);  		kunmap_coherent();  	} else { -		vfrom = kmap_atomic(from, KM_USER0); +		vfrom = kmap_atomic(from);  		copy_page(vto, vfrom); -		kunmap_atomic(vfrom, KM_USER0); +		kunmap_atomic(vfrom);  	}  	if ((!cpu_has_ic_fills_f_dc) ||  	    pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))  		flush_data_cache_page((unsigned long)vto); -	kunmap_atomic(vto, KM_USER1); +	kunmap_atomic(vto);  	/* Make sure this page is cleared on other CPU's too before using it */  	smp_wmb();  } @@ -261,11 +205,12 @@ void copy_from_user_page(struct vm_area_struct *vma,  			SetPageDcacheDirty(page);  	}  } +EXPORT_SYMBOL_GPL(copy_from_user_page);  void __init fixrange_init(unsigned long start, unsigned long end,  	pgd_t *pgd_base)  { -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_MIPS_MT_SMTC) +#ifdef CONFIG_HIGHMEM  	pgd_t *pgd;  	pud_t *pud;  	pmd_t *pmd; @@ -279,11 +224,11 @@ void __init fixrange_init(unsigned long start, unsigned long end,  	k = __pmd_offset(vaddr);  	pgd = pgd_base + i; -	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { +	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {  		pud = (pud_t *)pgd; -		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { +		for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) {  			pmd = (pmd_t *)pud; -			for (; (k < 
PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { +			for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) {  				if (pmd_none(*pmd)) {  					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);  					set_pmd(pmd, __pmd((unsigned long)pte)); @@ -306,9 +251,14 @@ int page_is_ram(unsigned long pagenr)  	for (i = 0; i < boot_mem_map.nr_map; i++) {  		unsigned long addr, end; -		if (boot_mem_map.map[i].type != BOOT_MEM_RAM) +		switch (boot_mem_map.map[i].type) { +		case BOOT_MEM_RAM: +		case BOOT_MEM_INIT_RAM: +			break; +		default:  			/* not usable memory */  			continue; +		}  		addr = PFN_UP(boot_mem_map.map[i].addr);  		end = PFN_DOWN(boot_mem_map.map[i].addr + @@ -324,15 +274,13 @@ int page_is_ram(unsigned long pagenr)  void __init paging_init(void)  {  	unsigned long max_zone_pfns[MAX_NR_ZONES]; -	unsigned long lastpfn; +	unsigned long lastpfn __maybe_unused;  	pagetable_init();  #ifdef CONFIG_HIGHMEM  	kmap_init();  #endif -	kmap_coherent_init(); -  #ifdef CONFIG_ZONE_DMA  	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;  #endif @@ -361,53 +309,38 @@ void __init paging_init(void)  static struct kcore_list kcore_kseg0;  #endif -void __init mem_init(void) +static inline void mem_init_free_highmem(void)  { -	unsigned long codesize, reservedpages, datasize, initsize; -	unsigned long tmp, ram; +#ifdef CONFIG_HIGHMEM +	unsigned long tmp; +	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { +		struct page *page = pfn_to_page(tmp); + +		if (!page_is_ram(tmp)) +			SetPageReserved(page); +		else +			free_highmem_page(page); +	} +#endif +} + +void __init mem_init(void) +{  #ifdef CONFIG_HIGHMEM  #ifdef CONFIG_DISCONTIGMEM  #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"  #endif -	max_mapnr = highend_pfn; +	max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;  #else  	max_mapnr = max_low_pfn;  #endif  	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); -	totalram_pages += free_all_bootmem(); -	totalram_pages -= setup_zero_pages();	/* Setup zeroed pages.  */ - -	reservedpages = ram = 0; -	for (tmp = 0; tmp < max_low_pfn; tmp++) -		if (page_is_ram(tmp)) { -			ram++; -			if (PageReserved(pfn_to_page(tmp))) -				reservedpages++; -		} -	num_physpages = ram; - -#ifdef CONFIG_HIGHMEM -	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { -		struct page *page = pfn_to_page(tmp); - -		if (!page_is_ram(tmp)) { -			SetPageReserved(page); -			continue; -		} -		ClearPageReserved(page); -		init_page_count(page); -		__free_page(page); -		totalhigh_pages++; -	} -	totalram_pages += totalhigh_pages; -	num_physpages += totalhigh_pages; -#endif - -	codesize =  (unsigned long) &_etext - (unsigned long) &_text; -	datasize =  (unsigned long) &_edata - (unsigned long) &_etext; -	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin; +	free_all_bootmem(); +	setup_zero_pages();	/* Setup zeroed pages.  
*/ +	mem_init_free_highmem(); +	mem_init_print_info(NULL);  #ifdef CONFIG_64BIT  	if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -416,16 +349,6 @@ void __init mem_init(void)  		kclist_add(&kcore_kseg0, (void *) CKSEG0,  				0x80000000 - 4, KCORE_TEXT);  #endif - -	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " -	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", -	       nr_free_pages() << (PAGE_SHIFT-10), -	       ram << (PAGE_SHIFT-10), -	       codesize >> 10, -	       reservedpages << (PAGE_SHIFT-10), -	       datasize >> 10, -	       initsize >> 10, -	       totalhigh_pages << (PAGE_SHIFT-10));  }  #endif /* !CONFIG_NEED_MULTIPLE_NODES */ @@ -437,11 +360,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)  		struct page *page = pfn_to_page(pfn);  		void *addr = phys_to_virt(PFN_PHYS(pfn)); -		ClearPageReserved(page); -		init_page_count(page);  		memset(addr, POISON_FREE_INITMEM, PAGE_SIZE); -		__free_page(page); -		totalram_pages++; +		free_reserved_page(page);  	}  	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);  } @@ -449,36 +369,44 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)  #ifdef CONFIG_BLK_DEV_INITRD  void free_initrd_mem(unsigned long start, unsigned long end)  { -	free_init_pages("initrd memory", -			virt_to_phys((void *)start), -			virt_to_phys((void *)end)); +	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, +			   "initrd");  }  #endif +void (*free_init_pages_eva)(void *begin, void *end) = NULL; +  void __init_refok free_initmem(void)  {  	prom_free_prom_memory(); -	free_init_pages("unused kernel memory", -			__pa_symbol(&__init_begin), -			__pa_symbol(&__init_end)); +	/* +	 * Let the platform define a specific function to free the +	 * init section since EVA may have used any possible mapping +	 * between virtual and physical addresses. +	 */ +	if (free_init_pages_eva) +		free_init_pages_eva((void *)&__init_begin, (void *)&__init_end); +	else +		free_initmem_default(POISON_FREE_INITMEM);  }  #ifndef CONFIG_MIPS_PGD_C0_CONTEXT  unsigned long pgd_current[NR_CPUS];  #endif -/* - * On 64-bit we've got three-level pagetables with a slightly - * different layout ... - */ -#define __page_aligned(order) __attribute__((__aligned__(PAGE_SIZE<<order)))  /*   * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER   * are constants.  So we use the variants from asm-offset.h until that gcc   * will officially be retired. + * + * Align swapper_pg_dir in to 64K, allows its address to be loaded + * with a single LUI instruction in the TLB handlers.  If we used + * __aligned(64K), its size would get rounded up to the alignment + * size, and waste space.  So we place it in its own section and align + * it in the linker script.   
*/ -pgd_t swapper_pg_dir[_PTRS_PER_PGD] __page_aligned(_PGD_ORDER); +pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);  #ifndef __PAGETABLE_PMD_FOLDED -pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned(PMD_ORDER); +pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;  #endif -pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned(PTE_ORDER); +pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index cacfd31e8ec..7f840bc08ab 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -22,7 +22,7 @@ static inline void remap_area_pte(pte_t * pte, unsigned long address,  	phys_t end;  	unsigned long pfn;  	pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE -	                           | __WRITEABLE | flags); +				   | __WRITEABLE | flags);  	address &= ~PMD_MASK;  	end = address + size; @@ -185,7 +185,7 @@ void __iounmap(const volatile void __iomem *addr)  	if (!p)  		printk(KERN_ERR "iounmap: bad address %p\n", addr); -        kfree(p); +	kfree(p);  }  EXPORT_SYMBOL(__ioremap); diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c new file mode 100644 index 00000000000..f1baadd56e8 --- /dev/null +++ b/arch/mips/mm/mmap.c @@ -0,0 +1,198 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Wind River Systems, + *   written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <linux/sched.h> + +unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */ +EXPORT_SYMBOL(shm_align_mask); + +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ +	if (current->personality & ADDR_COMPAT_LAYOUT) +		return 1; + +	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) +		return 1; + +	return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ +	unsigned long gap = rlimit(RLIMIT_STACK); + +	if (gap < MIN_GAP) +		gap = MIN_GAP; +	else if (gap > MAX_GAP) +		gap = MAX_GAP; + +	return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + +#define COLOUR_ALIGN(addr, pgoff)				\ +	((((addr) + shm_align_mask) & ~shm_align_mask) +	\ +	 (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_common(struct file *filp, +	unsigned long addr0, unsigned long len, unsigned long pgoff, +	unsigned long flags, enum mmap_allocation_direction dir) +{ +	struct mm_struct *mm = current->mm; +	struct vm_area_struct *vma; +	unsigned long addr = addr0; +	int do_color_align; +	struct vm_unmapped_area_info info; + +	if (unlikely(len > TASK_SIZE)) +		return -ENOMEM; + +	if (flags & MAP_FIXED) { +		/* Even MAP_FIXED mappings must reside within TASK_SIZE */ +		if (TASK_SIZE - len < addr) +			return -EINVAL; + +		/* +		 * We do not accept a shared mapping if it would violate +		 * cache aliasing constraints. 
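
A worked example of the aliasing rule that COLOUR_ALIGN() above and the MAP_FIXED check that follows enforce, assuming a 16 KiB alias span (shm_align_mask == 0x3fff) with 4 KiB pages; the helper name is hypothetical:

        /*
         * All user mappings of a given file page must share one D-cache
         * colour, i.e. agree in the virtual index bits above PAGE_SHIFT.
         */
        static int colour_matches(unsigned long addr, unsigned long pgoff)
        {
                return ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask) == 0;
        }
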
+		 */ +		if ((flags & MAP_SHARED) && +		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) +			return -EINVAL; +		return addr; +	} + +	do_color_align = 0; +	if (filp || (flags & MAP_SHARED)) +		do_color_align = 1; + +	/* requesting a specific address */ +	if (addr) { +		if (do_color_align) +			addr = COLOUR_ALIGN(addr, pgoff); +		else +			addr = PAGE_ALIGN(addr); + +		vma = find_vma(mm, addr); +		if (TASK_SIZE - len >= addr && +		    (!vma || addr + len <= vma->vm_start)) +			return addr; +	} + +	info.length = len; +	info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; +	info.align_offset = pgoff << PAGE_SHIFT; + +	if (dir == DOWN) { +		info.flags = VM_UNMAPPED_AREA_TOPDOWN; +		info.low_limit = PAGE_SIZE; +		info.high_limit = mm->mmap_base; +		addr = vm_unmapped_area(&info); + +		if (!(addr & ~PAGE_MASK)) +			return addr; + +		/* +		 * A failed mmap() very likely causes application failure, +		 * so fall back to the bottom-up function here. This scenario +		 * can happen with large stack limits and large mmap() +		 * allocations. +		 */ +	} + +	info.flags = 0; +	info.low_limit = mm->mmap_base; +	info.high_limit = TASK_SIZE; +	return vm_unmapped_area(&info); +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, +	unsigned long len, unsigned long pgoff, unsigned long flags) +{ +	return arch_get_unmapped_area_common(filp, +			addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, +	unsigned long addr0, unsigned long len, unsigned long pgoff, +	unsigned long flags) +{ +	return arch_get_unmapped_area_common(filp, +			addr0, len, pgoff, flags, DOWN); +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ +	unsigned long random_factor = 0UL; + +	if (current->flags & PF_RANDOMIZE) { +		random_factor = get_random_int(); +		random_factor = random_factor << PAGE_SHIFT; +		if (TASK_IS_32BIT_ADDR) +			random_factor &= 0xfffffful; +		else +			random_factor &= 0xffffffful; +	} + +	if (mmap_is_legacy()) { +		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; +		mm->get_unmapped_area = arch_get_unmapped_area; +	} else { +		mm->mmap_base = mmap_base(random_factor); +		mm->get_unmapped_area = arch_get_unmapped_area_topdown; +	} +} + +static inline unsigned long brk_rnd(void) +{ +	unsigned long rnd = get_random_int(); + +	rnd = rnd << PAGE_SHIFT; +	/* 8MB for 32bit, 256MB for 64bit */ +	if (TASK_IS_32BIT_ADDR) +		rnd = rnd & 0x7ffffful; +	else +		rnd = rnd & 0xffffffful; + +	return rnd; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ +	unsigned long base = mm->brk; +	unsigned long ret; + +	ret = PAGE_ALIGN(base + brk_rnd()); + +	if (ret < mm->brk) +		return mm->brk; + +	return ret; +} + +int __virt_addr_valid(const volatile void *kaddr) +{ +	return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); +} +EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S new file mode 100644 index 00000000000..48a6b38ff13 --- /dev/null +++ b/arch/mips/mm/page-funcs.S @@ -0,0 +1,50 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated clear_page/copy_page functions. + * + * Copyright (C) 2012  MIPS Technologies, Inc. 
+ * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/regdef.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#define cpu_clear_page_function_name	clear_page_cpu +#define cpu_copy_page_function_name	copy_page_cpu +#else +#define cpu_clear_page_function_name	clear_page +#define cpu_copy_page_function_name	copy_page +#endif + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache:		0x058 bytes + * R4600 v1.7:				0x05c bytes + * R4600 v2.0:				0x060 bytes + * With prefetching, 16 word strides	0x120 bytes + */ +EXPORT(__clear_page_start) +LEAF(cpu_clear_page_function_name) +1:	j	1b		/* Dummy, will be replaced. */ +	.space 288 +END(cpu_clear_page_function_name) +EXPORT(__clear_page_end) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache:		0x11c bytes + * R4600 v1.7:				0x080 bytes + * R4600 v2.0:				0x07c bytes + * With prefetching, 16 word strides	0x540 bytes + */ +EXPORT(__copy_page_start) +LEAF(cpu_copy_page_function_name) +1:	j	1b		/* Dummy, will be replaced. */ +	.space 1344 +END(cpu_copy_page_function_name) +EXPORT(__copy_page_end) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 36272f7d374..b611102e23b 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -6,8 +6,8 @@   * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)   * Copyright (C) 2007  Maciej W. Rozycki   * Copyright (C) 2008  Thiemo Seufer + * Copyright (C) 2012  MIPS Technologies, Inc.   */ -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/smp.h> @@ -17,12 +17,12 @@  #include <asm/bugs.h>  #include <asm/cacheops.h> +#include <asm/cpu-type.h>  #include <asm/inst.h>  #include <asm/io.h>  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/prefetch.h> -#include <asm/system.h>  #include <asm/bootinfo.h>  #include <asm/mipsregs.h>  #include <asm/mmu_context.h> @@ -66,68 +66,29 @@ UASM_L_LA(_copy_pref_both)  UASM_L_LA(_copy_pref_store)  /* We need one branch and therefore one relocation per target label. 
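
A minimal sketch of how the label and relocation arrays pair up (helper and label names such as uasm_l_clear_nopref()/uasm_il_bne() follow the pattern this file already relies on; the instruction sequence is illustrative, not the generated loop):

        uasm_l_clear_nopref(&l, buf);           /* bind label to current buffer position */
        build_clear_store(&buf, 0);             /* loop body */
        uasm_il_bne(&buf, &r, A2, A0, label_clear_nopref); /* branch records one reloc */
        uasm_i_nop(&buf);                       /* delay slot */

        uasm_resolve_relocs(relocs, labels);    /* patch the branch offsets afterwards */
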
*/ -static struct uasm_label __cpuinitdata labels[5]; -static struct uasm_reloc __cpuinitdata relocs[5]; +static struct uasm_label labels[5]; +static struct uasm_reloc relocs[5];  #define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)  #define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020) -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache:		0x058 bytes - * R4600 v1.7:				0x05c bytes - * R4600 v2.0:				0x060 bytes - * With prefetching, 16 word strides	0x120 bytes - */ - -static u32 clear_page_array[0x120 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void clear_page_cpu(void *page) __attribute__((alias("clear_page_array"))); -#else -void clear_page(void *page) __attribute__((alias("clear_page_array"))); -#endif - -EXPORT_SYMBOL(clear_page); - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache:		0x11c bytes - * R4600 v1.7:				0x080 bytes - * R4600 v2.0:				0x07c bytes - * With prefetching, 16 word strides	0x540 bytes - */ -static u32 copy_page_array[0x540 / 4]; - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS -void -copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array"))); -#else -void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); -#endif +static int pref_bias_clear_store; +static int pref_bias_copy_load; +static int pref_bias_copy_store; -EXPORT_SYMBOL(copy_page); +static u32 pref_src_mode; +static u32 pref_dst_mode; +static int clear_word_size; +static int copy_word_size; -static int pref_bias_clear_store __cpuinitdata; -static int pref_bias_copy_load __cpuinitdata; -static int pref_bias_copy_store __cpuinitdata; +static int half_clear_loop_size; +static int half_copy_loop_size; -static u32 pref_src_mode __cpuinitdata; -static u32 pref_dst_mode __cpuinitdata; - -static int clear_word_size __cpuinitdata; -static int copy_word_size __cpuinitdata; - -static int half_clear_loop_size __cpuinitdata; -static int half_copy_loop_size __cpuinitdata; - -static int cache_line_size __cpuinitdata; +static int cache_line_size;  #define cache_line_mask() (cache_line_size - 1) -static inline void __cpuinit +static inline void  pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)  {  	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { @@ -147,7 +108,7 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)  	}  } -static void __cpuinit set_prefetch_parameters(void) +static void set_prefetch_parameters(void)  {  	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)  		clear_word_size = 8; @@ -179,15 +140,6 @@ static void __cpuinit set_prefetch_parameters(void)  			pref_bias_copy_load = 256;  			break; -		case CPU_RM9000: -			/* -			 * As a workaround for erratum G105 which make the -			 * PrepareForStore hint unusable we fall back to -			 * StoreRetained on the RM9000.  Once it is known which -			 * versions of the RM9000 we'll be able to condition- -			 * alize this. 
-			 */ -  		case CPU_R10000:  		case CPU_R12000:  		case CPU_R14000: @@ -247,7 +199,7 @@ static void __cpuinit set_prefetch_parameters(void)  				      4 * copy_word_size));  } -static void __cpuinit build_clear_store(u32 **buf, int off) +static void build_clear_store(u32 **buf, int off)  {  	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {  		uasm_i_sd(buf, ZERO, off, A0); @@ -256,7 +208,7 @@ static void __cpuinit build_clear_store(u32 **buf, int off)  	}  } -static inline void __cpuinit build_clear_pref(u32 **buf, int off) +static inline void build_clear_pref(u32 **buf, int off)  {  	if (off & cache_line_mask())  		return; @@ -280,16 +232,26 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)  			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);  		} -		} +	}  } -void __cpuinit build_clear_page(void) +extern u32 __clear_page_start; +extern u32 __clear_page_end; +extern u32 __copy_page_start; +extern u32 __copy_page_end; + +void build_clear_page(void)  {  	int off; -	u32 *buf = (u32 *)&clear_page_array; +	u32 *buf = &__clear_page_start;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs;  	int i; +	static atomic_t run_once = ATOMIC_INIT(0); + +	if (atomic_xchg(&run_once, 1)) { +		return; +	}  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs)); @@ -311,10 +273,10 @@ void __cpuinit build_clear_page(void)  		uasm_i_ori(&buf, A2, A0, off);  	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) -		uasm_i_lui(&buf, AT, 0xa000); +		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));  	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) -	                        * cache_line_size : 0; +				* cache_line_size : 0;  	while (off) {  		build_clear_pref(&buf, -off);  		off -= cache_line_size; @@ -357,21 +319,21 @@ void __cpuinit build_clear_page(void)  	uasm_i_jr(&buf, RA);  	uasm_i_nop(&buf); -	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array)); +	BUG_ON(buf > &__clear_page_end);  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Synthesized clear page handler (%u instructions).\n", -		 (u32)(buf - clear_page_array)); +		 (u32)(buf - &__clear_page_start));  	pr_debug("\t.set push\n");  	pr_debug("\t.set noreorder\n"); -	for (i = 0; i < (buf - clear_page_array); i++) -		pr_debug("\t.word 0x%08x\n", clear_page_array[i]); +	for (i = 0; i < (buf - &__clear_page_start); i++) +		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);  	pr_debug("\t.set pop\n");  } -static void __cpuinit build_copy_load(u32 **buf, int reg, int off) +static void build_copy_load(u32 **buf, int reg, int off)  {  	if (cpu_has_64bit_gp_regs) {  		uasm_i_ld(buf, reg, off, A1); @@ -380,7 +342,7 @@ static void __cpuinit build_copy_load(u32 **buf, int reg, int off)  	}  } -static void __cpuinit build_copy_store(u32 **buf, int reg, int off) +static void build_copy_store(u32 **buf, int reg, int off)  {  	if (cpu_has_64bit_gp_regs) {  		uasm_i_sd(buf, reg, off, A0); @@ -425,13 +387,18 @@ static inline void build_copy_store_pref(u32 **buf, int off)  	}  } -void __cpuinit build_copy_page(void) +void build_copy_page(void)  {  	int off; -	u32 *buf = (u32 *)©_page_array; +	u32 *buf = &__copy_page_start;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs;  	int i; +	static atomic_t run_once = ATOMIC_INIT(0); + +	if (atomic_xchg(&run_once, 1)) { +		return; +	}  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs)); @@ -457,16 +424,16 @@ void __cpuinit build_copy_page(void)  		uasm_i_ori(&buf, A2, A0, off);  	if 
(R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) -		uasm_i_lui(&buf, AT, 0xa000); +		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));  	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * -	                        cache_line_size : 0; +				cache_line_size : 0;  	while (off) {  		build_copy_load_pref(&buf, -off);  		off -= cache_line_size;  	}  	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) * -	                        cache_line_size : 0; +				cache_line_size : 0;  	while (off) {  		build_copy_store_pref(&buf, -off);  		off -= cache_line_size; @@ -596,21 +563,23 @@ void __cpuinit build_copy_page(void)  	uasm_i_jr(&buf, RA);  	uasm_i_nop(&buf); -	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); +	BUG_ON(buf > &__copy_page_end);  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Synthesized copy page handler (%u instructions).\n", -		 (u32)(buf - copy_page_array)); +		 (u32)(buf - &__copy_page_start));  	pr_debug("\t.set push\n");  	pr_debug("\t.set noreorder\n"); -	for (i = 0; i < (buf - copy_page_array); i++) -		pr_debug("\t.word 0x%08x\n", copy_page_array[i]); +	for (i = 0; i < (buf - &__copy_page_start); i++) +		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);  	pr_debug("\t.set pop\n");  }  #ifdef CONFIG_SIBYTE_DMA_PAGEOPS +extern void clear_page_cpu(void *page); +extern void copy_page_cpu(void *to, void *from);  /*   * Pad descriptors to cacheline, since each is exclusively owned by a diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 575e4019227..adc6911ba74 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -52,7 +52,7 @@ void __init pagetable_init(void)  	 * Fixed mappings:  	 */  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -	fixrange_init(vaddr, 0, pgd_base); +	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);  #ifdef CONFIG_HIGHMEM  	/* diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index 78eaa4f0b0e..e8adc0069d6 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -11,6 +11,7 @@  #include <asm/fixmap.h>  #include <asm/pgtable.h>  #include <asm/pgalloc.h> +#include <asm/tlbflush.h>  void pgd_init(unsigned long page)  { @@ -23,20 +24,20 @@ void pgd_init(unsigned long page)  	entry = (unsigned long)invalid_pmd_table;  #endif - 	p = (unsigned long *) page; +	p = (unsigned long *) page;  	end = p + PTRS_PER_PGD; -	while (p < end) { +	do {  		p[0] = entry;  		p[1] = entry;  		p[2] = entry;  		p[3] = entry;  		p[4] = entry; -		p[5] = entry; -		p[6] = entry; -		p[7] = entry;  		p += 8; -	} +		p[-3] = entry; +		p[-2] = entry; +		p[-1] = entry; +	} while (p != end);  }  #ifndef __PAGETABLE_PMD_FOLDED @@ -44,23 +45,53 @@ void pmd_init(unsigned long addr, unsigned long pagetable)  {  	unsigned long *p, *end; - 	p = (unsigned long *) addr; +	p = (unsigned long *) addr;  	end = p + PTRS_PER_PMD; -	while (p < end) { +	do {  		p[0] = pagetable;  		p[1] = pagetable;  		p[2] = pagetable;  		p[3] = pagetable;  		p[4] = pagetable; -		p[5] = pagetable; -		p[6] = pagetable; -		p[7] = pagetable;  		p += 8; +		p[-3] = pagetable; +		p[-2] = pagetable; +		p[-1] = pagetable; +	} while (p != end); +} +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +void pmdp_splitting_flush(struct vm_area_struct *vma, +			 unsigned long address, +			 pmd_t *pmdp) +{ +	if (!pmd_trans_splitting(*pmdp)) { +		pmd_t pmd = pmd_mksplitting(*pmdp); +		set_pmd_at(vma->vm_mm, address, pmdp, pmd);  	}  } +  #endif +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ +	
pmd_t pmd; + +	pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + +	return pmd; +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, +		pmd_t *pmdp, pmd_t pmd) +{ +	*pmdp = pmd; +	flush_tlb_all(); +} +  void __init pagetable_init(void)  {  	unsigned long vaddr; @@ -76,5 +107,5 @@ void __init pagetable_init(void)  	 * Fixed mappings:  	 */  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -	fixrange_init(vaddr, 0, pgd_base); +	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);  } diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index 13adb578211..dc7c5a5214a 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -2,7 +2,7 @@   * sc-ip22.c: Indy cache management functions.   *   * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), - * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + * derived from r4xx0.c by David S. Miller (davem@davemloft.net).   */  #include <linux/init.h>  #include <linux/kernel.h> @@ -12,7 +12,6 @@  #include <asm/bcache.h>  #include <asm/page.h>  #include <asm/pgtable.h> -#include <asm/system.h>  #include <asm/bootinfo.h>  #include <asm/sgi/ip22.h>  #include <asm/sgi/mc.h> @@ -160,7 +159,7 @@ static inline int __init indy_sc_probe(void)  }  /* XXX Check with wje if the Indy caches can differenciate between -   writeback + invalidate and just invalidate.  */ +   writeback + invalidate and just invalidate.	*/  static struct bcache_ops indy_sc_ops = {  	.bc_enable = indy_sc_enable,  	.bc_disable = indy_sc_disable, @@ -168,7 +167,7 @@ static struct bcache_ops indy_sc_ops = {  	.bc_inv = indy_sc_wback_invalidate  }; -void __cpuinit indy_sc_init(void) +void indy_sc_init(void)  {  	if (indy_sc_probe()) {  		indy_sc_enable(); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 505fecad468..99eb8fabab6 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -6,12 +6,12 @@  #include <linux/sched.h>  #include <linux/mm.h> +#include <asm/cpu-type.h>  #include <asm/mipsregs.h>  #include <asm/bcache.h>  #include <asm/cacheops.h>  #include <asm/page.h>  #include <asm/pgtable.h> -#include <asm/system.h>  #include <asm/mmu_context.h>  #include <asm/r4kcache.h> @@ -68,11 +68,18 @@ static struct bcache_ops mips_sc_ops = {   */  static inline int mips_sc_is_activated(struct cpuinfo_mips *c)  { +	unsigned int config2 = read_c0_config2(); +	unsigned int tmp; +  	/* Check the bypass bit (L2B) */ -	switch (c->cputype) { +	switch (current_cpu_type()) {  	case CPU_34K:  	case CPU_74K:  	case CPU_1004K: +	case CPU_1074K: +	case CPU_INTERAPTIV: +	case CPU_PROAPTIV: +	case CPU_P5600:  	case CPU_BMIPS5000:  		if (config2 & (1 << 12))  			return 0; @@ -83,6 +90,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c)  		c->scache.linesz = 2 << tmp;  	else  		return 0; +	return 1;  }  static inline int __init mips_sc_probe(void) @@ -95,10 +103,8 @@ static inline int __init mips_sc_probe(void)  	c->scache.flags |= MIPS_CACHE_NOT_PRESENT;  	/* Ignore anything but MIPSxx processors */ -	if (c->isa_level != MIPS_CPU_ISA_M32R1 && -	    c->isa_level != MIPS_CPU_ISA_M32R2 && -	    c->isa_level != MIPS_CPU_ISA_M64R1 && -	    c->isa_level != MIPS_CPU_ISA_M64R2) +	if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | +			      MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)))  		return 0;  	/* Does this MIPS32/MIPS64 CPU have a config2 register? 
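
For reference, a short sketch of the presence test this probe depends on (architecturally, each ConfigN register sets bit 31, MIPS_CONF_M, when ConfigN+1 is implemented); the snippet is illustrative, not lifted from mips_sc_probe():

        unsigned int config1, config2;

        config1 = read_c0_config1();
        if (!(config1 & MIPS_CONF_M))
                return 0;               /* no Config2, hence no L2 description */
        config2 = read_c0_config2();
        /* ... decode the SL/SS/SA cache fields from config2 ... */
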
*/ @@ -131,7 +137,7 @@ static inline int __init mips_sc_probe(void)  	return 1;  } -int __cpuinit mips_sc_init(void) +int mips_sc_init(void)  {  	int found = mips_sc_probe();  	if (found) { diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c index f330d38e557..0216ed6eaa2 100644 --- a/arch/mips/mm/sc-r5k.c +++ b/arch/mips/mm/sc-r5k.c @@ -1,6 +1,6 @@  /*   * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), - * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + * derived from r4xx0.c by David S. Miller (davem@davemloft.net).   */  #include <linux/init.h>  #include <linux/kernel.h> @@ -12,7 +12,6 @@  #include <asm/cacheops.h>  #include <asm/page.h>  #include <asm/pgtable.h> -#include <asm/system.h>  #include <asm/mmu_context.h>  #include <asm/r4kcache.h> @@ -59,7 +58,7 @@ static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size)  static void r5k_sc_enable(void)  { -        unsigned long flags; +	unsigned long flags;  	local_irq_save(flags);  	set_c0_config(R5K_CONF_SE); @@ -69,7 +68,7 @@ static void r5k_sc_enable(void)  static void r5k_sc_disable(void)  { -        unsigned long flags; +	unsigned long flags;  	local_irq_save(flags);  	blast_r5000_scache(); @@ -99,7 +98,7 @@ static struct bcache_ops r5k_sc_ops = {  	.bc_inv = r5k_dma_cache_inv_sc  }; -void __cpuinit r5k_sc_init(void) +void r5k_sc_init(void)  {  	if (r5k_sc_probe()) {  		r5k_sc_enable(); diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 274af3be144..9ac1efcfbcc 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -6,7 +6,6 @@  #undef DEBUG -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/mm.h>  #include <linux/bitops.h> @@ -104,7 +103,7 @@ static void blast_rm7k_tcache(void)  /*   * This function is executed in uncached address space.   */ -static __cpuinit void __rm7k_tc_enable(void) +static void __rm7k_tc_enable(void)  {  	int i; @@ -117,7 +116,7 @@ static __cpuinit void __rm7k_tc_enable(void)  		cache_op(Index_Store_Tag_T, CKSEG0ADDR(i));  } -static __cpuinit void rm7k_tc_enable(void) +static void rm7k_tc_enable(void)  {  	if (read_c0_config() & RM7K_CONF_TE)  		return; @@ -130,7 +129,7 @@ static __cpuinit void rm7k_tc_enable(void)  /*   * This function is executed in uncached address space.   */ -static __cpuinit void __rm7k_sc_enable(void) +static void __rm7k_sc_enable(void)  {  	int i; @@ -143,7 +142,7 @@ static __cpuinit void __rm7k_sc_enable(void)  		cache_op(Index_Store_Tag_SD, CKSEG0ADDR(i));  } -static __cpuinit void rm7k_sc_enable(void) +static void rm7k_sc_enable(void)  {  	if (read_c0_config() & RM7K_CONF_SE)  		return; @@ -184,7 +183,7 @@ static struct bcache_ops rm7k_sc_ops = {   * This is a probing function like the one found in c-r4k.c, we look for the   * wrap around point with different addresses.   */ -static __cpuinit void __probe_tcache(void) +static void __probe_tcache(void)  {  	unsigned long flags, addr, begin, end, pow2; @@ -226,7 +225,7 @@ static __cpuinit void __probe_tcache(void)  	local_irq_restore(flags);  } -void __cpuinit rm7k_sc_init(void) +void rm7k_sc_init(void)  {  	struct cpuinfo_mips *c = &current_cpu_data;  	unsigned int config = read_c0_config(); diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S new file mode 100644 index 00000000000..a5427c6e975 --- /dev/null +++ b/arch/mips/mm/tlb-funcs.S @@ -0,0 +1,39 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Micro-assembler generated tlb handler functions. + * + * Copyright (C) 2013  Broadcom Corporation. + * + * Based on mm/page-funcs.c + * Copyright (C) 2012  MIPS Technologies, Inc. + * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/regdef.h> + +#define FASTPATH_SIZE	128 + +EXPORT(tlbmiss_handler_setup_pgd_start) +LEAF(tlbmiss_handler_setup_pgd) +1:	j	1b		/* Dummy, will be replaced. */ +	.space	64 +END(tlbmiss_handler_setup_pgd) +EXPORT(tlbmiss_handler_setup_pgd_end) + +LEAF(handle_tlbm) +	.space		FASTPATH_SIZE * 4 +END(handle_tlbm) +EXPORT(handle_tlbm_end) + +LEAF(handle_tlbs) +	.space		FASTPATH_SIZE * 4 +END(handle_tlbs) +EXPORT(handle_tlbs_end) + +LEAF(handle_tlbl) +	.space		FASTPATH_SIZE * 4 +END(handle_tlbl) +EXPORT(handle_tlbl_end) diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index 0f5ab236ab6..d657493ef56 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -1,7 +1,7 @@  /*   * r2300.c: R2000 and R3000 specific mmu/cache code.   * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   *   * with a lot of changes to make this thing work for R3000s   * Tx39XX R4k style caches added. HK @@ -10,7 +10,6 @@   * Copyright (C) 2002  Ralf Baechle   * Copyright (C) 2002  Maciej W. Rozycki   */ -#include <linux/init.h>  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/smp.h> @@ -19,7 +18,7 @@  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/mmu_context.h> -#include <asm/system.h> +#include <asm/tlbmisc.h>  #include <asm/isadep.h>  #include <asm/io.h>  #include <asm/bootinfo.h> @@ -223,8 +222,8 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)  	local_irq_restore(flags);  } -void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, -			    unsigned long entryhi, unsigned long pagemask) +void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, +		     unsigned long entryhi, unsigned long pagemask)  {  	unsigned long flags;  	unsigned long old_ctx; @@ -276,7 +275,7 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,  	}  } -void __cpuinit tlb_init(void) +void tlb_init(void)  {  	local_flush_tlb_all(); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index c618eed933a..3914e27456f 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -3,76 +3,58 @@   * License.  See the file "COPYING" in the main directory of this archive   * for more details.   * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org   * Carsten Langgaard, carstenl@mips.com   * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.   */ +#include <linux/cpu_pm.h>  #include <linux/init.h>  #include <linux/sched.h>  #include <linux/smp.h>  #include <linux/mm.h>  #include <linux/hugetlb.h> +#include <linux/module.h>  #include <asm/cpu.h> +#include <asm/cpu-type.h>  #include <asm/bootinfo.h>  #include <asm/mmu_context.h>  #include <asm/pgtable.h> -#include <asm/system.h> +#include <asm/tlb.h> +#include <asm/tlbmisc.h>  extern void build_tlb_refill_handler(void);  /* - * Make sure all entries differ.  If they're not different - * MIPS32 will take revenge ... + * LOONGSON2/3 has a 4 entry itlb which is a subset of dtlb, + * unfortunately, itlb is not totally transparent to software.   
*/ -#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) - -/* Atomicity and interruptability */ -#ifdef CONFIG_MIPS_MT_SMTC - -#include <asm/smtc.h> -#include <asm/mipsmtregs.h> - -#define ENTER_CRITICAL(flags) \ -	{ \ -	unsigned int mvpflags; \ -	local_irq_save(flags);\ -	mvpflags = dvpe() -#define EXIT_CRITICAL(flags) \ -	evpe(mvpflags); \ -	local_irq_restore(flags); \ +static inline void flush_itlb(void) +{ +	switch (current_cpu_type()) { +	case CPU_LOONGSON2: +	case CPU_LOONGSON3: +		write_c0_diag(4); +		break; +	default: +		break;  	} -#else - -#define ENTER_CRITICAL(flags) local_irq_save(flags) -#define EXIT_CRITICAL(flags) local_irq_restore(flags) - -#endif /* CONFIG_MIPS_MT_SMTC */ - -#if defined(CONFIG_CPU_LOONGSON2) -/* - * LOONGSON2 has a 4 entry itlb which is a subset of dtlb, - * unfortrunately, itlb is not totally transparent to software. - */ -#define FLUSH_ITLB write_c0_diag(4); - -#define FLUSH_ITLB_VM(vma) { if ((vma)->vm_flags & VM_EXEC)  write_c0_diag(4); } - -#else - -#define FLUSH_ITLB -#define FLUSH_ITLB_VM(vma) +} -#endif +static inline void flush_itlb_vm(struct vm_area_struct *vma) +{ +	if (vma->vm_flags & VM_EXEC) +		flush_itlb(); +}  void local_flush_tlb_all(void)  {  	unsigned long flags;  	unsigned long old_ctx; -	int entry; +	int entry, ftlbhighset; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	/* Save old context and create impossible VPN2 value */  	old_ctx = read_c0_entryhi();  	write_c0_entrylo0(0); @@ -81,19 +63,37 @@ void local_flush_tlb_all(void)  	entry = read_c0_wired();  	/* Blast 'em all away. */ -	while (entry < current_cpu_data.tlbsize) { -		/* Make sure all entries differ. */ -		write_c0_entryhi(UNIQUE_ENTRYHI(entry)); -		write_c0_index(entry); -		mtc0_tlbw_hazard(); -		tlb_write_indexed(); -		entry++; +	if (cpu_has_tlbinv) { +		if (current_cpu_data.tlbsizevtlb) { +			write_c0_index(0); +			mtc0_tlbw_hazard(); +			tlbinvf();  /* invalidate VTLB */ +		} +		ftlbhighset = current_cpu_data.tlbsizevtlb + +			current_cpu_data.tlbsizeftlbsets; +		for (entry = current_cpu_data.tlbsizevtlb; +		     entry < ftlbhighset; +		     entry++) { +			write_c0_index(entry); +			mtc0_tlbw_hazard(); +			tlbinvf();  /* invalidate one FTLB set */ +		} +	} else { +		while (entry < current_cpu_data.tlbsize) { +			/* Make sure all entries differ. */ +			write_c0_entryhi(UNIQUE_ENTRYHI(entry)); +			write_c0_index(entry); +			mtc0_tlbw_hazard(); +			tlb_write_indexed(); +			entry++; +		}  	}  	tlbw_use_hazard();  	write_c0_entryhi(old_ctx); -	FLUSH_ITLB; -	EXIT_CRITICAL(flags); +	flush_itlb(); +	local_irq_restore(flags);  } +EXPORT_SYMBOL(local_flush_tlb_all);  /* All entries common to a mm share an asid.  To effectively flush     these entries, we just bump the asid. */ @@ -121,16 +121,16 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,  	if (cpu_context(cpu, mm) != 0) {  		unsigned long size, flags; -		ENTER_CRITICAL(flags); -		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; -		size = (size + 1) >> 1; -		if (size <= current_cpu_data.tlbsize/2) { +		local_irq_save(flags); +		start = round_down(start, PAGE_SIZE << 1); +		end = round_up(end, PAGE_SIZE << 1); +		size = (end - start) >> (PAGE_SHIFT + 1); +		if (size <= (current_cpu_data.tlbsizeftlbsets ? 
+			     current_cpu_data.tlbsize / 8 : +			     current_cpu_data.tlbsize / 2)) {  			int oldpid = read_c0_entryhi();  			int newpid = cpu_asid(cpu, mm); -			start &= (PAGE_MASK << 1); -			end += ((PAGE_SIZE << 1) - 1); -			end &= (PAGE_MASK << 1);  			while (start < end) {  				int idx; @@ -154,8 +154,8 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,  		} else {  			drop_mmu_context(mm, cpu);  		} -		FLUSH_ITLB; -		EXIT_CRITICAL(flags); +		flush_itlb(); +		local_irq_restore(flags);  	}  } @@ -163,10 +163,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)  {  	unsigned long size, flags; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;  	size = (size + 1) >> 1; -	if (size <= current_cpu_data.tlbsize / 2) { +	if (size <= (current_cpu_data.tlbsizeftlbsets ? +		     current_cpu_data.tlbsize / 8 : +		     current_cpu_data.tlbsize / 2)) {  		int pid = read_c0_entryhi();  		start &= (PAGE_MASK << 1); @@ -196,8 +198,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)  	} else {  		local_flush_tlb_all();  	} -	FLUSH_ITLB; -	EXIT_CRITICAL(flags); +	flush_itlb(); +	local_irq_restore(flags);  }  void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -210,7 +212,7 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)  		newpid = cpu_asid(cpu, vma->vm_mm);  		page &= (PAGE_MASK << 1); -		ENTER_CRITICAL(flags); +		local_irq_save(flags);  		oldpid = read_c0_entryhi();  		write_c0_entryhi(page | newpid);  		mtc0_tlbw_hazard(); @@ -229,8 +231,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)  	finish:  		write_c0_entryhi(oldpid); -		FLUSH_ITLB_VM(vma); -		EXIT_CRITICAL(flags); +		flush_itlb_vm(vma); +		local_irq_restore(flags);  	}  } @@ -243,7 +245,7 @@ void local_flush_tlb_one(unsigned long page)  	unsigned long flags;  	int oldpid, idx; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	oldpid = read_c0_entryhi();  	page &= (PAGE_MASK << 1);  	write_c0_entryhi(page); @@ -261,8 +263,8 @@ void local_flush_tlb_one(unsigned long page)  		tlbw_use_hazard();  	}  	write_c0_entryhi(oldpid); -	FLUSH_ITLB; -	EXIT_CRITICAL(flags); +	flush_itlb(); +	local_irq_restore(flags);  }  /* @@ -285,7 +287,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)  	if (current->active_mm != vma->vm_mm)  		return; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	pid = read_c0_entryhi() & ASID_MASK;  	address &= (PAGE_MASK << 1); @@ -297,7 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)  	pudp = pud_offset(pgdp, address);  	pmdp = pmd_offset(pudp, address);  	idx = read_c0_index(); -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	/* this could be a huge page  */  	if (pmd_huge(*pmdp)) {  		unsigned long lo; @@ -312,6 +314,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)  			tlb_write_random();  		else  			tlb_write_indexed(); +		tlbw_use_hazard();  		write_c0_pagemask(PM_DEFAULT_MASK);  	} else  #endif @@ -333,19 +336,19 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)  			tlb_write_indexed();  	}  	tlbw_use_hazard(); -	FLUSH_ITLB_VM(vma); -	EXIT_CRITICAL(flags); +	flush_itlb_vm(vma); +	local_irq_restore(flags);  } -void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, -	unsigned long entryhi, unsigned long pagemask) +void 
add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, +		     unsigned long entryhi, unsigned long pagemask)  {  	unsigned long flags;  	unsigned long wired;  	unsigned long old_pagemask;  	unsigned long old_ctx; -	ENTER_CRITICAL(flags); +	local_irq_save(flags);  	/* Save old context and create impossible VPN2 value */  	old_ctx = read_c0_entryhi();  	old_pagemask = read_c0_pagemask(); @@ -365,55 +368,30 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,  	tlbw_use_hazard();	/* What is the hazard here? */  	write_c0_pagemask(old_pagemask);  	local_flush_tlb_all(); -	EXIT_CRITICAL(flags); +	local_irq_restore(flags);  } -/* - * Used for loading TLB entries before trap_init() has started, when we - * don't actually want to add a wired entry which remains throughout the - * lifetime of the system - */ - -static int temp_tlb_entry __cpuinitdata; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE -__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, -			       unsigned long entryhi, unsigned long pagemask) +int __init has_transparent_hugepage(void)  { -	int ret = 0; +	unsigned int mask;  	unsigned long flags; -	unsigned long wired; -	unsigned long old_pagemask; -	unsigned long old_ctx; -	ENTER_CRITICAL(flags); -	/* Save old context and create impossible VPN2 value */ -	old_ctx = read_c0_entryhi(); -	old_pagemask = read_c0_pagemask(); -	wired = read_c0_wired(); -	if (--temp_tlb_entry < wired) { -		printk(KERN_WARNING -		       "No TLB space left for add_temporary_entry\n"); -		ret = -ENOSPC; -		goto out; -	} +	local_irq_save(flags); +	write_c0_pagemask(PM_HUGE_MASK); +	back_to_back_c0_hazard(); +	mask = read_c0_pagemask(); +	write_c0_pagemask(PM_DEFAULT_MASK); -	write_c0_index(temp_tlb_entry); -	write_c0_pagemask(pagemask); -	write_c0_entryhi(entryhi); -	write_c0_entrylo0(entrylo0); -	write_c0_entrylo1(entrylo1); -	mtc0_tlbw_hazard(); -	tlb_write_indexed(); -	tlbw_use_hazard(); +	local_irq_restore(flags); -	write_c0_entryhi(old_ctx); -	write_c0_pagemask(old_pagemask); -out: -	EXIT_CRITICAL(flags); -	return ret; +	return mask == PM_HUGE_MASK;  } -static int __cpuinitdata ntlb; +#endif /* CONFIG_TRANSPARENT_HUGEPAGE  */ + +static int ntlb;  static int __init set_ntlb(char *str)  {  	get_option(&str, &ntlb); @@ -422,7 +400,10 @@ static int __init set_ntlb(char *str)  __setup("ntlb=", set_ntlb); -void __cpuinit tlb_init(void) +/* + * Configure TLB (for init or after a CPU has been powered off). + */ +static void r4k_tlb_configure(void)  {  	/*  	 * You should never change this register: @@ -438,7 +419,7 @@ void __cpuinit tlb_init(void)  	    current_cpu_type() == CPU_R14000)  		write_c0_framemask(0); -	if (kernel_uses_smartmips_rixi) { +	if (cpu_has_rixi) {  		/*  		 * Enable the no read, no exec bits, and enable large virtual  		 * address. @@ -450,12 +431,15 @@ void __cpuinit tlb_init(void)  		write_c0_pagegrain(pg);  	} -	temp_tlb_entry = current_cpu_data.tlbsize - 1; - -        /* From this point on the ARC firmware is dead.  */ +	/* From this point on the ARC firmware is dead.	 */  	local_flush_tlb_all();  	/* Did I tell you that ARC SUCKS?  
*/ +} + +void tlb_init(void) +{ +	r4k_tlb_configure();  	if (ntlb) {  		if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) { @@ -469,3 +453,26 @@ void __cpuinit tlb_init(void)  	build_tlb_refill_handler();  } + +static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd, +			       void *v) +{ +	switch (cmd) { +	case CPU_PM_ENTER_FAILED: +	case CPU_PM_EXIT: +		r4k_tlb_configure(); +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block r4k_tlb_pm_notifier_block = { +	.notifier_call = r4k_tlb_pm_notifier, +}; + +static int __init r4k_tlb_init_pm(void) +{ +	return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block); +} +arch_initcall(r4k_tlb_init_pm); diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c index 2b82f23df1a..138a2ec7cc6 100644 --- a/arch/mips/mm/tlb-r8k.c +++ b/arch/mips/mm/tlb-r8k.c @@ -3,12 +3,11 @@   * License.  See the file "COPYING" in the main directory of this archive   * for more details.   * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net)   * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org   * Carsten Langgaard, carstenl@mips.com   * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.   */ -#include <linux/init.h>  #include <linux/sched.h>  #include <linux/smp.h>  #include <linux/mm.h> @@ -17,7 +16,6 @@  #include <asm/bootinfo.h>  #include <asm/mmu_context.h>  #include <asm/pgtable.h> -#include <asm/system.h>  extern void build_tlb_refill_handler(void); @@ -214,14 +212,14 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)  	local_irq_restore(flags);  } -static void __cpuinit probe_tlb(unsigned long config) +static void probe_tlb(unsigned long config)  {  	struct cpuinfo_mips *c = &current_cpu_data;  	c->tlbsize = 3 * 128;		/* 3 sets each 128 entries */  } -void __cpuinit tlb_init(void) +void tlb_init(void)  {  	unsigned int config = read_c0_config();  	unsigned long status; diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S index e99eaa1fbed..318855eb5f8 100644 --- a/arch/mips/mm/tlbex-fault.S +++ b/arch/mips/mm/tlbex-fault.S @@ -7,7 +7,6 @@   * Copyright (C) 1999 Silicon Graphics, Inc.   */  #include <asm/mipsregs.h> -#include <asm/page.h>  #include <asm/regdef.h>  #include <asm/stackframe.h> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 93816f3bca6..e80e10bafc8 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -5,10 +5,11 @@   *   * Synthesize TLB refill handlers at runtime.   * - * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer - * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki + * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer + * Copyright (C) 2005, 2007, 2008, 2009	 Maciej W. Rozycki  * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)  * Copyright (C) 2008, 2009 Cavium Networks, Inc. + * Copyright (C) 2011  MIPS Technologies, Inc.   *   * ... and the days got worse and worse and now you see   * I've gone completly out of my mind. @@ -25,11 +26,14 @@  #include <linux/types.h>  #include <linux/smp.h>  #include <linux/string.h> -#include <linux/init.h> +#include <linux/cache.h> -#include <asm/mmu_context.h> +#include <asm/cacheflush.h> +#include <asm/cpu-type.h> +#include <asm/pgtable.h>  #include <asm/war.h>  #include <asm/uasm.h> +#include <asm/setup.h>  /*   * TLB load/store/modify handlers. 
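Aside on the tlb-r4k.c hunk above: the patch wires r4k_tlb_configure() into the kernel's CPU power-management notifier chain, so that the TLB setup it performs (frame mask, the PageGrain/RIXI bits, and a full local flush) is redone when a core returns from a power-gated state (CPU_PM_EXIT) or when an attempted power-down is aborted (CPU_PM_ENTER_FAILED). The snippet below is only an illustrative restatement of that notifier pattern, not part of the patch; my_restore_hw() is a hypothetical stand-in for the real per-CPU reconfiguration code.

#include <linux/cpu_pm.h>
#include <linux/init.h>
#include <linux/notifier.h>

static void my_restore_hw(void)
{
	/* reprogram per-CPU coprocessor state lost while the core was powered down */
}

static int my_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_PM_ENTER_FAILED:	/* entry to the low-power state failed */
	case CPU_PM_EXIT:		/* core is powered up again */
		my_restore_hw();
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_pm_nb = {
	.notifier_call = my_pm_notifier,
};

static int __init my_pm_init(void)
{
	return cpu_pm_register_notifier(&my_pm_nb);
}
arch_initcall(my_pm_init);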
@@ -40,6 +44,18 @@  extern void tlb_do_page_fault_0(void);  extern void tlb_do_page_fault_1(void); +struct work_registers { +	int r1; +	int r2; +	int r3; +}; + +struct tlb_reg_save { +	unsigned long a; +	unsigned long b; +} ____cacheline_aligned_in_smp; + +static struct tlb_reg_save handler_reg_save[NR_CPUS];  static inline int r45k_bvahwbug(void)  { @@ -63,6 +79,56 @@ static inline int __maybe_unused r10000_llsc_war(void)  	return R10000_LLSC_WAR;  } +static int use_bbit_insns(void) +{ +	switch (current_cpu_type()) { +	case CPU_CAVIUM_OCTEON: +	case CPU_CAVIUM_OCTEON_PLUS: +	case CPU_CAVIUM_OCTEON2: +	case CPU_CAVIUM_OCTEON3: +		return 1; +	default: +		return 0; +	} +} + +static int use_lwx_insns(void) +{ +	switch (current_cpu_type()) { +	case CPU_CAVIUM_OCTEON2: +	case CPU_CAVIUM_OCTEON3: +		return 1; +	default: +		return 0; +	} +} +#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ +    CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 +static bool scratchpad_available(void) +{ +	return true; +} +static int scratchpad_offset(int i) +{ +	/* +	 * CVMSEG starts at address -32768 and extends for +	 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. +	 */ +	i += 1; /* Kernel use starts at the top and works down. */ +	return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; +} +#else +static bool scratchpad_available(void) +{ +	return false; +} +static int scratchpad_offset(int i) +{ +	BUG(); +	/* Really unreachable, but evidently some GCC want this. */ +	return 0; +} +#endif  /*   * Found by experiment: At least some revisions of the 4kc throw under   * some circumstances a machine check exception, triggered by invalid @@ -72,7 +138,7 @@ static inline int __maybe_unused r10000_llsc_war(void)   * why; it's not an issue caused by the core RTL.   *   */ -static int __cpuinit m4kc_tlbp_war(void) +static int m4kc_tlbp_war(void)  {  	return (current_cpu_data.processor_id & 0xffff00) ==  	       (PRID_COMP_MIPS | PRID_IMP_4KC); @@ -84,8 +150,8 @@ enum label_id {  	label_leave,  	label_vmalloc,  	label_vmalloc_done, -	label_tlbw_hazard, -	label_split, +	label_tlbw_hazard_0, +	label_split = label_tlbw_hazard_0 + 8,  	label_tlbl_goaround1,  	label_tlbl_goaround2,  	label_nopage_tlbl, @@ -94,7 +160,7 @@ enum label_id {  	label_smp_pgtable_change,  	label_r3000_write_probe_fail,  	label_large_segbits_fault, -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	label_tlb_huge_update,  #endif  }; @@ -103,7 +169,7 @@ UASM_L_LA(_second_part)  UASM_L_LA(_leave)  UASM_L_LA(_vmalloc)  UASM_L_LA(_vmalloc_done) -UASM_L_LA(_tlbw_hazard) +/* _tlbw_hazard_x is handled differently.  */  UASM_L_LA(_split)  UASM_L_LA(_tlbl_goaround1)  UASM_L_LA(_tlbl_goaround2) @@ -113,24 +179,88 @@ UASM_L_LA(_nopage_tlbm)  UASM_L_LA(_smp_pgtable_change)  UASM_L_LA(_r3000_write_probe_fail)  UASM_L_LA(_large_segbits_fault) -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  UASM_L_LA(_tlb_huge_update)  #endif +static int hazard_instance; + +static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance) +{ +	switch (instance) { +	case 0 ... 7: +		uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance); +		return; +	default: +		BUG(); +	} +} + +static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance) +{ +	switch (instance) { +	case 0 ... 7: +		uasm_build_label(l, *p, label_tlbw_hazard_0 + instance); +		break; +	default: +		BUG(); +	} +} +  /* - * For debug purposes. + * pgtable bits are assigned dynamically depending on processor feature + * and statically based on kernel configuration.  
This spits out the actual + * values the kernel is using.	Required to make sense from disassembled + * TLB exception handlers.   */ -static inline void dump_handler(const u32 *handler, int count) +static void output_pgtable_bits_defines(void) +{ +#define pr_define(fmt, ...)					\ +	pr_debug("#define " fmt, ##__VA_ARGS__) + +	pr_debug("#include <asm/asm.h>\n"); +	pr_debug("#include <asm/regdef.h>\n"); +	pr_debug("\n"); + +	pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); +	pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT); +	pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); +	pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT); +	pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +	pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); +	pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); +#endif +	if (cpu_has_rixi) { +#ifdef _PAGE_NO_EXEC_SHIFT +		pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); +#endif +#ifdef _PAGE_NO_READ_SHIFT +		pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); +#endif +	} +	pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); +	pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); +	pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); +	pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); +	pr_debug("\n"); +} + +static inline void dump_handler(const char *symbol, const u32 *handler, int count)  {  	int i; +	pr_debug("LEAF(%s)\n", symbol); +  	pr_debug("\t.set push\n");  	pr_debug("\t.set noreorder\n");  	for (i = 0; i < count; i++) -		pr_debug("\t%p\t.word 0x%08x\n", &handler[i], handler[i]); +		pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]); -	pr_debug("\t.set pop\n"); +	pr_debug("\t.set\tpop\n"); + +	pr_debug("\tEND(%s)\n", symbol);  }  /* The only general purpose registers allowed in TLB handlers. */ @@ -163,26 +293,110 @@ static inline void dump_handler(const u32 *handler, int count)   * We deliberately chose a buffer size of 128, so we won't scribble   * over anything important on overflow before we panic.   */ -static u32 tlb_handler[128] __cpuinitdata; +static u32 tlb_handler[128];  /* simply assume worst case size for labels and relocs */ -static struct uasm_label labels[128] __cpuinitdata; -static struct uasm_reloc relocs[128] __cpuinitdata; +static struct uasm_label labels[128]; +static struct uasm_reloc relocs[128]; -#ifdef CONFIG_64BIT -static int check_for_high_segbits __cpuinitdata; -#endif +static int check_for_high_segbits; + +static unsigned int kscratch_used_mask; + +static inline int __maybe_unused c0_kscratch(void) +{ +	switch (current_cpu_type()) { +	case CPU_XLP: +	case CPU_XLR: +		return 22; +	default: +		return 31; +	} +} + +static int allocate_kscratch(void) +{ +	int r; +	unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; + +	r = ffs(a); + +	if (r == 0) +		return -1; + +	r--; /* make it zero based */ + +	kscratch_used_mask |= (1 << r); + +	return r; +} + +static int scratch_reg; +static int pgd_reg; +enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; + +static struct work_registers build_get_work_registers(u32 **p) +{ +	struct work_registers r; + +	if (scratch_reg >= 0) { +		/* Save in CPU local C0_KScratch? 
*/ +		UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg); +		r.r1 = K0; +		r.r2 = K1; +		r.r3 = 1; +		return r; +	} + +	if (num_possible_cpus() > 1) { +		/* Get smp_processor_id */ +		UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG); +		UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT); + +		/* handler_reg_save index in K0 */ +		UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); + +		UASM_i_LA(p, K1, (long)&handler_reg_save); +		UASM_i_ADDU(p, K0, K0, K1); +	} else { +		UASM_i_LA(p, K0, (long)&handler_reg_save); +	} +	/* K0 now points to save area, save $1 and $2  */ +	UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); +	UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); + +	r.r1 = K1; +	r.r2 = 1; +	r.r3 = 2; +	return r; +} + +static void build_restore_work_registers(u32 **p) +{ +	if (scratch_reg >= 0) { +		UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); +		return; +	} +	/* K0 already points to save area, restore $1 and $2  */ +	UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); +	UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); +}  #ifndef CONFIG_MIPS_PGD_C0_CONTEXT +  /*   * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,   * we cannot do r3000 under these circumstances. + * + * Declare pgd_current here instead of including mmu_context.h to avoid type + * conflicts for tlbmiss_handler_setup_pgd   */ +extern unsigned long pgd_current[];  /*   * The R3000 TLB handler is simple.   */ -static void __cpuinit build_r3000_tlb_refill_handler(void) +static void build_r3000_tlb_refill_handler(void)  {  	long pgdc = (long)pgd_current;  	u32 *p; @@ -216,7 +430,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void)  	memcpy((void *)ebase, tlb_handler, 0x80); -	dump_handler((u32 *)ebase, 32); +	dump_handler("r3000_tlb_refill", (u32 *)ebase, 32);  }  #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ @@ -227,7 +441,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void)   * other one.To keep things simple, we first assume linear space,   * then we relocate it to the final handler layout as needed.   */ -static u32 final_handler[64] __cpuinitdata; +static u32 final_handler[64];  /*   * Hazards @@ -235,8 +449,8 @@ static u32 final_handler[64] __cpuinitdata;   * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:   * 2. A timing hazard exists for the TLBP instruction.   * - *      stalling_instruction - *      TLBP + *	stalling_instruction + *	TLBP   *   * The JTLB is being read for the TLBP throughout the stall generated by the   * previous instruction. This is not really correct as the stalling instruction @@ -247,18 +461,17 @@ static u32 final_handler[64] __cpuinitdata;   * The software work-around is to not allow the instruction preceding the TLBP   * to stall - make it an NOP or some other instruction guaranteed not to stall.   * - * Errata 2 will not be fixed.  This errata is also on the R5000. + * Errata 2 will not be fixed.	This errata is also on the R5000.   *   * As if we MIPS hackers wouldn't know how to nop pipelines happy ...   */ -static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) +static void __maybe_unused build_tlb_probe_entry(u32 **p)  {  	switch (current_cpu_type()) {  	/* Found by experiment: R4600 v2.0/R4700 needs this, too.  
*/  	case CPU_R4600:  	case CPU_R4700:  	case CPU_R5000: -	case CPU_R5000A:  	case CPU_NEVADA:  		uasm_i_nop(p);  		uasm_i_tlbp(p); @@ -272,13 +485,13 @@ static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p)  /*   * Write random or indexed TLB entry, and care about the hazards from - * the preceeding mtc0 and for the following eret. + * the preceding mtc0 and for the following eret.   */  enum tlb_write_entry { tlb_random, tlb_indexed }; -static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, -					 struct uasm_reloc **r, -					 enum tlb_write_entry wmode) +static void build_tlb_write_entry(u32 **p, struct uasm_label **l, +				  struct uasm_reloc **r, +				  enum tlb_write_entry wmode)  {  	void(*tlbw)(u32 **) = NULL; @@ -288,8 +501,24 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  	}  	if (cpu_has_mips_r2) { -		if (cpu_has_mips_r2_exec_hazard) +		/* +		 * The architecture spec says an ehb is required here, +		 * but a number of cores do not have the hazard and +		 * using an ehb causes an expensive pipeline stall. +		 */ +		switch (current_cpu_type()) { +		case CPU_M14KC: +		case CPU_74K: +		case CPU_1074K: +		case CPU_PROAPTIV: +		case CPU_P5600: +		case CPU_M5150: +			break; + +		default:  			uasm_i_ehb(p); +			break; +		}  		tlbw(p);  		return;  	} @@ -305,25 +534,32 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  		 * This branch uses up a mtc0 hazard nop slot and saves  		 * two nops after the tlbw instruction.  		 */ -		uasm_il_bgezl(p, r, 0, label_tlbw_hazard); +		uasm_bgezl_hazard(p, r, hazard_instance);  		tlbw(p); -		uasm_l_tlbw_hazard(l, *p); +		uasm_bgezl_label(l, p, hazard_instance); +		hazard_instance++;  		uasm_i_nop(p);  		break;  	case CPU_R4600:  	case CPU_R4700: -	case CPU_R5000: -	case CPU_R5000A:  		uasm_i_nop(p);  		tlbw(p);  		uasm_i_nop(p);  		break; +	case CPU_R5000: +	case CPU_NEVADA: +		uasm_i_nop(p); /* QED specifies 2 nops hazard */ +		uasm_i_nop(p); /* QED specifies 2 nops hazard */ +		tlbw(p); +		break; +  	case CPU_R4300:  	case CPU_5KC:  	case CPU_TX49XX:  	case CPU_PR4450: +	case CPU_XLR:  		uasm_i_nop(p);  		tlbw(p);  		break; @@ -333,6 +569,8 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  	case CPU_R14000:  	case CPU_4KC:  	case CPU_4KEC: +	case CPU_M14KC: +	case CPU_M14KEC:  	case CPU_SB1:  	case CPU_SB1A:  	case CPU_4KSC: @@ -344,6 +582,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  	case CPU_BMIPS4380:  	case CPU_BMIPS5000:  	case CPU_LOONGSON2: +	case CPU_LOONGSON3:  	case CPU_R5500:  		if (m4kc_tlbp_war())  			uasm_i_nop(p); @@ -351,17 +590,6 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  		tlbw(p);  		break; -	case CPU_NEVADA: -		uasm_i_nop(p); /* QED specifies 2 nops hazard */ -		/* -		 * This branch uses up a mtc0 hazard nop slot and saves -		 * a nop after the tlbw instruction. -		 */ -		uasm_il_bgezl(p, r, 0, label_tlbw_hazard); -		tlbw(p); -		uasm_l_tlbw_hazard(l, *p); -		break; -  	case CPU_RM7000:  		uasm_i_nop(p);  		uasm_i_nop(p); @@ -370,24 +598,6 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  		tlbw(p);  		break; -	case CPU_RM9000: -		/* -		 * When the JTLB is updated by tlbwi or tlbwr, a subsequent -		 * use of the JTLB for instructions should not occur for 4 -		 * cpu cycles and use for data translations should not occur -		 * for 3 cpu cycles. 
-		 */ -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		tlbw(p); -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		uasm_i_ssnop(p); -		break; -  	case CPU_VR4111:  	case CPU_VR4121:  	case CPU_VR4122: @@ -415,17 +625,16 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,  	default:  		panic("No TLB refill handler yet (CPU type: %d)", -		      current_cpu_data.cputype); +		      current_cpu_type());  		break;  	}  } -static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, -								  unsigned int reg) +static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, +							unsigned int reg)  { -	if (kernel_uses_smartmips_rixi) { -		UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); -		UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); +	if (cpu_has_rixi) { +		UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));  	} else {  #ifdef CONFIG_64BIT_PHYS_ADDR  		uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); @@ -435,34 +644,54 @@ static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p,  	}  } -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT -static __cpuinit void build_restore_pagemask(u32 **p, -					     struct uasm_reloc **r, -					     unsigned int tmp, -					     enum label_id lid) +static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, +				   unsigned int tmp, enum label_id lid, +				   int restore_scratch)  { -	/* Reset default page size */ -	if (PM_DEFAULT_MASK >> 16) { -		uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); -		uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); -		uasm_il_b(p, r, lid); -		uasm_i_mtc0(p, tmp, C0_PAGEMASK); -	} else if (PM_DEFAULT_MASK) { -		uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); -		uasm_il_b(p, r, lid); -		uasm_i_mtc0(p, tmp, C0_PAGEMASK); +	if (restore_scratch) { +		/* Reset default page size */ +		if (PM_DEFAULT_MASK >> 16) { +			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); +			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); +			uasm_i_mtc0(p, tmp, C0_PAGEMASK); +			uasm_il_b(p, r, lid); +		} else if (PM_DEFAULT_MASK) { +			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); +			uasm_i_mtc0(p, tmp, C0_PAGEMASK); +			uasm_il_b(p, r, lid); +		} else { +			uasm_i_mtc0(p, 0, C0_PAGEMASK); +			uasm_il_b(p, r, lid); +		} +		if (scratch_reg >= 0) +			UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); +		else +			UASM_i_LW(p, 1, scratchpad_offset(0), 0);  	} else { -		uasm_il_b(p, r, lid); -		uasm_i_mtc0(p, 0, C0_PAGEMASK); +		/* Reset default page size */ +		if (PM_DEFAULT_MASK >> 16) { +			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); +			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); +			uasm_il_b(p, r, lid); +			uasm_i_mtc0(p, tmp, C0_PAGEMASK); +		} else if (PM_DEFAULT_MASK) { +			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); +			uasm_il_b(p, r, lid); +			uasm_i_mtc0(p, tmp, C0_PAGEMASK); +		} else { +			uasm_il_b(p, r, lid); +			uasm_i_mtc0(p, 0, C0_PAGEMASK); +		}  	}  } -static __cpuinit void build_huge_tlb_write_entry(u32 **p, -						 struct uasm_label **l, -						 struct uasm_reloc **r, -						 unsigned int tmp, -						 enum tlb_write_entry wmode) +static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l, +				       struct uasm_reloc **r, +				       unsigned int tmp, +				       enum tlb_write_entry wmode, +				       int restore_scratch)  {  	/* Set huge page tlb entry size */  	uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); @@ -471,24 +700,27 @@ static __cpuinit void build_huge_tlb_write_entry(u32 **p,  	
build_tlb_write_entry(p, l, r, wmode); -	build_restore_pagemask(p, r, tmp, label_leave); +	build_restore_pagemask(p, r, tmp, label_leave, restore_scratch);  }  /*   * Check if Huge PTE is present, if so then jump to LABEL.   */ -static void __cpuinit +static void  build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, -		unsigned int pmd, int lid) +		  unsigned int pmd, int lid)  {  	UASM_i_LW(p, tmp, 0, pmd); -	uasm_i_andi(p, tmp, tmp, _PAGE_HUGE); -	uasm_il_bnez(p, r, tmp, lid); +	if (use_bbit_insns()) { +		uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid); +	} else { +		uasm_i_andi(p, tmp, tmp, _PAGE_HUGE); +		uasm_il_bnez(p, r, tmp, lid); +	}  } -static __cpuinit void build_huge_update_entries(u32 **p, -						unsigned int pte, -						unsigned int tmp) +static void build_huge_update_entries(u32 **p, unsigned int pte, +				      unsigned int tmp)  {  	int small_sequence; @@ -503,7 +735,7 @@ static __cpuinit void build_huge_update_entries(u32 **p,  	 */  	small_sequence = (HPAGE_SIZE >> 7) < 0x10000; -	/* We can clobber tmp.  It isn't used after this.*/ +	/* We can clobber tmp.	It isn't used after this.*/  	if (!small_sequence)  		uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16)); @@ -518,11 +750,10 @@ static __cpuinit void build_huge_update_entries(u32 **p,  	UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */  } -static __cpuinit void build_huge_handler_tail(u32 **p, -					      struct uasm_reloc **r, -					      struct uasm_label **l, -					      unsigned int pte, -					      unsigned int ptr) +static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, +				    struct uasm_label **l, +				    unsigned int pte, +				    unsigned int ptr)  {  #ifdef CONFIG_SMP  	UASM_i_SC(p, pte, 0, ptr); @@ -532,16 +763,16 @@ static __cpuinit void build_huge_handler_tail(u32 **p,  	UASM_i_SW(p, pte, 0, ptr);  #endif  	build_huge_update_entries(p, pte, ptr); -	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed); +	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);  } -#endif /* CONFIG_HUGETLB_PAGE */ +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */  #ifdef CONFIG_64BIT  /*   * TMP and PTR are scratch.   * TMP will be clobbered, PTR will hold the pmd entry.   */ -static void __cpuinit +static void  build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  		 unsigned int tmp, unsigned int ptr)  { @@ -572,37 +803,34 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  	}  	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */ -#ifdef CONFIG_MIPS_PGD_C0_CONTEXT -	/* -	 * &pgd << 11 stored in CONTEXT [23..63]. -	 */ -	UASM_i_MFC0(p, ptr, C0_CONTEXT); -	uasm_i_dins(p, ptr, 0, 0, 23); /* Clear lower 23 bits of context. */ -	uasm_i_ori(p, ptr, ptr, 0x540); /* 1 0  1 0 1  << 6  xkphys cached */ -	uasm_i_drotr(p, ptr, ptr, 11); +	if (pgd_reg != -1) { +		/* pgd is in pgd_reg */ +		UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); +	} else { +#if defined(CONFIG_MIPS_PGD_C0_CONTEXT) +		/* +		 * &pgd << 11 stored in CONTEXT [23..63]. +		 */ +		UASM_i_MFC0(p, ptr, C0_CONTEXT); + +		/* Clear lower 23 bits of context. */ +		uasm_i_dins(p, ptr, 0, 0, 23); + +		/* 1 0	1 0 1  << 6  xkphys cached */ +		uasm_i_ori(p, ptr, ptr, 0x540); +		uasm_i_drotr(p, ptr, ptr, 11);  #elif defined(CONFIG_SMP) -# ifdef  CONFIG_MIPS_MT_SMTC -	/* -	 * SMTC uses TCBind value as "CPU" index -	 */ -	uasm_i_mfc0(p, ptr, C0_TCBIND); -	uasm_i_dsrl_safe(p, ptr, ptr, 19); -# else -	/* -	 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 -	 * stored in CONTEXT. 
-	 */ -	uasm_i_dmfc0(p, ptr, C0_CONTEXT); -	uasm_i_dsrl_safe(p, ptr, ptr, 23); -# endif -	UASM_i_LA_mostly(p, tmp, pgdc); -	uasm_i_daddu(p, ptr, ptr, tmp); -	uasm_i_dmfc0(p, tmp, C0_BADVADDR); -	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); +		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG); +		uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT); +		UASM_i_LA_mostly(p, tmp, pgdc); +		uasm_i_daddu(p, ptr, ptr, tmp); +		uasm_i_dmfc0(p, tmp, C0_BADVADDR); +		uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);  #else -	UASM_i_LA_mostly(p, ptr, pgdc); -	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); +		UASM_i_LA_mostly(p, ptr, pgdc); +		uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);  #endif +	}  	uasm_l_vmalloc_done(l, *p); @@ -620,12 +848,11 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  #endif  } -enum vmalloc64_mode {not_refill, refill};  /*   * BVADDR is the faulting address, PTR is scratch.   * PTR will hold the pgd for vmalloc.   */ -static void __cpuinit +static void  build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  			unsigned int bvaddr, unsigned int ptr,  			enum vmalloc64_mode mode) @@ -638,7 +865,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  	uasm_l_vmalloc(l, *p); -	if (mode == refill && check_for_high_segbits) { +	if (mode != not_refill && check_for_high_segbits) {  		if (single_insn_swpd) {  			uasm_il_bltz(p, r, bvaddr, label_vmalloc_done);  			uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); @@ -661,7 +888,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  				uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd));  		}  	} -	if (mode == refill && check_for_high_segbits) { +	if (mode != not_refill && check_for_high_segbits) {  		uasm_l_large_segbits_fault(l, *p);  		/*  		 * We get here if we are an xsseg address, or if we are @@ -677,7 +904,15 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,  		 */  		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);  		uasm_i_jr(p, ptr); -		uasm_i_nop(p); + +		if (mode == refill_scratch) { +			if (scratch_reg >= 0) +				UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); +			else +				UASM_i_LW(p, 1, scratchpad_offset(0), 0); +		} else { +			uasm_i_nop(p); +		}  	}  } @@ -687,34 +922,28 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,   * TMP and PTR are scratch.   * TMP will be clobbered, PTR will hold the pgd entry.   */ -static void __cpuinit __maybe_unused +static void __maybe_unused  build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)  { -	long pgdc = (long)pgd_current; +	if (pgd_reg != -1) { +		/* pgd is in pgd_reg */ +		uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg); +		uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ +	} else { +		long pgdc = (long)pgd_current; -	/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ +		/* 32 bit SMP has smp_processor_id() stored in CONTEXT. */  #ifdef CONFIG_SMP -#ifdef  CONFIG_MIPS_MT_SMTC -	/* -	 * SMTC uses TCBind value as "CPU" index -	 */ -	uasm_i_mfc0(p, ptr, C0_TCBIND); -	UASM_i_LA_mostly(p, tmp, pgdc); -	uasm_i_srl(p, ptr, ptr, 19); -#else -	/* -	 * smp_processor_id() << 3 is stored in CONTEXT. 
-         */ -	uasm_i_mfc0(p, ptr, C0_CONTEXT); -	UASM_i_LA_mostly(p, tmp, pgdc); -	uasm_i_srl(p, ptr, ptr, 23); -#endif -	uasm_i_addu(p, ptr, tmp, ptr); +		uasm_i_mfc0(p, ptr, SMP_CPUID_REG); +		UASM_i_LA_mostly(p, tmp, pgdc); +		uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT); +		uasm_i_addu(p, ptr, tmp, ptr);  #else -	UASM_i_LA_mostly(p, ptr, pgdc); +		UASM_i_LA_mostly(p, ptr, pgdc);  #endif -	uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ -	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); +		uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ +		uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); +	}  	uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */  	uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);  	uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ @@ -722,7 +951,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)  #endif /* !CONFIG_64BIT */ -static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) +static void build_adjust_context(u32 **p, unsigned int ctx)  {  	unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12;  	unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); @@ -748,7 +977,7 @@ static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)  	uasm_i_andi(p, ctx, ctx, mask);  } -static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)  {  	/*  	 * Bug workaround for the Nevada. It seems as if under certain @@ -773,8 +1002,7 @@ static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr  	UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */  } -static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, -					unsigned int ptep) +static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)  {  	/*  	 * 64bit address support (36bit on a 32bit CPU) in a 32bit @@ -784,12 +1012,10 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,  	if (cpu_has_64bits) {  		uasm_i_ld(p, tmp, 0, ptep); /* get even pte */  		uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ -		if (kernel_uses_smartmips_rixi) { -			UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); -			UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); -			UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); +		if (cpu_has_rixi) { +			UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));  			UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ -			UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); +			UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));  		} else {  			uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */  			UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ @@ -811,14 +1037,12 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,  	UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */  	if (r45k_bvahwbug())  		build_tlb_probe_entry(p); -	if (kernel_uses_smartmips_rixi) { -		UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); -		UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); -		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); +	if (cpu_has_rixi) { +		UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL));  		if (r4k_250MHZhwbug())  			UASM_i_MTC0(p, 0, C0_ENTRYLO0);  		UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ -		UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); +		UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL));  	} else {  		UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert 
to entrylo0 */  		if (r4k_250MHZhwbug()) @@ -834,6 +1058,181 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,  #endif  } +struct mips_huge_tlb_info { +	int huge_pte; +	int restore_scratch; +}; + +static struct mips_huge_tlb_info +build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, +			       struct uasm_reloc **r, unsigned int tmp, +			       unsigned int ptr, int c0_scratch_reg) +{ +	struct mips_huge_tlb_info rv; +	unsigned int even, odd; +	int vmalloc_branch_delay_filled = 0; +	const int scratch = 1; /* Our extra working register */ + +	rv.huge_pte = scratch; +	rv.restore_scratch = 0; + +	if (check_for_high_segbits) { +		UASM_i_MFC0(p, tmp, C0_BADVADDR); + +		if (pgd_reg != -1) +			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); +		else +			UASM_i_MFC0(p, ptr, C0_CONTEXT); + +		if (c0_scratch_reg >= 0) +			UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); +		else +			UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + +		uasm_i_dsrl_safe(p, scratch, tmp, +				 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); +		uasm_il_bnez(p, r, scratch, label_vmalloc); + +		if (pgd_reg == -1) { +			vmalloc_branch_delay_filled = 1; +			/* Clear lower 23 bits of context. */ +			uasm_i_dins(p, ptr, 0, 0, 23); +		} +	} else { +		if (pgd_reg != -1) +			UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); +		else +			UASM_i_MFC0(p, ptr, C0_CONTEXT); + +		UASM_i_MFC0(p, tmp, C0_BADVADDR); + +		if (c0_scratch_reg >= 0) +			UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); +		else +			UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + +		if (pgd_reg == -1) +			/* Clear lower 23 bits of context. */ +			uasm_i_dins(p, ptr, 0, 0, 23); + +		uasm_il_bltz(p, r, tmp, label_vmalloc); +	} + +	if (pgd_reg == -1) { +		vmalloc_branch_delay_filled = 1; +		/* 1 0	1 0 1  << 6  xkphys cached */ +		uasm_i_ori(p, ptr, ptr, 0x540); +		uasm_i_drotr(p, ptr, ptr, 11); +	} + +#ifdef __PAGETABLE_PMD_FOLDED +#define LOC_PTEP scratch +#else +#define LOC_PTEP ptr +#endif + +	if (!vmalloc_branch_delay_filled) +		/* get pgd offset in bytes */ +		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + +	uasm_l_vmalloc_done(l, *p); + +	/* +	 *			   tmp		ptr +	 * fall-through case =	 badvaddr  *pgd_current +	 * vmalloc case	     =	 badvaddr  swapper_pg_dir +	 */ + +	if (vmalloc_branch_delay_filled) +		/* get pgd offset in bytes */ +		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + +#ifdef __PAGETABLE_PMD_FOLDED +	GET_CONTEXT(p, tmp); /* get context reg */ +#endif +	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3); + +	if (use_lwx_insns()) { +		UASM_i_LWX(p, LOC_PTEP, scratch, ptr); +	} else { +		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */ +		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ +	} + +#ifndef __PAGETABLE_PMD_FOLDED +	/* get pmd offset in bytes */ +	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); +	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3); +	GET_CONTEXT(p, tmp); /* get context reg */ + +	if (use_lwx_insns()) { +		UASM_i_LWX(p, scratch, scratch, ptr); +	} else { +		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ +		UASM_i_LW(p, scratch, 0, ptr); +	} +#endif +	/* Adjust the context during the load latency. */ +	build_adjust_context(p, tmp); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +	uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update); +	/* +	 * The in the LWX case we don't want to do the load in the +	 * delay slot.	It cannot issue in the same cycle and may be +	 * speculative and unneeded. 
+	 */ +	if (use_lwx_insns()) +		uasm_i_nop(p); +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + + +	/* build_update_entries */ +	if (use_lwx_insns()) { +		even = ptr; +		odd = tmp; +		UASM_i_LWX(p, even, scratch, tmp); +		UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); +		UASM_i_LWX(p, odd, scratch, tmp); +	} else { +		UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ +		even = tmp; +		odd = ptr; +		UASM_i_LW(p, even, 0, ptr); /* get even pte */ +		UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ +	} +	if (cpu_has_rixi) { +		uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL)); +		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ +		uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL)); +	} else { +		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); +		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ +		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); +	} +	UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ + +	if (c0_scratch_reg >= 0) { +		UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); +		build_tlb_write_entry(p, l, r, tlb_random); +		uasm_l_leave(l, *p); +		rv.restore_scratch = 1; +	} else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13)  { +		build_tlb_write_entry(p, l, r, tlb_random); +		uasm_l_leave(l, *p); +		UASM_i_LW(p, scratch, scratchpad_offset(0), 0); +	} else { +		UASM_i_LW(p, scratch, scratchpad_offset(0), 0); +		build_tlb_write_entry(p, l, r, tlb_random); +		uasm_l_leave(l, *p); +		rv.restore_scratch = 1; +	} + +	uasm_i_eret(p); /* return from trap */ + +	return rv; +} +  /*   * For a 64-bit kernel, we are using the 64-bit XTLB refill exception   * because EXL == 0.  If we wrap, we can also use the 32 instruction @@ -842,61 +1241,71 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,   */  #define MIPS64_REFILL_INSNS 32 -static void __cpuinit build_r4000_tlb_refill_handler(void) +static void build_r4000_tlb_refill_handler(void)  {  	u32 *p = tlb_handler;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs;  	u32 *f;  	unsigned int final_len; +	struct mips_huge_tlb_info htlb_info __maybe_unused; +	enum vmalloc64_mode vmalloc_mode __maybe_unused;  	memset(tlb_handler, 0, sizeof(tlb_handler));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs));  	memset(final_handler, 0, sizeof(final_handler)); -	/* -	 * create the plain linear handler -	 */ -	if (bcm1250_m3_war()) { -		unsigned int segbits = 44; - -		uasm_i_dmfc0(&p, K0, C0_BADVADDR); -		uasm_i_dmfc0(&p, K1, C0_ENTRYHI); -		uasm_i_xor(&p, K0, K0, K1); -		uasm_i_dsrl_safe(&p, K1, K0, 62); -		uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); -		uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); -		uasm_i_or(&p, K0, K0, K1); -		uasm_il_bnez(&p, &r, K0, label_leave); -		/* No need for uasm_i_nop */ -	} +	if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { +		htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, +							  scratch_reg); +		vmalloc_mode = refill_scratch; +	} else { +		htlb_info.huge_pte = K0; +		htlb_info.restore_scratch = 0; +		vmalloc_mode = refill_noscratch; +		/* +		 * create the plain linear handler +		 */ +		if (bcm1250_m3_war()) { +			unsigned int segbits = 44; + +			uasm_i_dmfc0(&p, K0, C0_BADVADDR); +			uasm_i_dmfc0(&p, K1, C0_ENTRYHI); +			uasm_i_xor(&p, K0, K0, K1); +			uasm_i_dsrl_safe(&p, K1, K0, 62); +			uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); +			uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); +			uasm_i_or(&p, K0, K0, K1); +			uasm_il_bnez(&p, &r, K0, label_leave); +			/* No need for uasm_i_nop */ +		} 
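A note on the bcm1250_m3_war() block just above, for readers following the generated code: it guards against a BCM1250 erratum in which c0_BadVAddr and c0_EntryHi can disagree. The emitted sequence XORs the two registers, keeps only the two top region bits and the implemented VPN2 bits (with 4 KB pages the low 13 bits are the page offset plus the even/odd page select), and branches to the leave label, skipping the refill, if anything differs. Below is a rough user-space sketch of that bit check, written for illustration only and assuming segbits = 44 as in the handler above.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only -- not kernel code.  Returns nonzero when the two
 * values differ in the region bits (63..62) or in the implemented VPN2
 * bits (segbits-1..13); bits 12..0 and 61..segbits are ignored, mirroring
 * the dsrl/dsll sequence emitted above.
 */
static int entryhi_mismatch(uint64_t badvaddr, uint64_t entryhi, int segbits)
{
	uint64_t diff = badvaddr ^ entryhi;
	uint64_t region = diff >> 62;
	uint64_t vpn2 = (diff >> 13) << (64 + 13 - segbits);

	return (region | vpn2) != 0;
}

int main(void)
{
	/* same even/odd page pair, different offset: no mismatch */
	printf("%d\n", entryhi_mismatch(0x2000, 0x3fff, 44));
	/* a VPN2 bit differs: mismatch */
	printf("%d\n", entryhi_mismatch(0x2000, 0x6000, 44));
	return 0;
}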
 #ifdef CONFIG_64BIT -	build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +		build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */  #else -	build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +		build_get_pgde32(&p, K0, K1); /* get pgd in K1 */  #endif -#ifdef CONFIG_HUGETLB_PAGE -	build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +		build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update);  #endif -	build_get_ptep(&p, K0, K1); -	build_update_entries(&p, K0, K1); -	build_tlb_write_entry(&p, &l, &r, tlb_random); -	uasm_l_leave(&l, p); -	uasm_i_eret(&p); /* return from trap */ - -#ifdef CONFIG_HUGETLB_PAGE +		build_get_ptep(&p, K0, K1); +		build_update_entries(&p, K0, K1); +		build_tlb_write_entry(&p, &l, &r, tlb_random); +		uasm_l_leave(&l, p); +		uasm_i_eret(&p); /* return from trap */ +	} +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	uasm_l_tlb_huge_update(&l, p); -	UASM_i_LW(&p, K0, 0, K1); -	build_huge_update_entries(&p, K0, K1); -	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random); +	build_huge_update_entries(&p, htlb_info.huge_pte, K1); +	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, +				   htlb_info.restore_scratch);  #endif  #ifdef CONFIG_64BIT -	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, refill); +	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);  #endif  	/* @@ -906,95 +1315,100 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)  	 * need three, with the second nop'ed and the third being  	 * unused.  	 */ -	/* Loongson2 ebase is different than r4k, we have more space */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) -	if ((p - tlb_handler) > 64) -		panic("TLB refill handler space exceeded"); -#else -	if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) -	    || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) -		&& uasm_insn_has_bdelay(relocs, -					tlb_handler + MIPS64_REFILL_INSNS - 3))) -		panic("TLB refill handler space exceeded"); -#endif - -	/* -	 * Now fold the handler in the TLB refill handler space. -	 */ -#if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) -	f = final_handler; -	/* Simplest case, just copy the handler. */ -	uasm_copy_handler(relocs, labels, tlb_handler, p, f); -	final_len = p - tlb_handler; -#else /* CONFIG_64BIT */ -	f = final_handler + MIPS64_REFILL_INSNS; -	if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { -		/* Just copy the handler. */ -		uasm_copy_handler(relocs, labels, tlb_handler, p, f); -		final_len = p - tlb_handler; -	} else { -#if defined(CONFIG_HUGETLB_PAGE) -		const enum label_id ls = label_tlb_huge_update; -#else -		const enum label_id ls = label_vmalloc; -#endif -		u32 *split; -		int ov = 0; -		int i; - -		for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) -			; -		BUG_ON(i == ARRAY_SIZE(labels)); -		split = labels[i].addr; - -		/* -		 * See if we have overflown one way or the other. -		 */ -		if (split > tlb_handler + MIPS64_REFILL_INSNS || -		    split < p - MIPS64_REFILL_INSNS) -			ov = 1; - -		if (ov) { +	switch (boot_cpu_type()) { +	default: +		if (sizeof(long) == 4) { +	case CPU_LOONGSON2: +		/* Loongson2 ebase is different than r4k, we have more space */ +			if ((p - tlb_handler) > 64) +				panic("TLB refill handler space exceeded");  			/* -			 * Split two instructions before the end.  One -			 * for the branch and one for the instruction -			 * in the delay slot. +			 * Now fold the handler in the TLB refill handler space.  			 
*/ -			split = tlb_handler + MIPS64_REFILL_INSNS - 2; - +			f = final_handler; +			/* Simplest case, just copy the handler. */ +			uasm_copy_handler(relocs, labels, tlb_handler, p, f); +			final_len = p - tlb_handler; +			break; +		} else { +			if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) +			    || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) +				&& uasm_insn_has_bdelay(relocs, +							tlb_handler + MIPS64_REFILL_INSNS - 3))) +				panic("TLB refill handler space exceeded");  			/* -			 * If the branch would fall in a delay slot, -			 * we must back up an additional instruction -			 * so that it is no longer in a delay slot. +			 * Now fold the handler in the TLB refill handler space.  			 */ -			if (uasm_insn_has_bdelay(relocs, split - 1)) -				split--; -		} -		/* Copy first part of the handler. */ -		uasm_copy_handler(relocs, labels, tlb_handler, split, f); -		f += split - tlb_handler; - -		if (ov) { -			/* Insert branch. */ -			uasm_l_split(&l, final_handler); -			uasm_il_b(&f, &r, label_split); -			if (uasm_insn_has_bdelay(relocs, split)) -				uasm_i_nop(&f); -			else { -				uasm_copy_handler(relocs, labels, -						  split, split + 1, f); -				uasm_move_labels(labels, f, f + 1, -1); -				f++; -				split++; +			f = final_handler + MIPS64_REFILL_INSNS; +			if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { +				/* Just copy the handler. */ +				uasm_copy_handler(relocs, labels, tlb_handler, p, f); +				final_len = p - tlb_handler; +			} else { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +				const enum label_id ls = label_tlb_huge_update; +#else +				const enum label_id ls = label_vmalloc; +#endif +				u32 *split; +				int ov = 0; +				int i; + +				for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) +					; +				BUG_ON(i == ARRAY_SIZE(labels)); +				split = labels[i].addr; + +				/* +				 * See if we have overflown one way or the other. +				 */ +				if (split > tlb_handler + MIPS64_REFILL_INSNS || +				    split < p - MIPS64_REFILL_INSNS) +					ov = 1; + +				if (ov) { +					/* +					 * Split two instructions before the end.  One +					 * for the branch and one for the instruction +					 * in the delay slot. +					 */ +					split = tlb_handler + MIPS64_REFILL_INSNS - 2; + +					/* +					 * If the branch would fall in a delay slot, +					 * we must back up an additional instruction +					 * so that it is no longer in a delay slot. +					 */ +					if (uasm_insn_has_bdelay(relocs, split - 1)) +						split--; +				} +				/* Copy first part of the handler. */ +				uasm_copy_handler(relocs, labels, tlb_handler, split, f); +				f += split - tlb_handler; + +				if (ov) { +					/* Insert branch. */ +					uasm_l_split(&l, final_handler); +					uasm_il_b(&f, &r, label_split); +					if (uasm_insn_has_bdelay(relocs, split)) +						uasm_i_nop(&f); +					else { +						uasm_copy_handler(relocs, labels, +								  split, split + 1, f); +						uasm_move_labels(labels, f, f + 1, -1); +						f++; +						split++; +					} +				} + +				/* Copy the rest of the handler. */ +				uasm_copy_handler(relocs, labels, split, p, final_handler); +				final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + +					    (p - split);  			}  		} - -		/* Copy the rest of the handler. 
*/ -		uasm_copy_handler(relocs, labels, split, p, final_handler); -		final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + -			    (p - split); +		break;  	} -#endif /* CONFIG_64BIT */  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB refill handler (%u instructions).\n", @@ -1002,20 +1416,90 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)  	memcpy((void *)ebase, final_handler, 0x100); -	dump_handler((u32 *)ebase, 64); +	dump_handler("r4000_tlb_refill", (u32 *)ebase, 64);  } -/* - * 128 instructions for the fastpath handler is generous and should - * never be exceeded. - */ -#define FASTPATH_SIZE 128 +extern u32 handle_tlbl[], handle_tlbl_end[]; +extern u32 handle_tlbs[], handle_tlbs_end[]; +extern u32 handle_tlbm[], handle_tlbm_end[]; +extern u32 tlbmiss_handler_setup_pgd_start[], tlbmiss_handler_setup_pgd[]; +extern u32 tlbmiss_handler_setup_pgd_end[]; + +static void build_setup_pgd(void) +{ +	const int a0 = 4; +	const int __maybe_unused a1 = 5; +	const int __maybe_unused a2 = 6; +	u32 *p = tlbmiss_handler_setup_pgd_start; +	const int tlbmiss_handler_setup_pgd_size = +		tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd_start; +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT +	long pgdc = (long)pgd_current; +#endif + +	memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size * +					sizeof(tlbmiss_handler_setup_pgd[0])); +	memset(labels, 0, sizeof(labels)); +	memset(relocs, 0, sizeof(relocs)); +	pgd_reg = allocate_kscratch(); +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT +	if (pgd_reg == -1) { +		struct uasm_label *l = labels; +		struct uasm_reloc *r = relocs; + +		/* PGD << 11 in c0_Context */ +		/* +		 * If it is a ckseg0 address, convert to a physical +		 * address.  Shifting right by 29 and adding 4 will +		 * result in zero for these addresses. 
+		 * +		 */ +		UASM_i_SRA(&p, a1, a0, 29); +		UASM_i_ADDIU(&p, a1, a1, 4); +		uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); +		uasm_i_nop(&p); +		uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); +		uasm_l_tlbl_goaround1(&l, p); +		UASM_i_SLL(&p, a0, a0, 11); +		uasm_i_jr(&p, 31); +		UASM_i_MTC0(&p, a0, C0_CONTEXT); +	} else { +		/* PGD in c0_KScratch */ +		uasm_i_jr(&p, 31); +		UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); +	} +#else +#ifdef CONFIG_SMP +	/* Save PGD to pgd_current[smp_processor_id()] */ +	UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG); +	UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT); +	UASM_i_LA_mostly(&p, a2, pgdc); +	UASM_i_ADDU(&p, a2, a2, a1); +	UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#else +	UASM_i_LA_mostly(&p, a2, pgdc); +	UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#endif /* SMP */ +	uasm_i_jr(&p, 31); + +	/* if pgd_reg is allocated, save PGD also to scratch register */ +	if (pgd_reg != -1) +		UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); +	else +		uasm_i_nop(&p); +#endif +	if (p >= tlbmiss_handler_setup_pgd_end) +		panic("tlbmiss_handler_setup_pgd space exceeded"); + +	uasm_resolve_relocs(relocs, labels); +	pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", +		 (unsigned int)(p - tlbmiss_handler_setup_pgd)); -u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; -u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; -u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; +	dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, +					tlbmiss_handler_setup_pgd_size); +} -static void __cpuinit +static void  iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)  {  #ifdef CONFIG_SMP @@ -1035,7 +1519,7 @@ iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)  #endif  } -static void __cpuinit +static void  iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,  	unsigned int mode)  { @@ -1095,23 +1579,35 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,   * the page table where this PTE is located, PTE will be re-loaded   * with it's original value.   */ -static void __cpuinit +static void  build_pte_present(u32 **p, struct uasm_reloc **r, -		  unsigned int pte, unsigned int ptr, enum label_id lid) +		  int pte, int ptr, int scratch, enum label_id lid)  { -	if (kernel_uses_smartmips_rixi) { -		uasm_i_andi(p, pte, pte, _PAGE_PRESENT); -		uasm_il_beqz(p, r, pte, lid); +	int t = scratch >= 0 ? scratch : pte; + +	if (cpu_has_rixi) { +		if (use_bbit_insns()) { +			uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); +			uasm_i_nop(p); +		} else { +			uasm_i_andi(p, t, pte, _PAGE_PRESENT); +			uasm_il_beqz(p, r, t, lid); +			if (pte == t) +				/* You lose the SMP race :-(*/ +				iPTE_LW(p, pte, ptr); +		}  	} else { -		uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); -		uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); -		uasm_il_bnez(p, r, pte, lid); +		uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); +		uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); +		uasm_il_bnez(p, r, t, lid); +		if (pte == t) +			/* You lose the SMP race :-(*/ +			iPTE_LW(p, pte, ptr);  	} -	iPTE_LW(p, pte, ptr);  }  /* Make PTE valid, store result in PTR. */ -static void __cpuinit +static void  build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,  		 unsigned int ptr)  { @@ -1124,20 +1620,27 @@ build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte,   * Check if PTE can be written to, if not branch to LABEL. Regardless   * restore PTE with value from PTR when done.   
*/ -static void __cpuinit +static void  build_pte_writable(u32 **p, struct uasm_reloc **r, -		   unsigned int pte, unsigned int ptr, enum label_id lid) +		   unsigned int pte, unsigned int ptr, int scratch, +		   enum label_id lid)  { -	uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); -	uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); -	uasm_il_bnez(p, r, pte, lid); -	iPTE_LW(p, pte, ptr); +	int t = scratch >= 0 ? scratch : pte; + +	uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); +	uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); +	uasm_il_bnez(p, r, t, lid); +	if (pte == t) +		/* You lose the SMP race :-(*/ +		iPTE_LW(p, pte, ptr); +	else +		uasm_i_nop(p);  }  /* Make PTE writable, update software status bits as well, then store   * at PTR.   */ -static void __cpuinit +static void  build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,  		 unsigned int ptr)  { @@ -1151,16 +1654,27 @@ build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte,   * Check if PTE can be modified, if not branch to LABEL. Regardless   * restore PTE with value from PTR when done.   */ -static void __cpuinit +static void  build_pte_modifiable(u32 **p, struct uasm_reloc **r, -		     unsigned int pte, unsigned int ptr, enum label_id lid) +		     unsigned int pte, unsigned int ptr, int scratch, +		     enum label_id lid)  { -	uasm_i_andi(p, pte, pte, _PAGE_WRITE); -	uasm_il_beqz(p, r, pte, lid); -	iPTE_LW(p, pte, ptr); +	if (use_bbit_insns()) { +		uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); +		uasm_i_nop(p); +	} else { +		int t = scratch >= 0 ? scratch : pte; +		uasm_i_andi(p, t, pte, _PAGE_WRITE); +		uasm_il_beqz(p, r, t, lid); +		if (pte == t) +			/* You lose the SMP race :-(*/ +			iPTE_LW(p, pte, ptr); +	}  }  #ifndef CONFIG_MIPS_PGD_C0_CONTEXT + +  /*   * R3000 style TLB load/store/modify handlers.   */ @@ -1169,7 +1683,7 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r,   * This places the pte into ENTRYLO0 and writes it with tlbwi.   * Then it returns.   */ -static void __cpuinit +static void  build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp)  {  	uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ @@ -1185,7 +1699,7 @@ build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp)   * may have the probe fail bit set as a result of a trap on a   * kseg2 access, i.e. without refill.  Then it returns.   
*/ -static void __cpuinit +static void  build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l,  			     struct uasm_reloc **r, unsigned int pte,  			     unsigned int tmp) @@ -1203,7 +1717,7 @@ build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l,  	uasm_i_rfe(p); /* branch delay */  } -static void __cpuinit +static void  build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,  				   unsigned int ptr)  { @@ -1223,18 +1737,19 @@ build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte,  	uasm_i_tlbp(p); /* load delay */  } -static void __cpuinit build_r3000_tlb_load_handler(void) +static void build_r3000_tlb_load_handler(void)  {  	u32 *p = handle_tlbl; +	const int handle_tlbl_size = handle_tlbl_end - handle_tlbl;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; -	memset(handle_tlbl, 0, sizeof(handle_tlbl)); +	memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0]));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs));  	build_r3000_tlbchange_handler_head(&p, K0, K1); -	build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); +	build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);  	uasm_i_nop(&p); /* load delay */  	build_make_valid(&p, &r, K0, K1);  	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); @@ -1243,28 +1758,29 @@ static void __cpuinit build_r3000_tlb_load_handler(void)  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbl) > FASTPATH_SIZE) +	if (p >= handle_tlbl_end)  		panic("TLB load handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbl)); -	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl)); +	dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size);  } -static void __cpuinit build_r3000_tlb_store_handler(void) +static void build_r3000_tlb_store_handler(void)  {  	u32 *p = handle_tlbs; +	const int handle_tlbs_size = handle_tlbs_end - handle_tlbs;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; -	memset(handle_tlbs, 0, sizeof(handle_tlbs)); +	memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0]));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs));  	build_r3000_tlbchange_handler_head(&p, K0, K1); -	build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs); +	build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);  	uasm_i_nop(&p); /* load delay */  	build_make_write(&p, &r, K0, K1);  	build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); @@ -1273,28 +1789,29 @@ static void __cpuinit build_r3000_tlb_store_handler(void)  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbs) > FASTPATH_SIZE) +	if (p >= handle_tlbs_end)  		panic("TLB store handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbs)); -	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs)); +	dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size);  } -static void __cpuinit build_r3000_tlb_modify_handler(void) +static void build_r3000_tlb_modify_handler(void)  {  	u32 *p = handle_tlbm; +	const int handle_tlbm_size = handle_tlbm_end - handle_tlbm;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; -	memset(handle_tlbm, 0, sizeof(handle_tlbm)); +	memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0]));  	memset(labels, 0, 
sizeof(labels));  	memset(relocs, 0, sizeof(relocs));  	build_r3000_tlbchange_handler_head(&p, K0, K1); -	build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm); +	build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);  	uasm_i_nop(&p); /* load delay */  	build_make_write(&p, &r, K0, K1);  	build_r3000_pte_reload_tlbwi(&p, K0, K1); @@ -1303,55 +1820,57 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbm) > FASTPATH_SIZE) +	if (p >= handle_tlbm_end)  		panic("TLB modify handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbm)); -	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm)); +	dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size);  }  #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */  /*   * R4000 style TLB load/store/modify handlers.   */ -static void __cpuinit +static struct work_registers  build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, -				   struct uasm_reloc **r, unsigned int pte, -				   unsigned int ptr) +				   struct uasm_reloc **r)  { +	struct work_registers wr = build_get_work_registers(p); +  #ifdef CONFIG_64BIT -	build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */ +	build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */  #else -	build_get_pgde32(p, pte, ptr); /* get pgd in ptr */ +	build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */  #endif -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	/*  	 * For huge tlb entries, pmd doesn't contain an address but  	 * instead contains the tlb pte. Check the PAGE_HUGE bit and  	 * see if we need to jump to huge tlb processing.  	 
*/ -	build_is_huge_pte(p, r, pte, ptr, label_tlb_huge_update); +	build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update);  #endif -	UASM_i_MFC0(p, pte, C0_BADVADDR); -	UASM_i_LW(p, ptr, 0, ptr); -	UASM_i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); -	uasm_i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2); -	UASM_i_ADDU(p, ptr, ptr, pte); +	UASM_i_MFC0(p, wr.r1, C0_BADVADDR); +	UASM_i_LW(p, wr.r2, 0, wr.r2); +	UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); +	uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); +	UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1);  #ifdef CONFIG_SMP  	uasm_l_smp_pgtable_change(l, *p);  #endif -	iPTE_LW(p, pte, ptr); /* get even pte */ +	iPTE_LW(p, wr.r1, wr.r2); /* get even pte */  	if (!m4kc_tlbp_war())  		build_tlb_probe_entry(p); +	return wr;  } -static void __cpuinit +static void  build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,  				   struct uasm_reloc **r, unsigned int tmp,  				   unsigned int ptr) @@ -1361,6 +1880,7 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,  	build_update_entries(p, tmp, ptr);  	build_tlb_write_entry(p, l, r, tlb_indexed);  	uasm_l_leave(l, *p); +	build_restore_work_registers(p);  	uasm_i_eret(p); /* return from trap */  #ifdef CONFIG_64BIT @@ -1368,13 +1888,15 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l,  #endif  } -static void __cpuinit build_r4000_tlb_load_handler(void) +static void build_r4000_tlb_load_handler(void)  {  	u32 *p = handle_tlbl; +	const int handle_tlbl_size = handle_tlbl_end - handle_tlbl;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; +	struct work_registers wr; -	memset(handle_tlbl, 0, sizeof(handle_tlbl)); +	memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0]));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs)); @@ -1392,194 +1914,287 @@ static void __cpuinit build_r4000_tlb_load_handler(void)  		/* No need for uasm_i_nop */  	} -	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); -	build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); +	wr = build_r4000_tlbchange_handler_head(&p, &l, &r); +	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);  	if (m4kc_tlbp_war())  		build_tlb_probe_entry(&p); -	if (kernel_uses_smartmips_rixi) { +	if (cpu_has_rixi) {  		/*  		 * If the page is not _PAGE_VALID, RI or XI could not  		 * have triggered it.  Skip the expensive test..  		 */ -		uasm_i_andi(&p, K0, K0, _PAGE_VALID); -		uasm_il_beqz(&p, &r, K0, label_tlbl_goaround1); +		if (use_bbit_insns()) { +			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), +				      label_tlbl_goaround1); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); +			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); +		}  		uasm_i_nop(&p);  		uasm_i_tlbr(&p); -		/* Examine  entrylo 0 or 1 based on ptr. */ -		uasm_i_andi(&p, K0, K1, sizeof(pte_t)); -		uasm_i_beqz(&p, K0, 8); -		UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/ -		UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */ +		switch (current_cpu_type()) { +		default: +			if (cpu_has_mips_r2) { +				uasm_i_ehb(&p); + +		case CPU_CAVIUM_OCTEON: +		case CPU_CAVIUM_OCTEON_PLUS: +		case CPU_CAVIUM_OCTEON2: +				break; +			} +		} + +		/* Examine  entrylo 0 or 1 based on ptr. 
*/ +		if (use_bbit_insns()) { +			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); +			uasm_i_beqz(&p, wr.r3, 8); +		} +		/* load it in the delay slot*/ +		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); +		/* load it if ptr is odd */ +		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);  		/* -		 * If the entryLo (now in K0) is valid (bit 1), RI or +		 * If the entryLo (now in wr.r3) is valid (bit 1), RI or  		 * XI must have triggered it.  		 */ -		uasm_i_andi(&p, K0, K0, 2); -		uasm_il_bnez(&p, &r, K0, label_nopage_tlbl); - +		if (use_bbit_insns()) { +			uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); +			uasm_i_nop(&p); +			uasm_l_tlbl_goaround1(&l, p); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r3, 2); +			uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); +			uasm_i_nop(&p); +		}  		uasm_l_tlbl_goaround1(&l, p); -		/* Reload the PTE value */ -		iPTE_LW(&p, K0, K1);  	} -	build_make_valid(&p, &r, K0, K1); -	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); +	build_make_valid(&p, &r, wr.r1, wr.r2); +	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	/*  	 * This is the entry point when build_r4000_tlbchange_handler_head  	 * spots a huge page.  	 */  	uasm_l_tlb_huge_update(&l, p); -	iPTE_LW(&p, K0, K1); -	build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); +	iPTE_LW(&p, wr.r1, wr.r2); +	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);  	build_tlb_probe_entry(&p); -	if (kernel_uses_smartmips_rixi) { +	if (cpu_has_rixi) {  		/*  		 * If the page is not _PAGE_VALID, RI or XI could not  		 * have triggered it.  Skip the expensive test..  		 */ -		uasm_i_andi(&p, K0, K0, _PAGE_VALID); -		uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2); +		if (use_bbit_insns()) { +			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), +				      label_tlbl_goaround2); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); +			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); +		}  		uasm_i_nop(&p);  		uasm_i_tlbr(&p); -		/* Examine  entrylo 0 or 1 based on ptr. */ -		uasm_i_andi(&p, K0, K1, sizeof(pte_t)); -		uasm_i_beqz(&p, K0, 8); -		UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/ -		UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */ +		switch (current_cpu_type()) { +		default: +			if (cpu_has_mips_r2) { +				uasm_i_ehb(&p); + +		case CPU_CAVIUM_OCTEON: +		case CPU_CAVIUM_OCTEON_PLUS: +		case CPU_CAVIUM_OCTEON2: +				break; +			} +		} + +		/* Examine  entrylo 0 or 1 based on ptr. */ +		if (use_bbit_insns()) { +			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); +			uasm_i_beqz(&p, wr.r3, 8); +		} +		/* load it in the delay slot*/ +		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); +		/* load it if ptr is odd */ +		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);  		/* -		 * If the entryLo (now in K0) is valid (bit 1), RI or +		 * If the entryLo (now in wr.r3) is valid (bit 1), RI or  		 * XI must have triggered it.  		 */ -		uasm_i_andi(&p, K0, K0, 2); -		uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2); -		/* Reload the PTE value */ -		iPTE_LW(&p, K0, K1); - +		if (use_bbit_insns()) { +			uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); +		} else { +			uasm_i_andi(&p, wr.r3, wr.r3, 2); +			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); +		} +		if (PM_DEFAULT_MASK == 0) +			uasm_i_nop(&p);  		/*  		 * We clobbered C0_PAGEMASK, restore it.  
On the other branch  		 * it is restored in build_huge_tlb_write_entry.  		 */ -		build_restore_pagemask(&p, &r, K0, label_nopage_tlbl); +		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);  		uasm_l_tlbl_goaround2(&l, p);  	} -	uasm_i_ori(&p, K0, K0, (_PAGE_ACCESSED | _PAGE_VALID)); -	build_huge_handler_tail(&p, &r, &l, K0, K1); +	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); +	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);  #endif  	uasm_l_nopage_tlbl(&l, p); +	build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS +	if ((unsigned long)tlb_do_page_fault_0 & 1) { +		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0)); +		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0)); +		uasm_i_jr(&p, K0); +	} else +#endif  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbl) > FASTPATH_SIZE) +	if (p >= handle_tlbl_end)  		panic("TLB load handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbl)); -	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl)); +	dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size);  } -static void __cpuinit build_r4000_tlb_store_handler(void) +static void build_r4000_tlb_store_handler(void)  {  	u32 *p = handle_tlbs; +	const int handle_tlbs_size = handle_tlbs_end - handle_tlbs;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; +	struct work_registers wr; -	memset(handle_tlbs, 0, sizeof(handle_tlbs)); +	memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0]));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs)); -	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); -	build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs); +	wr = build_r4000_tlbchange_handler_head(&p, &l, &r); +	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);  	if (m4kc_tlbp_war())  		build_tlb_probe_entry(&p); -	build_make_write(&p, &r, K0, K1); -	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); +	build_make_write(&p, &r, wr.r1, wr.r2); +	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	/*  	 * This is the entry point when  	 * build_r4000_tlbchange_handler_head spots a huge page.  	 
*/  	uasm_l_tlb_huge_update(&l, p); -	iPTE_LW(&p, K0, K1); -	build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs); +	iPTE_LW(&p, wr.r1, wr.r2); +	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);  	build_tlb_probe_entry(&p); -	uasm_i_ori(&p, K0, K0, +	uasm_i_ori(&p, wr.r1, wr.r1,  		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); -	build_huge_handler_tail(&p, &r, &l, K0, K1); +	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);  #endif  	uasm_l_nopage_tlbs(&l, p); +	build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS +	if ((unsigned long)tlb_do_page_fault_1 & 1) { +		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); +		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); +		uasm_i_jr(&p, K0); +	} else +#endif  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbs) > FASTPATH_SIZE) +	if (p >= handle_tlbs_end)  		panic("TLB store handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbs)); -	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs)); +	dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size);  } -static void __cpuinit build_r4000_tlb_modify_handler(void) +static void build_r4000_tlb_modify_handler(void)  {  	u32 *p = handle_tlbm; +	const int handle_tlbm_size = handle_tlbm_end - handle_tlbm;  	struct uasm_label *l = labels;  	struct uasm_reloc *r = relocs; +	struct work_registers wr; -	memset(handle_tlbm, 0, sizeof(handle_tlbm)); +	memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0]));  	memset(labels, 0, sizeof(labels));  	memset(relocs, 0, sizeof(relocs)); -	build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); -	build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm); +	wr = build_r4000_tlbchange_handler_head(&p, &l, &r); +	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);  	if (m4kc_tlbp_war())  		build_tlb_probe_entry(&p);  	/* Present and writable bits set, set accessed and dirty bits. */ -	build_make_write(&p, &r, K0, K1); -	build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); +	build_make_write(&p, &r, wr.r1, wr.r2); +	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); -#ifdef CONFIG_HUGETLB_PAGE +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT  	/*  	 * This is the entry point when  	 * build_r4000_tlbchange_handler_head spots a huge page.  	 
*/  	uasm_l_tlb_huge_update(&l, p); -	iPTE_LW(&p, K0, K1); -	build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm); +	iPTE_LW(&p, wr.r1, wr.r2); +	build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);  	build_tlb_probe_entry(&p); -	uasm_i_ori(&p, K0, K0, +	uasm_i_ori(&p, wr.r1, wr.r1,  		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); -	build_huge_handler_tail(&p, &r, &l, K0, K1); +	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);  #endif  	uasm_l_nopage_tlbm(&l, p); +	build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS +	if ((unsigned long)tlb_do_page_fault_1 & 1) { +		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); +		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); +		uasm_i_jr(&p, K0); +	} else +#endif  	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);  	uasm_i_nop(&p); -	if ((p - handle_tlbm) > FASTPATH_SIZE) +	if (p >= handle_tlbm_end)  		panic("TLB modify handler fastpath space exceeded");  	uasm_resolve_relocs(relocs, labels);  	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",  		 (unsigned int)(p - handle_tlbm)); -	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm)); +	dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size); +} + +static void flush_tlb_handlers(void) +{ +	local_flush_icache_range((unsigned long)handle_tlbl, +			   (unsigned long)handle_tlbl_end); +	local_flush_icache_range((unsigned long)handle_tlbs, +			   (unsigned long)handle_tlbs_end); +	local_flush_icache_range((unsigned long)handle_tlbm, +			   (unsigned long)handle_tlbm_end); +	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd, +			   (unsigned long)tlbmiss_handler_setup_pgd_end);  } -void __cpuinit build_tlb_refill_handler(void) +void build_tlb_refill_handler(void)  {  	/*  	 * The refill handler is generated per-CPU, multi-node systems @@ -1588,6 +2203,8 @@ void __cpuinit build_tlb_refill_handler(void)  	 */  	static int run_once = 0; +	output_pgtable_bits_defines(); +  #ifdef CONFIG_64BIT  	check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);  #endif @@ -1601,11 +2218,16 @@ void __cpuinit build_tlb_refill_handler(void)  	case CPU_TX3922:  	case CPU_TX3927:  #ifndef CONFIG_MIPS_PGD_C0_CONTEXT -		build_r3000_tlb_refill_handler(); +		if (cpu_has_local_ebase) +			build_r3000_tlb_refill_handler();  		if (!run_once) { +			if (!cpu_has_local_ebase) +				build_r3000_tlb_refill_handler(); +			build_setup_pgd();  			build_r3000_tlb_load_handler();  			build_r3000_tlb_store_handler();  			build_r3000_tlb_modify_handler(); +			flush_tlb_handlers();  			run_once++;  		}  #else @@ -1623,22 +2245,18 @@ void __cpuinit build_tlb_refill_handler(void)  		break;  	default: -		build_r4000_tlb_refill_handler();  		if (!run_once) { +			scratch_reg = allocate_kscratch(); +			build_setup_pgd();  			build_r4000_tlb_load_handler();  			build_r4000_tlb_store_handler();  			build_r4000_tlb_modify_handler(); +			if (!cpu_has_local_ebase) +				build_r4000_tlb_refill_handler(); +			flush_tlb_handlers();  			run_once++;  		} +		if (cpu_has_local_ebase) +			build_r4000_tlb_refill_handler();  	}  } - -void __cpuinit flush_tlb_handlers(void) -{ -	local_flush_icache_range((unsigned long)handle_tlbl, -			   (unsigned long)handle_tlbl + sizeof(handle_tlbl)); -	local_flush_icache_range((unsigned long)handle_tlbs, -			   (unsigned long)handle_tlbs + sizeof(handle_tlbs)); -	local_flush_icache_range((unsigned long)handle_tlbm, -			   (unsigned long)handle_tlbm + 
sizeof(handle_tlbm)); -} diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c new file mode 100644 index 00000000000..8399ddf03a0 --- /dev/null +++ b/arch/mips/mm/uasm-micromips.c @@ -0,0 +1,235 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer + * Copyright (C) 2005, 2007  Maciej W. Rozycki + * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013   MIPS Technologies, Inc.  All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#define UASM_ISA	_UASM_ISA_MICROMIPS +#include <asm/uasm.h> + +#define RS_MASK		0x1f +#define RS_SH		16 +#define RT_MASK		0x1f +#define RT_SH		21 +#define SCIMM_MASK	0x3ff +#define SCIMM_SH	16 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f)					\ +	((a) << OP_SH						\ +	 | (b) << RT_SH						\ +	 | (c) << RS_SH						\ +	 | (d) << RD_SH						\ +	 | (e) << RE_SH						\ +	 | (f) << FUNC_SH) + +/* Define these when we are not the ISA the kernel is being compiled with. */ +#ifndef CONFIG_CPU_MICROMIPS +#define MM_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) +#define MM_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) +#define MM_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) +#define MM_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) +#endif + +#include "uasm.c" + +static struct insn insn_table_MM[] = { +	{ insn_addu, M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD }, +	{ insn_addiu, M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, +	{ insn_and, M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD }, +	{ insn_andi, M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, +	{ insn_beq, M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_beql, 0, 0 }, +	{ insn_bgez, M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM }, +	{ insn_bgezl, 0, 0 }, +	{ insn_bltz, M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM }, +	{ insn_bltzl, 0, 0 }, +	{ insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM }, +	{ insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM }, +	{ insn_daddu, 0, 0 }, +	{ insn_daddiu, 0, 0 }, +	{ insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS }, +	{ insn_dmfc0, 0, 0 }, +	{ insn_dmtc0, 0, 0 }, +	{ insn_dsll, 0, 0 }, +	{ insn_dsll32, 0, 0 }, +	{ insn_dsra, 0, 0 }, +	{ insn_dsrl, 0, 0 }, +	{ insn_dsrl32, 0, 0 }, +	{ insn_drotr, 0, 0 }, +	{ insn_drotr32, 0, 0 }, +	{ insn_dsubu, 0, 0 }, +	{ insn_eret, M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0 }, +	{ insn_ins, M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE }, +	{ insn_ext, M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE }, +	{ insn_j, M(mm_j32_op, 0, 0, 0, 0, 0), JIMM }, +	{ insn_jal, M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM }, +	{ insn_jalr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS }, +	{ insn_jr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS }, +	{ insn_lb, M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, +	{ insn_ld, 0, 0 }, +	{ insn_lh, M(mm_lh32_op, 0, 0, 0, 0, 0), RS | RS | SIMM }, +	{ insn_ll, 
M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM }, +	{ insn_lld, 0, 0 }, +	{ insn_lui, M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM }, +	{ insn_lw, M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, +	{ insn_mfc0, M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD }, +	{ insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS }, +	{ insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS }, +	{ insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD }, +	{ insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD }, +	{ insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD }, +	{ insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, +	{ insn_pref, M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM }, +	{ insn_rfe, 0, 0 }, +	{ insn_sc, M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM }, +	{ insn_scd, 0, 0 }, +	{ insn_sd, 0, 0 }, +	{ insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD }, +	{ insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD }, +	{ insn_slt, M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD }, +	{ insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, +	{ insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD }, +	{ insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD }, +	{ insn_srl, M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD }, +	{ insn_srlv, M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD }, +	{ insn_rotr, M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD }, +	{ insn_subu, M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD }, +	{ insn_sw, M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, +	{ insn_sync, M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS }, +	{ insn_tlbp, M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0 }, +	{ insn_tlbr, M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0 }, +	{ insn_tlbwi, M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0 }, +	{ insn_tlbwr, M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0 }, +	{ insn_wait, M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM }, +	{ insn_wsbh, M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS }, +	{ insn_xor, M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD }, +	{ insn_xori, M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, +	{ insn_dins, 0, 0 }, +	{ insn_dinsm, 0, 0 }, +	{ insn_syscall, M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM}, +	{ insn_bbit0, 0, 0 }, +	{ insn_bbit1, 0, 0 }, +	{ insn_lwx, 0, 0 }, +	{ insn_ldx, 0, 0 }, +	{ insn_invalid, 0, 0 } +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ +	WARN(arg > 0xffff || arg < -0x10000, +	     KERN_WARNING "Micro-assembler field overflow\n"); + +	WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + +	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 1) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ + +	WARN(arg & ~((JIMM_MASK << 2) | 1), +	     KERN_WARNING "Micro-assembler field overflow\n"); + +	return (arg >> 1) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) 
+{ +	struct insn *ip = NULL; +	unsigned int i; +	va_list ap; +	u32 op; + +	for (i = 0; insn_table_MM[i].opcode != insn_invalid; i++) +		if (insn_table_MM[i].opcode == opc) { +			ip = &insn_table_MM[i]; +			break; +		} + +	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) +		panic("Unsupported Micro-assembler instruction %d", opc); + +	op = ip->match; +	va_start(ap, opc); +	if (ip->fields & RS) { +		if (opc == insn_mfc0 || opc == insn_mtc0) +			op |= build_rt(va_arg(ap, u32)); +		else +			op |= build_rs(va_arg(ap, u32)); +	} +	if (ip->fields & RT) { +		if (opc == insn_mfc0 || opc == insn_mtc0) +			op |= build_rs(va_arg(ap, u32)); +		else +			op |= build_rt(va_arg(ap, u32)); +	} +	if (ip->fields & RD) +		op |= build_rd(va_arg(ap, u32)); +	if (ip->fields & RE) +		op |= build_re(va_arg(ap, u32)); +	if (ip->fields & SIMM) +		op |= build_simm(va_arg(ap, s32)); +	if (ip->fields & UIMM) +		op |= build_uimm(va_arg(ap, u32)); +	if (ip->fields & BIMM) +		op |= build_bimm(va_arg(ap, s32)); +	if (ip->fields & JIMM) +		op |= build_jimm(va_arg(ap, u32)); +	if (ip->fields & FUNC) +		op |= build_func(va_arg(ap, u32)); +	if (ip->fields & SET) +		op |= build_set(va_arg(ap, u32)); +	if (ip->fields & SCIMM) +		op |= build_scimm(va_arg(ap, u32)); +	va_end(ap); + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +	**buf = ((op & 0xffff) << 16) | (op >> 16); +#else +	**buf = op; +#endif +	(*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ +	long laddr = (long)lab->addr; +	long raddr = (long)rel->addr; + +	switch (rel->type) { +	case R_MIPS_PC16: +#ifdef CONFIG_CPU_LITTLE_ENDIAN +		*rel->addr |= (build_bimm(laddr - (raddr + 4)) << 16); +#else +		*rel->addr |= build_bimm(laddr - (raddr + 4)); +#endif +		break; + +	default: +		panic("Unsupported Micro-assembler relocation %d", +		      rel->type); +	} +} diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c new file mode 100644 index 00000000000..6708a2dbf93 --- /dev/null +++ b/arch/mips/mm/uasm-mips.c @@ -0,0 +1,220 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer + * Copyright (C) 2005, 2007  Maciej W. Rozycki + * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013  MIPS Technologies, Inc.  All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#define UASM_ISA	_UASM_ISA_CLASSIC +#include <asm/uasm.h> + +#define RS_MASK		0x1f +#define RS_SH		21 +#define RT_MASK		0x1f +#define RT_SH		16 +#define SCIMM_MASK	0xfffff +#define SCIMM_SH	6 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f)					\ +	((a) << OP_SH						\ +	 | (b) << RS_SH						\ +	 | (c) << RT_SH						\ +	 | (d) << RD_SH						\ +	 | (e) << RE_SH						\ +	 | (f) << FUNC_SH) + +/* Define these when we are not the ISA the kernel is being compiled with. 
*/ +#ifdef CONFIG_CPU_MICROMIPS +#define CL_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) +#define CL_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) +#define CL_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) +#define CL_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) +#endif + +#include "uasm.c" + +static struct insn insn_table[] = { +	{ insn_addiu, M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, +	{ insn_addu, M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD }, +	{ insn_andi, M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, +	{ insn_and, M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD }, +	{ insn_bbit0, M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_bbit1, M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_beql, M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_beq, M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_bgezl, M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM }, +	{ insn_bgez, M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM }, +	{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, +	{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, +	{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, +	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, +	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, +	{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE }, +	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, +	{ insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT }, +	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, +	{ insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, +	{ insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE }, +	{ insn_drotr, M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE }, +	{ insn_dsll32, M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE }, +	{ insn_dsll, M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE }, +	{ insn_dsra, M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE }, +	{ insn_dsrl32, M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE }, +	{ insn_dsrl, M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE }, +	{ insn_dsubu, M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD }, +	{ insn_eret,  M(cop0_op, cop_op, 0, 0, 0, eret_op),  0 }, +	{ insn_ext, M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE }, +	{ insn_ins, M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE }, +	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM }, +	{ insn_jal,  M(jal_op, 0, 0, 0, 0, 0),	JIMM }, +	{ insn_jalr,  M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD }, +	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM }, +	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jr_op),  RS }, +	{ insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, +	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD }, +	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM }, +	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM }, +	{ insn_lw,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD }, +	{ insn_mfc0,  M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET}, +	{ insn_mfhi,  M(spec_op, 0, 0, 0, 0, mfhi_op), RD }, +	{ insn_mflo,  M(spec_op, 0, 0, 0, 0, mflo_op), RD }, +	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET}, +	{ insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, +	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),	RS | RT | 
UIMM }, +	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD }, +	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_rfe,  M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0 }, +	{ insn_rotr,  M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE }, +	{ insn_scd,  M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM }, +	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE }, +	{ insn_sllv,  M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD }, +	{ insn_slt,  M(spec_op, 0, 0, 0, 0, slt_op),  RS | RT | RD }, +	{ insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, +	{ insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD }, +	{ insn_sra,  M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE }, +	{ insn_srl,  M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE }, +	{ insn_srlv,  M(spec_op, 0, 0, 0, 0, srlv_op),  RS | RT | RD }, +	{ insn_subu,  M(spec_op, 0, 0, 0, 0, subu_op),	RS | RT | RD }, +	{ insn_sw,  M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, +	{ insn_sync, M(spec_op, 0, 0, 0, 0, sync_op), RE }, +	{ insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, +	{ insn_tlbp,  M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0 }, +	{ insn_tlbr,  M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0 }, +	{ insn_tlbwi,  M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0 }, +	{ insn_tlbwr,  M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0 }, +	{ insn_wait, M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM }, +	{ insn_wsbh, M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD }, +	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM }, +	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD }, +	{ insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD }, +	{ insn_invalid, 0, 0 } +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ +	WARN(arg > 0x1ffff || arg < -0x20000, +	     KERN_WARNING "Micro-assembler field overflow\n"); + +	WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + +	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ +	WARN(arg & ~(JIMM_MASK << 2), +	     KERN_WARNING "Micro-assembler field overflow\n"); + +	return (arg >> 2) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) 
+{ +	struct insn *ip = NULL; +	unsigned int i; +	va_list ap; +	u32 op; + +	for (i = 0; insn_table[i].opcode != insn_invalid; i++) +		if (insn_table[i].opcode == opc) { +			ip = &insn_table[i]; +			break; +		} + +	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) +		panic("Unsupported Micro-assembler instruction %d", opc); + +	op = ip->match; +	va_start(ap, opc); +	if (ip->fields & RS) +		op |= build_rs(va_arg(ap, u32)); +	if (ip->fields & RT) +		op |= build_rt(va_arg(ap, u32)); +	if (ip->fields & RD) +		op |= build_rd(va_arg(ap, u32)); +	if (ip->fields & RE) +		op |= build_re(va_arg(ap, u32)); +	if (ip->fields & SIMM) +		op |= build_simm(va_arg(ap, s32)); +	if (ip->fields & UIMM) +		op |= build_uimm(va_arg(ap, u32)); +	if (ip->fields & BIMM) +		op |= build_bimm(va_arg(ap, s32)); +	if (ip->fields & JIMM) +		op |= build_jimm(va_arg(ap, u32)); +	if (ip->fields & FUNC) +		op |= build_func(va_arg(ap, u32)); +	if (ip->fields & SET) +		op |= build_set(va_arg(ap, u32)); +	if (ip->fields & SCIMM) +		op |= build_scimm(va_arg(ap, u32)); +	va_end(ap); + +	**buf = op; +	(*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ +	long laddr = (long)lab->addr; +	long raddr = (long)rel->addr; + +	switch (rel->type) { +	case R_MIPS_PC16: +		*rel->addr |= build_bimm(laddr - (raddr + 4)); +		break; + +	default: +		panic("Unsupported Micro-assembler relocation %d", +		      rel->type); +	} +} diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 23afdebc8e5..a01b0d6cedd 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -7,20 +7,12 @@   * support a subset of instructions, and does not try to hide pipeline   * effects like branch delay slots.   * - * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer + * Copyright (C) 2004, 2005, 2006, 2008	 Thiemo Seufer   * Copyright (C) 2005, 2007  Maciej W. Rozycki   * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013  MIPS Technologies, Inc.  All rights reserved.   
*/ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> - -#include <asm/inst.h> -#include <asm/elf.h> -#include <asm/bugs.h> -#include <asm/uasm.h> -  enum fields {  	RS = 0x001,  	RT = 0x002, @@ -37,10 +29,6 @@ enum fields {  #define OP_MASK		0x3f  #define OP_SH		26 -#define RS_MASK		0x1f -#define RS_SH		21 -#define RT_MASK		0x1f -#define RT_SH		16  #define RD_MASK		0x1f  #define RD_SH		11  #define RE_MASK		0x1f @@ -53,22 +41,22 @@ enum fields {  #define FUNC_SH		0  #define SET_MASK	0x7  #define SET_SH		0 -#define SCIMM_MASK	0xfffff -#define SCIMM_SH	6  enum opcode {  	insn_invalid, -	insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, -	insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, -	insn_bne, insn_cache, insn_daddu, insn_daddiu, insn_dmfc0, -	insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, -	insn_dsrl32, insn_drotr, insn_drotr32, insn_dsubu, insn_eret, -	insn_j, insn_jal, insn_jr, insn_ld, insn_ll, insn_lld, -	insn_lui, insn_lw, insn_mfc0, insn_mtc0, insn_or, insn_ori, -	insn_pref, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, -	insn_sra, insn_srl, insn_rotr, insn_subu, insn_sw, insn_tlbp, -	insn_tlbr, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori, -	insn_dins, insn_syscall, insn_bbit0, insn_bbit1 +	insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1, +	insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, +	insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm, +	insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll, +	insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret, +	insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, +	insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw, +	insn_lwx, insn_mfc0, insn_mfhi, insn_mflo, insn_mtc0, insn_mul, +	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, +	insn_sd, insn_sll, insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, +	insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, +	insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, +	insn_xor, insn_xori, insn_yield,  };  struct insn { @@ -77,219 +65,72 @@ struct insn {  	enum fields fields;  }; -/* This macro sets the non-variable bits of an instruction. 
*/ -#define M(a, b, c, d, e, f)					\ -	((a) << OP_SH						\ -	 | (b) << RS_SH						\ -	 | (c) << RT_SH						\ -	 | (d) << RD_SH						\ -	 | (e) << RE_SH						\ -	 | (f) << FUNC_SH) - -static struct insn insn_table[] __uasminitdata = { -	{ insn_addiu, M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, -	{ insn_addu, M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD }, -	{ insn_and, M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD }, -	{ insn_andi, M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, -	{ insn_beq, M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, -	{ insn_beql, M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, -	{ insn_bgez, M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM }, -	{ insn_bgezl, M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM }, -	{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, -	{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, -	{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, -	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, -	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, -	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, -	{ insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, -	{ insn_dsll, M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE }, -	{ insn_dsll32, M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE }, -	{ insn_dsra, M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE }, -	{ insn_dsrl, M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE }, -	{ insn_dsrl32, M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE }, -	{ insn_drotr, M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE }, -	{ insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE }, -	{ insn_dsubu, M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD }, -	{ insn_eret,  M(cop0_op, cop_op, 0, 0, 0, eret_op),  0 }, -	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM }, -	{ insn_jal,  M(jal_op, 0, 0, 0, 0, 0),  JIMM }, -	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jr_op),  RS }, -	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),  RT | SIMM }, -	{ insn_lw,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_mfc0,  M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET}, -	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET}, -	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD }, -	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM }, -	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_rfe,  M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0 }, -	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_scd,  M(scd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE }, -	{ insn_sra,  M(spec_op, 0, 0, 0, 0, sra_op),  RT | RD | RE }, -	{ insn_srl,  M(spec_op, 0, 0, 0, 0, srl_op),  RT | RD | RE }, -	{ insn_rotr,  M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE }, -	{ insn_subu,  M(spec_op, 0, 0, 0, 0, subu_op),  RS | RT | RD }, -	{ insn_sw,  M(sw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM }, -	{ insn_tlbp,  M(cop0_op, cop_op, 0, 0, 0, tlbp_op),  0 }, -	{ insn_tlbr,  M(cop0_op, cop_op, 0, 0, 0, tlbr_op),  0 }, -	{ insn_tlbwi,  M(cop0_op, cop_op, 0, 0, 0, tlbwi_op),  0 }, -	{ insn_tlbwr,  M(cop0_op, cop_op, 0, 0, 0, tlbwr_op),  0 }, -	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD }, -	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | 
UIMM }, -	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, -	{ insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, -	{ insn_bbit0, M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, -	{ insn_bbit1, M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, -	{ insn_invalid, 0, 0 } -}; - -#undef M - -static inline __uasminit u32 build_rs(u32 arg) +static inline u32 build_rs(u32 arg)  { -	if (arg & ~RS_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~RS_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return (arg & RS_MASK) << RS_SH;  } -static inline __uasminit u32 build_rt(u32 arg) +static inline u32 build_rt(u32 arg)  { -	if (arg & ~RT_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return (arg & RT_MASK) << RT_SH;  } -static inline __uasminit u32 build_rd(u32 arg) +static inline u32 build_rd(u32 arg)  { -	if (arg & ~RD_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~RD_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return (arg & RD_MASK) << RD_SH;  } -static inline __uasminit u32 build_re(u32 arg) +static inline u32 build_re(u32 arg)  { -	if (arg & ~RE_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~RE_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return (arg & RE_MASK) << RE_SH;  } -static inline __uasminit u32 build_simm(s32 arg) +static inline u32 build_simm(s32 arg)  { -	if (arg > 0x7fff || arg < -0x8000) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg > 0x7fff || arg < -0x8000, +	     KERN_WARNING "Micro-assembler field overflow\n");  	return arg & 0xffff;  } -static inline __uasminit u32 build_uimm(u32 arg) +static inline u32 build_uimm(u32 arg)  { -	if (arg & ~IMM_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~IMM_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return arg & IMM_MASK;  } -static inline __uasminit u32 build_bimm(s32 arg) +static inline u32 build_scimm(u32 arg)  { -	if (arg > 0x1ffff || arg < -0x20000) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); - -	if (arg & 0x3) -		printk(KERN_WARNING "Invalid micro-assembler branch target\n"); - -	return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); -} - -static inline __uasminit u32 build_jimm(u32 arg) -{ -	if (arg & ~((JIMM_MASK) << 2)) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); - -	return (arg >> 2) & JIMM_MASK; -} - -static inline __uasminit u32 build_scimm(u32 arg) -{ -	if (arg & ~SCIMM_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~SCIMM_MASK, +	     KERN_WARNING "Micro-assembler field overflow\n");  	return (arg & SCIMM_MASK) << SCIMM_SH;  } -static inline __uasminit u32 build_func(u32 arg) +static inline u32 build_func(u32 arg)  { -	if (arg & ~FUNC_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return arg & FUNC_MASK;  } -static inline __uasminit u32 build_set(u32 arg) +static inline u32 build_set(u32 arg)  { -	if (arg & ~SET_MASK) -		printk(KERN_WARNING "Micro-assembler field overflow\n"); +	WARN(arg & ~SET_MASK, KERN_WARNING "Micro-assembler field overflow\n");  	return arg & SET_MASK;  } -/* - * The order of opcode arguments is implicitly left to right, - * starting with RS and ending with FUNC or IMM. 
- */ -static void __uasminit build_insn(u32 **buf, enum opcode opc, ...) -{ -	struct insn *ip = NULL; -	unsigned int i; -	va_list ap; -	u32 op; - -	for (i = 0; insn_table[i].opcode != insn_invalid; i++) -		if (insn_table[i].opcode == opc) { -			ip = &insn_table[i]; -			break; -		} - -	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) -		panic("Unsupported Micro-assembler instruction %d", opc); - -	op = ip->match; -	va_start(ap, opc); -	if (ip->fields & RS) -		op |= build_rs(va_arg(ap, u32)); -	if (ip->fields & RT) -		op |= build_rt(va_arg(ap, u32)); -	if (ip->fields & RD) -		op |= build_rd(va_arg(ap, u32)); -	if (ip->fields & RE) -		op |= build_re(va_arg(ap, u32)); -	if (ip->fields & SIMM) -		op |= build_simm(va_arg(ap, s32)); -	if (ip->fields & UIMM) -		op |= build_uimm(va_arg(ap, u32)); -	if (ip->fields & BIMM) -		op |= build_bimm(va_arg(ap, s32)); -	if (ip->fields & JIMM) -		op |= build_jimm(va_arg(ap, u32)); -	if (ip->fields & FUNC) -		op |= build_func(va_arg(ap, u32)); -	if (ip->fields & SET) -		op |= build_set(va_arg(ap, u32)); -	if (ip->fields & SCIMM) -		op |= build_scimm(va_arg(ap, u32)); -	va_end(ap); - -	**buf = op; -	(*buf)++; -} +static void build_insn(u32 **buf, enum opcode opc, ...);  #define I_u1u2u3(op)					\  Ip_u1u2u3(op)						\ @@ -298,6 +139,13 @@ Ip_u1u2u3(op)						\  }							\  UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_s3s1s2(op)					\ +Ip_s3s1s2(op)						\ +{							\ +	build_insn(buf, insn##op, b, c, a);		\ +}							\ +UASM_EXPORT_SYMBOL(uasm_i##op); +  #define I_u2u1u3(op)					\  Ip_u2u1u3(op)						\  {							\ @@ -305,6 +153,13 @@ Ip_u2u1u3(op)						\  }							\  UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u3u2u1(op)					\ +Ip_u3u2u1(op)						\ +{							\ +	build_insn(buf, insn##op, c, b, a);		\ +}							\ +UASM_EXPORT_SYMBOL(uasm_i##op); +  #define I_u3u1u2(op)					\  Ip_u3u1u2(op)						\  {							\ @@ -340,6 +195,20 @@ Ip_u2u1msbu3(op)					\  }							\  UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u2u1msb32u3(op)				\ +Ip_u2u1msbu3(op)					\ +{							\ +	build_insn(buf, insn##op, b, a, c+d-33, c);	\ +}							\ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msbdu3(op)				\ +Ip_u2u1msbu3(op)					\ +{							\ +	build_insn(buf, insn##op, b, a, d-1, c);	\ +}							\ +UASM_EXPORT_SYMBOL(uasm_i##op); +  #define I_u1u2(op)					\  Ip_u1u2(op)						\  {							\ @@ -347,6 +216,13 @@ Ip_u1u2(op)						\  }							\  UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_u2u1(op)					\ +Ip_u1u2(op)						\ +{							\ +	build_insn(buf, insn##op, b, a);		\ +}							\ +UASM_EXPORT_SYMBOL(uasm_i##op); +  #define I_u1s2(op)					\  Ip_u1s2(op)						\  {							\ @@ -384,6 +260,7 @@ I_u1u2u3(_dmfc0)  I_u1u2u3(_dmtc0)  I_u2u1s3(_daddiu)  I_u3u1u2(_daddu) +I_u1u2(_divu)  I_u2u1u3(_dsll)  I_u2u1u3(_dsll32)  I_u2u1u3(_dsra) @@ -393,16 +270,24 @@ I_u2u1u3(_drotr)  I_u2u1u3(_drotr32)  I_u3u1u2(_dsubu)  I_0(_eret) +I_u2u1msbdu3(_ext) +I_u2u1msbu3(_ins)  I_u1(_j)  I_u1(_jal) +I_u2u1(_jalr)  I_u1(_jr) +I_u2s3u1(_lb)  I_u2s3u1(_ld) +I_u2s3u1(_lh)  I_u2s3u1(_ll)  I_u2s3u1(_lld)  I_u1s2(_lui)  I_u2s3u1(_lw)  I_u1u2u3(_mfc0) +I_u1(_mfhi) +I_u1(_mflo)  I_u1u2u3(_mtc0) +I_u3u1u2(_mul)  I_u2u1u3(_ori)  I_u3u1u2(_or)  I_0(_rfe) @@ -410,25 +295,37 @@ I_u2s3u1(_sc)  I_u2s3u1(_scd)  I_u2s3u1(_sd)  I_u2u1u3(_sll) +I_u3u2u1(_sllv) +I_s3s1s2(_slt) +I_u2u1s3(_sltiu) +I_u3u1u2(_sltu)  I_u2u1u3(_sra)  I_u2u1u3(_srl) +I_u3u2u1(_srlv)  I_u2u1u3(_rotr)  I_u3u1u2(_subu)  I_u2s3u1(_sw) +I_u1(_sync)  I_0(_tlbp)  I_0(_tlbr)  I_0(_tlbwi)  I_0(_tlbwr) +I_u1(_wait); +I_u2u1(_wsbh)  I_u3u1u2(_xor)  I_u2u1u3(_xori) +I_u2u1(_yield) 
 I_u2u1msbu3(_dins); +I_u2u1msb32u3(_dinsm);  I_u1(_syscall);  I_u1u2s3(_bbit0);  I_u1u2s3(_bbit1); +I_u3u1u2(_lwx) +I_u3u1u2(_ldx)  #ifdef CONFIG_CPU_CAVIUM_OCTEON  #include <asm/octeon/octeon.h> -void __uasminit uasm_i_pref(u32 **buf, unsigned int a, signed int b, +void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,  			    unsigned int c)  {  	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X) && a <= 24 && a != 5) @@ -440,21 +337,21 @@ void __uasminit uasm_i_pref(u32 **buf, unsigned int a, signed int b,  	else  		build_insn(buf, insn_pref, c, a, b);  } -UASM_EXPORT_SYMBOL(uasm_i_pref); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_i_pref));  #else  I_u2s3u1(_pref)  #endif  /* Handle labels. */ -void __uasminit uasm_build_label(struct uasm_label **lab, u32 *addr, int lid) +void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr, int lid)  {  	(*lab)->addr = addr;  	(*lab)->lab = lid;  	(*lab)++;  } -UASM_EXPORT_SYMBOL(uasm_build_label); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label)); -int __uasminit uasm_in_compat_space_p(long addr) +int ISAFUNC(uasm_in_compat_space_p)(long addr)  {  	/* Is this address in 32bit compat space? */  #ifdef CONFIG_64BIT @@ -463,9 +360,9 @@ int __uasminit uasm_in_compat_space_p(long addr)  	return 1;  #endif  } -UASM_EXPORT_SYMBOL(uasm_in_compat_space_p); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p)); -static int __uasminit uasm_rel_highest(long val) +static int uasm_rel_highest(long val)  {  #ifdef CONFIG_64BIT  	return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; @@ -474,7 +371,7 @@ static int __uasminit uasm_rel_highest(long val)  #endif  } -static int __uasminit uasm_rel_higher(long val) +static int uasm_rel_higher(long val)  {  #ifdef CONFIG_64BIT  	return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; @@ -483,77 +380,65 @@ static int __uasminit uasm_rel_higher(long val)  #endif  } -int __uasminit uasm_rel_hi(long val) +int ISAFUNC(uasm_rel_hi)(long val)  {  	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;  } -UASM_EXPORT_SYMBOL(uasm_rel_hi); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_hi)); -int __uasminit uasm_rel_lo(long val) +int ISAFUNC(uasm_rel_lo)(long val)  {  	return ((val & 0xffff) ^ 0x8000) - 0x8000;  } -UASM_EXPORT_SYMBOL(uasm_rel_lo); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_lo)); -void __uasminit UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr) +void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr)  { -	if (!uasm_in_compat_space_p(addr)) { -		uasm_i_lui(buf, rs, uasm_rel_highest(addr)); +	if (!ISAFUNC(uasm_in_compat_space_p)(addr)) { +		ISAFUNC(uasm_i_lui)(buf, rs, uasm_rel_highest(addr));  		if (uasm_rel_higher(addr)) -			uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr)); -		if (uasm_rel_hi(addr)) { -			uasm_i_dsll(buf, rs, rs, 16); -			uasm_i_daddiu(buf, rs, rs, uasm_rel_hi(addr)); -			uasm_i_dsll(buf, rs, rs, 16); +			ISAFUNC(uasm_i_daddiu)(buf, rs, rs, uasm_rel_higher(addr)); +		if (ISAFUNC(uasm_rel_hi(addr))) { +			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16); +			ISAFUNC(uasm_i_daddiu)(buf, rs, rs, +					ISAFUNC(uasm_rel_hi)(addr)); +			ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16);  		} else -			uasm_i_dsll32(buf, rs, rs, 0); +			ISAFUNC(uasm_i_dsll32)(buf, rs, rs, 0);  	} else -		uasm_i_lui(buf, rs, uasm_rel_hi(addr)); +		ISAFUNC(uasm_i_lui)(buf, rs, ISAFUNC(uasm_rel_hi(addr)));  } -UASM_EXPORT_SYMBOL(UASM_i_LA_mostly); +UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA_mostly)); -void __uasminit UASM_i_LA(u32 **buf, unsigned int rs, long addr) +void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned 
int rs, long addr)  { -	UASM_i_LA_mostly(buf, rs, addr); -	if (uasm_rel_lo(addr)) { -		if (!uasm_in_compat_space_p(addr)) -			uasm_i_daddiu(buf, rs, rs, uasm_rel_lo(addr)); +	ISAFUNC(UASM_i_LA_mostly)(buf, rs, addr); +	if (ISAFUNC(uasm_rel_lo(addr))) { +		if (!ISAFUNC(uasm_in_compat_space_p)(addr)) +			ISAFUNC(uasm_i_daddiu)(buf, rs, rs, +					ISAFUNC(uasm_rel_lo(addr)));  		else -			uasm_i_addiu(buf, rs, rs, uasm_rel_lo(addr)); +			ISAFUNC(uasm_i_addiu)(buf, rs, rs, +					ISAFUNC(uasm_rel_lo(addr)));  	}  } -UASM_EXPORT_SYMBOL(UASM_i_LA); +UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA));  /* Handle relocations. */ -void __uasminit -uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid) +void ISAFUNC(uasm_r_mips_pc16)(struct uasm_reloc **rel, u32 *addr, int lid)  {  	(*rel)->addr = addr;  	(*rel)->type = R_MIPS_PC16;  	(*rel)->lab = lid;  	(*rel)++;  } -UASM_EXPORT_SYMBOL(uasm_r_mips_pc16); - -static inline void __uasminit -__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) -{ -	long laddr = (long)lab->addr; -	long raddr = (long)rel->addr; - -	switch (rel->type) { -	case R_MIPS_PC16: -		*rel->addr |= build_bimm(laddr - (raddr + 4)); -		break; +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_r_mips_pc16)); -	default: -		panic("Unsupported Micro-assembler relocation %d", -		      rel->type); -	} -} +static inline void __resolve_relocs(struct uasm_reloc *rel, +				    struct uasm_label *lab); -void __uasminit -uasm_resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel, +				  struct uasm_label *lab)  {  	struct uasm_label *l; @@ -562,40 +447,39 @@ uasm_resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab)  			if (rel->lab == l->lab)  				__resolve_relocs(rel, l);  } -UASM_EXPORT_SYMBOL(uasm_resolve_relocs); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_resolve_relocs)); -void __uasminit -uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end, long off) +void ISAFUNC(uasm_move_relocs)(struct uasm_reloc *rel, u32 *first, u32 *end, +			       long off)  {  	for (; rel->lab != UASM_LABEL_INVALID; rel++)  		if (rel->addr >= first && rel->addr < end)  			rel->addr += off;  } -UASM_EXPORT_SYMBOL(uasm_move_relocs); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_relocs)); -void __uasminit -uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end, long off) +void ISAFUNC(uasm_move_labels)(struct uasm_label *lab, u32 *first, u32 *end, +			       long off)  {  	for (; lab->lab != UASM_LABEL_INVALID; lab++)  		if (lab->addr >= first && lab->addr < end)  			lab->addr += off;  } -UASM_EXPORT_SYMBOL(uasm_move_labels); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_labels)); -void __uasminit -uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab, u32 *first, -		  u32 *end, u32 *target) +void ISAFUNC(uasm_copy_handler)(struct uasm_reloc *rel, struct uasm_label *lab, +				u32 *first, u32 *end, u32 *target)  {  	long off = (long)(target - first);  	memcpy(target, first, (end - first) * sizeof(u32)); -	uasm_move_relocs(rel, first, end, off); -	uasm_move_labels(lab, first, end, off); +	ISAFUNC(uasm_move_relocs(rel, first, end, off)); +	ISAFUNC(uasm_move_labels(lab, first, end, off));  } -UASM_EXPORT_SYMBOL(uasm_copy_handler); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_copy_handler)); -int __uasminit uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr) +int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr)  {  	for (; rel->lab != UASM_LABEL_INVALID; rel++) {  		if (rel->addr == addr @@ -606,88 +490,92 @@ int __uasminit 
uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr)  	return 0;  } -UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_insn_has_bdelay));  /* Convenience functions for labeled branches. */ -void __uasminit -uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_bltz)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			   int lid) +{ +	uasm_r_mips_pc16(r, *p, lid); +	ISAFUNC(uasm_i_bltz)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bltz)); + +void ISAFUNC(uasm_il_b)(u32 **p, struct uasm_reloc **r, int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bltz(p, reg, 0); +	ISAFUNC(uasm_i_b)(p, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bltz); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b)); -void __uasminit -uasm_il_b(u32 **p, struct uasm_reloc **r, int lid) +void ISAFUNC(uasm_il_beq)(u32 **p, struct uasm_reloc **r, unsigned int r1, +			  unsigned int r2, int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_b(p, 0); +	ISAFUNC(uasm_i_beq)(p, r1, r2, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_b); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beq)); -void __uasminit -uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			   int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_beqz(p, reg, 0); +	ISAFUNC(uasm_i_beqz)(p, reg, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_beqz); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqz)); -void __uasminit -uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_beqzl)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			    int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_beqzl(p, reg, 0); +	ISAFUNC(uasm_i_beqzl)(p, reg, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_beqzl); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqzl)); -void __uasminit -uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1, -	unsigned int reg2, int lid) +void ISAFUNC(uasm_il_bne)(u32 **p, struct uasm_reloc **r, unsigned int reg1, +			  unsigned int reg2, int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bne(p, reg1, reg2, 0); +	ISAFUNC(uasm_i_bne)(p, reg1, reg2, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bne); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bne)); -void __uasminit -uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_bnez)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			   int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bnez(p, reg, 0); +	ISAFUNC(uasm_i_bnez)(p, reg, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bnez); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bnez)); -void __uasminit -uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_bgezl)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			    int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bgezl(p, reg, 0); +	ISAFUNC(uasm_i_bgezl)(p, reg, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bgezl); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgezl)); -void __uasminit -uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid) +void ISAFUNC(uasm_il_bgez)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			   int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bgez(p, reg, 0); +	ISAFUNC(uasm_i_bgez)(p, reg, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bgez); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgez)); -void __uasminit -uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg, -	      unsigned int bit, int lid) +void ISAFUNC(uasm_il_bbit0)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			    unsigned int bit, 
int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bbit0(p, reg, bit, 0); +	ISAFUNC(uasm_i_bbit0)(p, reg, bit, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bbit0); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit0)); -void __uasminit -uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg, -	      unsigned int bit, int lid) +void ISAFUNC(uasm_il_bbit1)(u32 **p, struct uasm_reloc **r, unsigned int reg, +			    unsigned int bit, int lid)  {  	uasm_r_mips_pc16(r, *p, lid); -	uasm_i_bbit1(p, reg, bit, 0); +	ISAFUNC(uasm_i_bbit1)(p, reg, bit, 0);  } -UASM_EXPORT_SYMBOL(uasm_il_bbit1); +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit1));
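
The uasm.c hunks above remove the table of classic-MIPS opcode templates (the M() macro and insn_table[]) together with the body of build_insn() from this file, and convert the per-field overflow checks in build_rs()/build_rt()/.../build_set() from printk() to WARN(). As a reading aid only, here is a minimal user-space sketch of that table-driven encoding pattern; it is not the kernel code. The two-entry table, the field bitmask values, and helper names such as build_reg() are invented for this example, and fprintf() stands in for WARN():

#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

enum opcode { insn_addiu, insn_or, insn_invalid };

/* which operand fields an opcode takes, consumed left to right */
enum fields { RS = 0x1, RT = 0x2, RD = 0x4, SIMM = 0x8 };

#define OP_SH	26
#define RS_SH	21
#define RT_SH	16
#define RD_SH	11
#define FUNC_SH	0

/* opcode template, analogous to the M() macro removed in the diff */
#define M(op, func) (((uint32_t)(op) << OP_SH) | ((uint32_t)(func) << FUNC_SH))

struct insn {
	enum opcode opcode;
	uint32_t match;
	unsigned int fields;
};

static const struct insn insn_table[] = {
	{ insn_addiu, M(0x09, 0x00), RS | RT | SIMM },	/* addiu rt, rs, imm */
	{ insn_or,    M(0x00, 0x25), RS | RT | RD },	/* or rd, rs, rt     */
	{ insn_invalid, 0, 0 },				/* sentinel          */
};

/* mask-and-shift one 5-bit register field; warn-and-truncate on overflow */
static uint32_t build_reg(uint32_t arg, int shift)
{
	if (arg & ~0x1fu)
		fprintf(stderr, "field overflow\n");	/* stands in for WARN() */
	return (arg & 0x1f) << shift;
}

static uint32_t build_simm(int32_t arg)
{
	if (arg > 0x7fff || arg < -0x8000)
		fprintf(stderr, "field overflow\n");
	return (uint32_t)arg & 0xffff;
}

/* OR the variadic operands into the opcode template, one field at a time */
static uint32_t build_insn(enum opcode opc, ...)
{
	const struct insn *ip;
	uint32_t op;
	va_list ap;

	for (ip = insn_table; ip->opcode != insn_invalid; ip++)
		if (ip->opcode == opc)
			break;
	op = ip->match;	/* unknown opcode encodes as 0; the kernel panics instead */

	va_start(ap, opc);
	if (ip->fields & RS)
		op |= build_reg(va_arg(ap, uint32_t), RS_SH);
	if (ip->fields & RT)
		op |= build_reg(va_arg(ap, uint32_t), RT_SH);
	if (ip->fields & RD)
		op |= build_reg(va_arg(ap, uint32_t), RD_SH);
	if (ip->fields & SIMM)
		op |= build_simm(va_arg(ap, int32_t));
	va_end(ap);

	return op;
}

int main(void)
{
	/* addiu $29, $29, -32   and   or $2, $4, $5 */
	printf("0x%08x\n", build_insn(insn_addiu, 29, 29, -32));
	printf("0x%08x\n", build_insn(insn_or, 4, 5, 2));
	return 0;
}

Built with any hosted C compiler, the sketch prints 0x27bdffe0 and 0x00851025, the MIPS32 words for addiu sp,sp,-32 and or v0,a0,a1. The kernel's build_insn() applies the same idea, with fields consumed left to right as the removed comment notes, plus a panic() on unsupported opcodes and on the daddiu R4000 erratum case.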

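One arithmetic detail in the UASM_i_LA_mostly()/UASM_i_LA() hunks deserves spelling out: uasm_rel_hi() adds 0x8000 before taking the upper 16 bits so that, once uasm_rel_lo() has been sign-extended by the addiu/daddiu that follows the lui, the pair reconstructs the original address exactly. The stand-alone check below is a sketch, not kernel code; rel_hi() and rel_lo() are local copies of the two expressions from the diff, and the sample addresses are arbitrary values inside the sign-extended 32-bit ("compat space") range that UASM_i_LA handles with a bare lui + addiu:

#include <assert.h>
#include <stdio.h>

/* local copies of the expressions used by uasm_rel_hi()/uasm_rel_lo() */
static int rel_hi(long val)
{
	return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
}

static int rel_lo(long val)
{
	return ((val & 0xffff) ^ 0x8000) - 0x8000;
}

int main(void)
{
	/* addresses representable as a sign-extended 32-bit value */
	long samples[] = { 0x12345678L, -0x7fff8010L, 0xffffL, -4096L };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		long addr = samples[i];
		/* lui places rel_hi() in the upper half; addiu sign-extends rel_lo() */
		long rebuilt = (long)rel_hi(addr) * 0x10000L + rel_lo(addr);

		printf("%#lx -> hi %#x, lo %d, rebuilt %#lx\n",
		       (unsigned long)addr, rel_hi(addr) & 0xffff,
		       rel_lo(addr), (unsigned long)rebuilt);
		assert(rebuilt == addr);
	}
	return 0;
}

Without the +0x8000 rounding, any address whose low half is 0x8000 or more would be rebuilt 0x10000 too low after sign extension; the same correction, scaled up, is what the 0x80008000L and 0x800080008000L constants in uasm_rel_higher() and uasm_rel_highest() provide for the upper pieces of a 64-bit address.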