Diffstat (limited to 'arch/mips/lib')
 -rw-r--r--  arch/mips/lib/Makefile           |  24
 -rw-r--r--  arch/mips/lib/bitops.c           | 179
 -rw-r--r--  arch/mips/lib/csum_partial.S     | 325
 -rw-r--r--  arch/mips/lib/delay.c            |  18
 -rw-r--r--  arch/mips/lib/dump_tlb.c         |   6
 -rw-r--r--  arch/mips/lib/iomap-pci.c        |  30
 -rw-r--r--  arch/mips/lib/memcpy-inatomic.S  | 451
 -rw-r--r--  arch/mips/lib/memcpy.S           | 427
 -rw-r--r--  arch/mips/lib/memset.S           | 226
 -rw-r--r--  arch/mips/lib/mips-atomic.c      | 161
 -rw-r--r--  arch/mips/lib/r3k_dump_tlb.c     |   2
 -rw-r--r--  arch/mips/lib/strlen_user.S      |  43
 -rw-r--r--  arch/mips/lib/strncpy_user.S     |  63
 -rw-r--r--  arch/mips/lib/strnlen_user.S     |  46
 -rw-r--r--  arch/mips/lib/uncached.c         |   5

15 files changed, 1114 insertions(+), 892 deletions(-)
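Note on the new arch/mips/lib/bitops.c below: it adds the out-of-line fallbacks (__mips_set_bit(), __mips_test_and_set_bit(), and friends) that the inline bit helpers call when, as the kernel-doc comments put it, they "cannot find a faster solution" (e.g. no usable ll/sc). Each fallback simply masks interrupts around a plain read-modify-write of the word containing the bit. The user-space C sketch below is illustrative only and is not part of the patch; fake_irq_save()/fake_irq_restore() are stand-ins for the kernel's raw_local_irq_save()/raw_local_irq_restore(), and SZLONG_LOG/SZLONG_MASK are re-derived locally instead of coming from kernel headers.

    #include <stdio.h>

    /* Word-size constants, normally provided by kernel headers. */
    #define SZLONG_LOG  (sizeof(unsigned long) == 8 ? 6 : 5)
    #define SZLONG_MASK (8 * sizeof(unsigned long) - 1)

    /* Stand-ins for raw_local_irq_save()/raw_local_irq_restore(). */
    static unsigned long fake_irq_save(void) { return 0; }
    static void fake_irq_restore(unsigned long flags) { (void)flags; }

    /* Same shape as __mips_test_and_set_bit(): locate the word holding bit
     * @nr, read the old value, set the bit, all with interrupts masked. */
    static int test_and_set_bit_fallback(unsigned long nr,
                                         volatile unsigned long *addr)
    {
        unsigned long *a = (unsigned long *)addr + (nr >> SZLONG_LOG);
        unsigned long mask = 1UL << (nr & SZLONG_MASK);
        unsigned long flags;
        int res;

        flags = fake_irq_save();
        res = (*a & mask) != 0;
        *a |= mask;
        fake_irq_restore(flags);
        return res;
    }

    int main(void)
    {
        unsigned long map[2] = { 0, 0 };

        printf("%d\n", test_and_set_bit_fallback(40, map)); /* 0: was clear */
        printf("%d\n", test_and_set_bit_fallback(40, map)); /* 1: now set */
        return 0;
    }

On a uniprocessor kernel with interrupts masked this read-modify-write cannot be interleaved with another bit operation, which is why the real fallbacks need no ll/sc; the sketch only models the control flow, not that guarantee.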
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 2adead5a8a3..eeddc58802e 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -2,32 +2,16 @@  # Makefile for MIPS-specific library files..  # -lib-y	+= csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \ -	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o +lib-y	+= bitops.o csum_partial.o delay.o memcpy.o memset.o \ +	   mips-atomic.o strlen_user.o strncpy_user.o \ +	   strnlen_user.o uncached.o  obj-y			+= iomap.o  obj-$(CONFIG_PCI)	+= iomap-pci.o -obj-$(CONFIG_CPU_LOONGSON2)	+= dump_tlb.o -obj-$(CONFIG_CPU_MIPS32)	+= dump_tlb.o -obj-$(CONFIG_CPU_MIPS64)	+= dump_tlb.o -obj-$(CONFIG_CPU_NEVADA)	+= dump_tlb.o -obj-$(CONFIG_CPU_R10000)	+= dump_tlb.o +obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o  obj-$(CONFIG_CPU_R3000)		+= r3k_dump_tlb.o -obj-$(CONFIG_CPU_R4300)		+= dump_tlb.o -obj-$(CONFIG_CPU_R4X00)		+= dump_tlb.o -obj-$(CONFIG_CPU_R5000)		+= dump_tlb.o -obj-$(CONFIG_CPU_R5432)		+= dump_tlb.o -obj-$(CONFIG_CPU_R5500)		+= dump_tlb.o -obj-$(CONFIG_CPU_R6000)		+= -obj-$(CONFIG_CPU_R8000)		+= -obj-$(CONFIG_CPU_RM7000)	+= dump_tlb.o -obj-$(CONFIG_CPU_RM9000)	+= dump_tlb.o -obj-$(CONFIG_CPU_SB1)		+= dump_tlb.o  obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o -obj-$(CONFIG_CPU_TX49XX)	+= dump_tlb.o -obj-$(CONFIG_CPU_VR41XX)	+= dump_tlb.o -obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= dump_tlb.o  # libgcc-style stuff needed in the kernel  obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c new file mode 100644 index 00000000000..3b2a1e78a54 --- /dev/null +++ b/arch/mips/lib/bitops.c @@ -0,0 +1,179 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 1994-1997, 99, 2000, 06, 07 Ralf Baechle (ralf@linux-mips.org) + * Copyright (c) 1999, 2000  Silicon Graphics, Inc. + */ +#include <linux/bitops.h> +#include <linux/irqflags.h> +#include <linux/export.h> + + +/** + * __mips_set_bit - Atomically set a bit in memory.  This is called by + * set_bit() if it cannot find a faster solution. + * @nr: the bit to set + * @addr: the address to start counting from + */ +void __mips_set_bit(unsigned long nr, volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	*a |= mask; +	raw_local_irq_restore(flags); +} +EXPORT_SYMBOL(__mips_set_bit); + + +/** + * __mips_clear_bit - Clears a bit in memory.  This is called by clear_bit() if + * it cannot find a faster solution. + * @nr: Bit to clear + * @addr: Address to start counting from + */ +void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	*a &= ~mask; +	raw_local_irq_restore(flags); +} +EXPORT_SYMBOL(__mips_clear_bit); + + +/** + * __mips_change_bit - Toggle a bit in memory.	This is called by change_bit() + * if it cannot find a faster solution. 
+ * @nr: Bit to change + * @addr: Address to start counting from + */ +void __mips_change_bit(unsigned long nr, volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	*a ^= mask; +	raw_local_irq_restore(flags); +} +EXPORT_SYMBOL(__mips_change_bit); + + +/** + * __mips_test_and_set_bit - Set a bit and return its old value.  This is + * called by test_and_set_bit() if it cannot find a faster solution. + * @nr: Bit to set + * @addr: Address to count from + */ +int __mips_test_and_set_bit(unsigned long nr, +			    volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; +	int res; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	res = (mask & *a) != 0; +	*a |= mask; +	raw_local_irq_restore(flags); +	return res; +} +EXPORT_SYMBOL(__mips_test_and_set_bit); + + +/** + * __mips_test_and_set_bit_lock - Set a bit and return its old value.  This is + * called by test_and_set_bit_lock() if it cannot find a faster solution. + * @nr: Bit to set + * @addr: Address to count from + */ +int __mips_test_and_set_bit_lock(unsigned long nr, +				 volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; +	int res; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	res = (mask & *a) != 0; +	*a |= mask; +	raw_local_irq_restore(flags); +	return res; +} +EXPORT_SYMBOL(__mips_test_and_set_bit_lock); + + +/** + * __mips_test_and_clear_bit - Clear a bit and return its old value.  This is + * called by test_and_clear_bit() if it cannot find a faster solution. + * @nr: Bit to clear + * @addr: Address to count from + */ +int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; +	int res; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	res = (mask & *a) != 0; +	*a &= ~mask; +	raw_local_irq_restore(flags); +	return res; +} +EXPORT_SYMBOL(__mips_test_and_clear_bit); + + +/** + * __mips_test_and_change_bit - Change a bit and return its old value.	This is + * called by test_and_change_bit() if it cannot find a faster solution. + * @nr: Bit to change + * @addr: Address to count from + */ +int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr) +{ +	unsigned long *a = (unsigned long *)addr; +	unsigned bit = nr & SZLONG_MASK; +	unsigned long mask; +	unsigned long flags; +	int res; + +	a += nr >> SZLONG_LOG; +	mask = 1UL << bit; +	raw_local_irq_save(flags); +	res = (mask & *a) != 0; +	*a ^= mask; +	raw_local_irq_restore(flags); +	return res; +} +EXPORT_SYMBOL(__mips_test_and_change_bit); diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 6b876ca299e..9901237563c 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -8,6 +8,7 @@   * Copyright (C) 1998, 1999 Ralf Baechle   * Copyright (C) 1999 Silicon Graphics, Inc.   * Copyright (C) 2007  Maciej W. Rozycki + * Copyright (C) 2014 Imagination Technologies Ltd.   
*/  #include <linux/errno.h>  #include <asm/asm.h> @@ -55,20 +56,26 @@  #define UNIT(unit)  ((unit)*NBYTES)  #define ADDC(sum,reg)						\ +	.set	push;						\ +	.set	noat;						\  	ADD	sum, reg;					\  	sltu	v1, sum, reg;					\  	ADD	sum, v1;					\ +	.set	pop  #define ADDC32(sum,reg)						\ +	.set	push;						\ +	.set	noat;						\  	addu	sum, reg;					\  	sltu	v1, sum, reg;					\  	addu	sum, v1;					\ +	.set	pop  #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\  	LOAD	_t0, (offset + UNIT(0))(src);			\  	LOAD	_t1, (offset + UNIT(1))(src);			\ -	LOAD	_t2, (offset + UNIT(2))(src); 			\ -	LOAD	_t3, (offset + UNIT(3))(src); 			\ +	LOAD	_t2, (offset + UNIT(2))(src);			\ +	LOAD	_t3, (offset + UNIT(3))(src);			\  	ADDC(sum, _t0);						\  	ADDC(sum, _t1);						\  	ADDC(sum, _t2);						\ @@ -270,7 +277,7 @@ LEAF(csum_partial)  #endif  	/* odd buffer alignment? */ -#ifdef CPU_MIPSR2 +#ifdef CONFIG_CPU_MIPSR2  	wsbh	v1, sum  	movn	sum, v1, t7  #else @@ -285,7 +292,7 @@ LEAF(csum_partial)  1:  #endif  	.set	reorder -	/* Add the passed partial csum.  */ +	/* Add the passed partial csum.	 */  	ADDC32(sum, a2)  	jr	ra  	.set	noreorder @@ -296,9 +303,9 @@ LEAF(csum_partial)   * checksum and copy routines based on memcpy.S   *   *	csum_partial_copy_nocheck(src, dst, len, sum) - *	__csum_partial_copy_user(src, dst, len, sum, errp) + *	__csum_partial_copy_kernel(src, dst, len, sum, errp)   * - * See "Spec" in memcpy.S for details.  Unlike __copy_user, all + * See "Spec" in memcpy.S for details.	Unlike __copy_user, all   * function in this file use the standard calling convention.   */ @@ -327,20 +334,58 @@ LEAF(csum_partial)   * These handlers do not need to overwrite any data.   */ -#define EXC(inst_reg,addr,handler)		\ -9:	inst_reg, addr;				\ -	.section __ex_table,"a";		\ -	PTR	9b, handler;			\ -	.previous +/* Instruction type */ +#define LD_INSN 1 +#define ST_INSN 2 +#define LEGACY_MODE 1 +#define EVA_MODE    2 +#define USEROP   1 +#define KERNELOP 2 + +/* + * Wrapper to add an entry in the exception table + * in case the insn causes a memory exception. 
+ * Arguments: + * insn    : Load/store instruction + * type    : Instruction type + * reg     : Register + * addr    : Address + * handler : Exception handler + */ +#define EXC(insn, type, reg, addr, handler)	\ +	.if \mode == LEGACY_MODE;		\ +9:		insn reg, addr;			\ +		.section __ex_table,"a";	\ +		PTR	9b, handler;		\ +		.previous;			\ +	/* This is enabled in EVA mode */	\ +	.else;					\ +		/* If loading from user or storing to user */	\ +		.if ((\from == USEROP) && (type == LD_INSN)) || \ +		    ((\to == USEROP) && (type == ST_INSN));	\ +9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\ +			.section __ex_table,"a";		\ +			PTR	9b, handler;			\ +			.previous;				\ +		.else;						\ +			/* EVA without exception */		\ +			insn reg, addr;				\ +		.endif;						\ +	.endif + +#undef LOAD  #ifdef USE_DOUBLE -#define LOAD   ld -#define LOADL  ldl -#define LOADR  ldr -#define STOREL sdl -#define STORER sdr -#define STORE  sd +#define LOADK	ld /* No exception */ +#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler) +#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler) +#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler) +#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler) +#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler) +#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler) +#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler) +#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)  #define ADD    daddu  #define SUB    dsubu  #define SRL    dsrl @@ -352,12 +397,15 @@ LEAF(csum_partial)  #else -#define LOAD   lw -#define LOADL  lwl -#define LOADR  lwr -#define STOREL swl -#define STORER swr -#define STORE  sw +#define LOADK	lw /* No exception */ +#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler) +#define LOADBU(reg, addr, handler)	EXC(lbu, LD_INSN, reg, addr, handler) +#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler) +#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler) +#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler) +#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler) +#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler) +#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)  #define ADD    addu  #define SUB    subu  #define SRL    srl @@ -371,16 +419,16 @@ LEAF(csum_partial)  #ifdef CONFIG_CPU_LITTLE_ENDIAN  #define LDFIRST LOADR -#define LDREST  LOADL +#define LDREST	LOADL  #define STFIRST STORER -#define STREST  STOREL +#define STREST	STOREL  #define SHIFT_DISCARD SLLV  #define SHIFT_DISCARD_REVERT SRLV  #else  #define LDFIRST LOADL -#define LDREST  LOADR +#define LDREST	LOADR  #define STFIRST STOREL -#define STREST  STORER +#define STREST	STORER  #define SHIFT_DISCARD SRLV  #define SHIFT_DISCARD_REVERT SLLV  #endif @@ -396,14 +444,20 @@ LEAF(csum_partial)  	.set	at=v1  #endif -LEAF(__csum_partial_copy_user) +	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck +  	PTR_ADDU	AT, src, len	/* See (1) above. 
*/ +	/* initialize __nocheck if this the first time we execute this +	 * macro +	 */  #ifdef CONFIG_64BIT  	move	errptr, a4  #else  	lw	errptr, 16(sp)  #endif -FEXPORT(csum_partial_copy_nocheck) +	.if \__nocheck == 1 +	FEXPORT(csum_partial_copy_nocheck) +	.endif  	move	sum, zero  	move	odd, zero  	/* @@ -419,48 +473,48 @@ FEXPORT(csum_partial_copy_nocheck)  	 */  	sltu	t2, len, NBYTES  	and	t1, dst, ADDRMASK -	bnez	t2, .Lcopy_bytes_checklen +	bnez	t2, .Lcopy_bytes_checklen\@  	 and	t0, src, ADDRMASK  	andi	odd, dst, 0x1			/* odd buffer? */ -	bnez	t1, .Ldst_unaligned +	bnez	t1, .Ldst_unaligned\@  	 nop -	bnez	t0, .Lsrc_unaligned_dst_aligned +	bnez	t0, .Lsrc_unaligned_dst_aligned\@  	/*  	 * use delay slot for fall-through  	 * src and dst are aligned; need to compute rem  	 */ -.Lboth_aligned: -	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter -	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES +.Lboth_aligned\@: +	 SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter +	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES  	 nop  	SUB	len, 8*NBYTES		# subtract here for bgez loop  	.align	4  1: -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) -EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy) -EXC(	LOAD	t5, UNIT(5)(src),	.Ll_exc_copy) -EXC(	LOAD	t6, UNIT(6)(src),	.Ll_exc_copy) -EXC(	LOAD	t7, UNIT(7)(src),	.Ll_exc_copy) +	LOAD(t0, UNIT(0)(src), .Ll_exc\@) +	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) +	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) +	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@) +	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@) +	LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@) +	LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@) +	LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)  	SUB	len, len, 8*NBYTES  	ADD	src, src, 8*NBYTES -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc) +	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)  	ADDC(sum, t0) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc) +	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)  	ADDC(sum, t1) -EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc) +	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)  	ADDC(sum, t2) -EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc) +	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)  	ADDC(sum, t3) -EXC(	STORE	t4, UNIT(4)(dst),	.Ls_exc) +	STORE(t4, UNIT(4)(dst),	.Ls_exc\@)  	ADDC(sum, t4) -EXC(	STORE	t5, UNIT(5)(dst),	.Ls_exc) +	STORE(t5, UNIT(5)(dst),	.Ls_exc\@)  	ADDC(sum, t5) -EXC(	STORE	t6, UNIT(6)(dst),	.Ls_exc) +	STORE(t6, UNIT(6)(dst),	.Ls_exc\@)  	ADDC(sum, t6) -EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc) +	STORE(t7, UNIT(7)(dst),	.Ls_exc\@)  	ADDC(sum, t7)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 8*NBYTES @@ -471,44 +525,44 @@ EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc)  	/*  	 * len == the number of bytes left to copy < 8*NBYTES  	 */ -.Lcleanup_both_aligned: +.Lcleanup_both_aligned\@:  #define rem t7 -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	 sltu	t0, len, 4*NBYTES -	bnez	t0, .Lless_than_4units +	bnez	t0, .Lless_than_4units\@  	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES  	/*  	 * len >= 4*NBYTES  	 */ -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) +	LOAD(t0, UNIT(0)(src), .Ll_exc\@) +	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) +	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) +	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)  	SUB	len, len, 4*NBYTES  	ADD	src, src, 4*NBYTES -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc) +	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)  	ADDC(sum, t0) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc) +	STORE(t1, UNIT(1)(dst),	
.Ls_exc\@)  	ADDC(sum, t1) -EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc) +	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)  	ADDC(sum, t2) -EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc) +	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)  	ADDC(sum, t3)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 4*NBYTES -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	.set	noreorder -.Lless_than_4units: +.Lless_than_4units\@:  	/*  	 * rem = len % NBYTES  	 */ -	beq	rem, len, .Lcopy_bytes +	beq	rem, len, .Lcopy_bytes\@  	 nop  1: -EXC(	LOAD	t0, 0(src),		.Ll_exc) +	LOAD(t0, 0(src), .Ll_exc\@)  	ADD	src, src, NBYTES  	SUB	len, len, NBYTES -EXC(	STORE	t0, 0(dst),		.Ls_exc) +	STORE(t0, 0(dst), .Ls_exc\@)  	ADDC(sum, t0)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, NBYTES @@ -518,7 +572,7 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc)  	/*  	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)  	 * A loop would do only a byte at a time with possible branch -	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE +	 * mispredicts.	 Can't do an explicit LOAD dst,mask,or,STORE  	 * because can't assume read-access to dst.  Instead, use  	 * STREST dst, which doesn't require read access to dst.  	 * @@ -527,20 +581,20 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc)  	 * more instruction-level parallelism.  	 */  #define bits t2 -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	 ADD	t1, dst, len	# t1 is just past last byte of dst  	li	bits, 8*NBYTES  	SLL	rem, len, 3	# rem = number of bits to keep -EXC(	LOAD	t0, 0(src),		.Ll_exc) -	SUB	bits, bits, rem	# bits = number of bits to discard +	LOAD(t0, 0(src), .Ll_exc\@) +	SUB	bits, bits, rem # bits = number of bits to discard  	SHIFT_DISCARD t0, t0, bits -EXC(	STREST	t0, -1(t1),		.Ls_exc) +	STREST(t0, -1(t1), .Ls_exc\@)  	SHIFT_DISCARD_REVERT t0, t0, bits  	.set reorder  	ADDC(sum, t0) -	b	.Ldone +	b	.Ldone\@  	.set noreorder -.Ldst_unaligned: +.Ldst_unaligned\@:  	/*  	 * dst is unaligned  	 * t0 = src & ADDRMASK @@ -551,26 +605,26 @@ EXC(	STREST	t0, -1(t1),		.Ls_exc)  	 * Set match = (src and dst have same alignment)  	 */  #define match rem -EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc) +	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)  	ADD	t2, zero, NBYTES -EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy) +	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)  	SUB	t2, t2, t1	# t2 = number of bytes copied  	xor	match, t0, t1 -EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc) +	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)  	SLL	t4, t1, 3		# t4 = number of bits to discard  	SHIFT_DISCARD t3, t3, t4  	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */  	ADDC(sum, t3) -	beq	len, t2, .Ldone +	beq	len, t2, .Ldone\@  	 SUB	len, len, t2  	ADD	dst, dst, t2 -	beqz	match, .Lboth_aligned +	beqz	match, .Lboth_aligned\@  	 ADD	src, src, t2 -.Lsrc_unaligned_dst_aligned: -	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter -	beqz	t0, .Lcleanup_src_unaligned -	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES +.Lsrc_unaligned_dst_aligned\@: +	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter +	beqz	t0, .Lcleanup_src_unaligned\@ +	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES  1:  /*   * Avoid consecutive LD*'s to the same register since some mips @@ -578,53 +632,53 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)   * It's OK to load FIRST(N+1) before REST(N) because the two addresses   * are to the same unit (unless src is aligned, but it's not).   
*/ -EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy) -	SUB     len, len, 4*NBYTES -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) -EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy) -EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy) -EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy) +	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) +	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@) +	SUB	len, len, 4*NBYTES +	LDREST(t0, REST(0)(src), .Ll_exc_copy\@) +	LDREST(t1, REST(1)(src), .Ll_exc_copy\@) +	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@) +	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@) +	LDREST(t2, REST(2)(src), .Ll_exc_copy\@) +	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)  	ADD	src, src, 4*NBYTES  #ifdef CONFIG_CPU_SB1  	nop				# improves slotting  #endif -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc) +	STORE(t0, UNIT(0)(dst),	.Ls_exc\@)  	ADDC(sum, t0) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc) +	STORE(t1, UNIT(1)(dst),	.Ls_exc\@)  	ADDC(sum, t1) -EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc) +	STORE(t2, UNIT(2)(dst),	.Ls_exc\@)  	ADDC(sum, t2) -EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc) +	STORE(t3, UNIT(3)(dst),	.Ls_exc\@)  	ADDC(sum, t3)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 4*NBYTES  	bne	len, rem, 1b  	.set	noreorder -.Lcleanup_src_unaligned: -	beqz	len, .Ldone +.Lcleanup_src_unaligned\@: +	beqz	len, .Ldone\@  	 and	rem, len, NBYTES-1  # rem = len % NBYTES -	beq	rem, len, .Lcopy_bytes +	beq	rem, len, .Lcopy_bytes\@  	 nop  1: -EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) +	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) +	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)  	ADD	src, src, NBYTES  	SUB	len, len, NBYTES -EXC(	STORE	t0, 0(dst),		.Ls_exc) +	STORE(t0, 0(dst), .Ls_exc\@)  	ADDC(sum, t0)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, NBYTES  	bne	len, rem, 1b  	.set	noreorder -.Lcopy_bytes_checklen: -	beqz	len, .Ldone +.Lcopy_bytes_checklen\@: +	beqz	len, .Ldone\@  	 nop -.Lcopy_bytes: +.Lcopy_bytes\@:  	/* 0 < len < NBYTES  */  #ifdef CONFIG_CPU_LITTLE_ENDIAN  #define SHIFT_START 0 @@ -634,15 +688,15 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc)  #define SHIFT_INC -8  #endif  	move	t2, zero	# partial word -	li	t3, SHIFT_START	# shift +	li	t3, SHIFT_START # shift  /* use .Ll_exc_copy here to return correct sum on fault */  #define COPY_BYTE(N)			\ -EXC(	lbu	t0, N(src), .Ll_exc_copy);	\ +	LOADBU(t0, N(src), .Ll_exc_copy\@);	\  	SUB	len, len, 1;		\ -EXC(	sb	t0, N(dst), .Ls_exc);	\ +	STOREB(t0, N(dst), .Ls_exc\@);	\  	SLLV	t0, t0, t3;		\  	addu	t3, SHIFT_INC;		\ -	beqz	len, .Lcopy_bytes_done;	\ +	beqz	len, .Lcopy_bytes_done\@; \  	 or	t2, t0  	COPY_BYTE(0) @@ -653,15 +707,17 @@ EXC(	sb	t0, N(dst), .Ls_exc);	\  	COPY_BYTE(4)  	COPY_BYTE(5)  #endif -EXC(	lbu	t0, NBYTES-2(src), .Ll_exc_copy) +	LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)  	SUB	len, len, 1 -EXC(	sb	t0, NBYTES-2(dst), .Ls_exc) +	STOREB(t0, NBYTES-2(dst), .Ls_exc\@)  	SLLV	t0, t0, t3  	or	t2, t0 -.Lcopy_bytes_done: +.Lcopy_bytes_done\@:  	ADDC(sum, t2) -.Ldone: +.Ldone\@:  	/* fold checksum */ +	.set	push +	.set	noat  #ifdef USE_DOUBLE  	dsll32	v1, sum, 0  	daddu	sum, v1 @@ -670,7 +726,7 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)  	addu	sum, v1  #endif -#ifdef CPU_MIPSR2 +#ifdef CONFIG_CPU_MIPSR2  	wsbh	v1, sum  	movn	sum, v1, odd  #else @@ -684,12 +740,13 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)  	or	sum, sum, t0  1:  #endif +	.set	pop  	.set reorder  	ADDC32(sum, psum)  	jr	ra  	.set noreorder -.Ll_exc_copy: +.Ll_exc_copy\@:  	/*  	 * Copy 
bytes from src until faulting load address (or until a  	 * lb faults) @@ -700,11 +757,11 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)  	 *  	 * Assumes src < THREAD_BUADDR($28)  	 */ -	LOAD	t0, TI_TASK($28) +	LOADK	t0, TI_TASK($28)  	 li	t2, SHIFT_START -	LOAD	t0, THREAD_BUADDR(t0) +	LOADK	t0, THREAD_BUADDR(t0)  1: -EXC(	lbu	t1, 0(src),	.Ll_exc) +	LOADBU(t1, 0(src), .Ll_exc\@)  	ADD	src, src, 1  	sb	t1, 0(dst)	# can't fault -- we're copy_from_user  	SLLV	t1, t1, t2 @@ -714,10 +771,10 @@ EXC(	lbu	t1, 0(src),	.Ll_exc)  	ADD	dst, dst, 1  	bne	src, t0, 1b  	.set	noreorder -.Ll_exc: -	LOAD	t0, TI_TASK($28) +.Ll_exc\@: +	LOADK	t0, TI_TASK($28)  	 nop -	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address +	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address  	 nop  	SUB	len, AT, t0		# len number of uncopied bytes  	/* @@ -733,7 +790,7 @@ EXC(	lbu	t1, 0(src),	.Ll_exc)  	 */  	.set	reorder				/* DADDI_WAR */  	SUB	src, len, 1 -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	.set	noreorder  1:	sb	zero, 0(dst)  	ADD	dst, dst, 1 @@ -748,13 +805,31 @@ EXC(	lbu	t1, 0(src),	.Ll_exc)  	 SUB	src, src, v1  #endif  	li	v1, -EFAULT -	b	.Ldone +	b	.Ldone\@  	 sw	v1, (errptr) -.Ls_exc: +.Ls_exc\@:  	li	v0, -1 /* invalid checksum */  	li	v1, -EFAULT  	jr	ra  	 sw	v1, (errptr)  	.set	pop -	END(__csum_partial_copy_user) +	.endm + +LEAF(__csum_partial_copy_kernel) +#ifndef CONFIG_EVA +FEXPORT(__csum_partial_copy_to_user) +FEXPORT(__csum_partial_copy_from_user) +#endif +__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1 +END(__csum_partial_copy_kernel) + +#ifdef CONFIG_EVA +LEAF(__csum_partial_copy_to_user) +__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP 0 +END(__csum_partial_copy_to_user) + +LEAF(__csum_partial_copy_from_user) +__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP 0 +END(__csum_partial_copy_from_user) +#endif diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c index 5995969e8c4..21d27c6819a 100644 --- a/arch/mips/lib/delay.c +++ b/arch/mips/lib/delay.c @@ -6,25 +6,33 @@   * Copyright (C) 1994 by Waldorf Electronics   * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle   * Copyright (C) 1999, 2000 Silicon Graphics, Inc. - * Copyright (C) 2007  Maciej W. Rozycki + * Copyright (C) 2007, 2014 Maciej W. Rozycki   */  #include <linux/module.h>  #include <linux/param.h>  #include <linux/smp.h> +#include <linux/stringify.h> +#include <asm/asm.h>  #include <asm/compiler.h>  #include <asm/war.h> -inline void __delay(unsigned int loops) +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS +#define GCC_DADDI_IMM_ASM() "I" +#else +#define GCC_DADDI_IMM_ASM() "r" +#endif + +void __delay(unsigned long loops)  {  	__asm__ __volatile__ (  	"	.set	noreorder				\n"  	"	.align	3					\n"  	"1:	bnez	%0, 1b					\n" -	"	subu	%0, 1					\n" +	"	 " __stringify(LONG_SUBU) "	%0, %1		\n"  	"	.set	reorder					\n"  	: "=r" (loops) -	: "0" (loops)); +	: GCC_DADDI_IMM_ASM() (1), "0" (loops));  }  EXPORT_SYMBOL(__delay); @@ -32,7 +40,7 @@ EXPORT_SYMBOL(__delay);   * Division by multiplication: you don't have to worry about   * loss of precision.   * - * Use only for very small delays ( < 1 msec).  Should probably use a + * Use only for very small delays ( < 1 msec).	Should probably use a   * lookup table, really, as the multiplications take much too long with   * short delays.  
This is a "reasonable" implementation, though (and the   * first constant multiplications gets optimized away if the delay is diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c index 3f69725556a..32b9f21bfd8 100644 --- a/arch/mips/lib/dump_tlb.c +++ b/arch/mips/lib/dump_tlb.c @@ -50,8 +50,9 @@ static void dump_tlb(int first, int last)  {  	unsigned long s_entryhi, entryhi, asid;  	unsigned long long entrylo0, entrylo1; -	unsigned int s_index, pagemask, c0, c1, i; +	unsigned int s_index, s_pagemask, pagemask, c0, c1, i; +	s_pagemask = read_c0_pagemask();  	s_entryhi = read_c0_entryhi();  	s_index = read_c0_index();  	asid = s_entryhi & 0xff; @@ -62,7 +63,7 @@ static void dump_tlb(int first, int last)  		tlb_read();  		BARRIER();  		pagemask = read_c0_pagemask(); -		entryhi  = read_c0_entryhi(); +		entryhi	 = read_c0_entryhi();  		entrylo0 = read_c0_entrylo0();  		entrylo1 = read_c0_entrylo1(); @@ -103,6 +104,7 @@ static void dump_tlb(int first, int last)  	write_c0_entryhi(s_entryhi);  	write_c0_index(s_index); +	write_c0_pagemask(s_pagemask);  }  void dump_tlb_all(void) diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c index 2ab899c4b4c..fd35daa4531 100644 --- a/arch/mips/lib/iomap-pci.c +++ b/arch/mips/lib/iomap-pci.c @@ -10,8 +10,8 @@  #include <linux/module.h>  #include <asm/io.h> -static void __iomem *ioport_map_pci(struct pci_dev *dev, -                                     unsigned long port, unsigned int nr) +void __iomem *__pci_ioport_map(struct pci_dev *dev, +			       unsigned long port, unsigned int nr)  {  	struct pci_controller *ctrl = dev->bus->sysdata;  	unsigned long base = ctrl->io_map_base; @@ -40,32 +40,6 @@ static void __iomem *ioport_map_pci(struct pci_dev *dev,  	return (void __iomem *) (ctrl->io_map_base + port);  } -/* - * Create a virtual mapping cookie for a PCI BAR (memory or IO) - */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ -	resource_size_t start = pci_resource_start(dev, bar); -	resource_size_t len = pci_resource_len(dev, bar); -	unsigned long flags = pci_resource_flags(dev, bar); - -	if (!len || !start) -		return NULL; -	if (maxlen && len > maxlen) -		len = maxlen; -	if (flags & IORESOURCE_IO) -		return ioport_map_pci(dev, start, len); -	if (flags & IORESOURCE_MEM) { -		if (flags & IORESOURCE_CACHEABLE) -			return ioremap(start, len); -		return ioremap_nocache(start, len); -	} -	/* What? */ -	return NULL; -} - -EXPORT_SYMBOL(pci_iomap); -  void pci_iounmap(struct pci_dev *dev, void __iomem * addr)  {  	iounmap(addr); diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S deleted file mode 100644 index 68853a038d3..00000000000 --- a/arch/mips/lib/memcpy-inatomic.S +++ /dev/null @@ -1,451 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License.  See the file "COPYING" in the main directory of this archive - * for more details. - * - * Unified implementation of memcpy, memmove and the __copy_user backend. - * - * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. - * Copyright (C) 2002 Broadcom, Inc. - *   memcpy/copy_user author: Mark Vandevoorde - * Copyright (C) 2007  Maciej W. Rozycki - * - * Mnemonic names for arguments to memcpy/__copy_user - */ - -/* - * Hack to resolve longstanding prefetch issue - * - * Prefetching may be fatal on some systems if we're prefetching beyond the - * end of memory on some systems.  
It's also a seriously bad idea on non - * dma-coherent systems. - */ -#ifdef CONFIG_DMA_NONCOHERENT -#undef CONFIG_CPU_HAS_PREFETCH -#endif -#ifdef CONFIG_MIPS_MALTA -#undef CONFIG_CPU_HAS_PREFETCH -#endif - -#include <asm/asm.h> -#include <asm/asm-offsets.h> -#include <asm/regdef.h> - -#define dst a0 -#define src a1 -#define len a2 - -/* - * Spec - * - * memcpy copies len bytes from src to dst and sets v0 to dst. - * It assumes that - *   - src and dst don't overlap - *   - src is readable - *   - dst is writable - * memcpy uses the standard calling convention - * - * __copy_user copies up to len bytes from src to dst and sets a2 (len) to - * the number of uncopied bytes due to an exception caused by a read or write. - * __copy_user assumes that src and dst don't overlap, and that the call is - * implementing one of the following: - *   copy_to_user - *     - src is readable  (no exceptions when reading src) - *   copy_from_user - *     - dst is writable  (no exceptions when writing dst) - * __copy_user uses a non-standard calling convention; see - * include/asm-mips/uaccess.h - * - * When an exception happens on a load, the handler must - # ensure that all of the destination buffer is overwritten to prevent - * leaking information to user mode programs. - */ - -/* - * Implementation - */ - -/* - * The exception handler for loads requires that: - *  1- AT contain the address of the byte just past the end of the source - *     of the copy, - *  2- src_entry <= src < AT, and - *  3- (dst - src) == (dst_entry - src_entry), - * The _entry suffix denotes values when __copy_user was called. - * - * (1) is set up up by uaccess.h and maintained by not writing AT in copy_user - * (2) is met by incrementing src by the number of bytes copied - * (3) is met by not doing loads between a pair of increments of dst and src - * - * The exception handlers for stores adjust len (if necessary) and return. - * These handlers do not need to overwrite any data. - * - * For __rmemcpy and memmove an exception is always a kernel bug, therefore - * they're not protected. - */ - -#define EXC(inst_reg,addr,handler)		\ -9:	inst_reg, addr;				\ -	.section __ex_table,"a";		\ -	PTR	9b, handler;			\ -	.previous - -/* - * Only on the 64-bit kernel we can made use of 64-bit registers. - */ -#ifdef CONFIG_64BIT -#define USE_DOUBLE -#endif - -#ifdef USE_DOUBLE - -#define LOAD   ld -#define LOADL  ldl -#define LOADR  ldr -#define STOREL sdl -#define STORER sdr -#define STORE  sd -#define ADD    daddu -#define SUB    dsubu -#define SRL    dsrl -#define SRA    dsra -#define SLL    dsll -#define SLLV   dsllv -#define SRLV   dsrlv -#define NBYTES 8 -#define LOG_NBYTES 3 - -/* - * As we are sharing code base with the mips32 tree (which use the o32 ABI - * register definitions). We need to redefine the register definitions from - * the n64 ABI register naming to the o32 ABI register naming. 
- */ -#undef t0 -#undef t1 -#undef t2 -#undef t3 -#define t0	$8 -#define t1	$9 -#define t2	$10 -#define t3	$11 -#define t4	$12 -#define t5	$13 -#define t6	$14 -#define t7	$15 - -#else - -#define LOAD   lw -#define LOADL  lwl -#define LOADR  lwr -#define STOREL swl -#define STORER swr -#define STORE  sw -#define ADD    addu -#define SUB    subu -#define SRL    srl -#define SLL    sll -#define SRA    sra -#define SLLV   sllv -#define SRLV   srlv -#define NBYTES 4 -#define LOG_NBYTES 2 - -#endif /* USE_DOUBLE */ - -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define LDFIRST LOADR -#define LDREST  LOADL -#define STFIRST STORER -#define STREST  STOREL -#define SHIFT_DISCARD SLLV -#else -#define LDFIRST LOADL -#define LDREST  LOADR -#define STFIRST STOREL -#define STREST  STORER -#define SHIFT_DISCARD SRLV -#endif - -#define FIRST(unit) ((unit)*NBYTES) -#define REST(unit)  (FIRST(unit)+NBYTES-1) -#define UNIT(unit)  FIRST(unit) - -#define ADDRMASK (NBYTES-1) - -	.text -	.set	noreorder -#ifndef CONFIG_CPU_DADDI_WORKAROUNDS -	.set	noat -#else -	.set	at=v1 -#endif - -/* - * A combined memcpy/__copy_user - * __copy_user sets len to 0 for success; else to an upper bound of - * the number of uncopied bytes. - * memcpy sets v0 to dst. - */ -	.align	5 -LEAF(__copy_user_inatomic) -	/* -	 * Note: dst & src may be unaligned, len may be 0 -	 * Temps -	 */ -#define rem t8 - -	/* -	 * The "issue break"s below are very approximate. -	 * Issue delays for dcache fills will perturb the schedule, as will -	 * load queue full replay traps, etc. -	 * -	 * If len < NBYTES use byte operations. -	 */ -	PREF(	0, 0(src) ) -	PREF(	1, 0(dst) ) -	sltu	t2, len, NBYTES -	and	t1, dst, ADDRMASK -	PREF(	0, 1*32(src) ) -	PREF(	1, 1*32(dst) ) -	bnez	t2, .Lcopy_bytes_checklen -	 and	t0, src, ADDRMASK -	PREF(	0, 2*32(src) ) -	PREF(	1, 2*32(dst) ) -	bnez	t1, .Ldst_unaligned -	 nop -	bnez	t0, .Lsrc_unaligned_dst_aligned -	/* -	 * use delay slot for fall-through -	 * src and dst are aligned; need to compute rem -	 */ -.Lboth_aligned: -	 SRL	t0, len, LOG_NBYTES+3    	# +3 for 8 units/iter -	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES -	 and	rem, len, (8*NBYTES-1)	 	# rem = len % (8*NBYTES) -	PREF(	0, 3*32(src) ) -	PREF(	1, 3*32(dst) ) -	.align	4 -1: -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) -	SUB	len, len, 8*NBYTES -EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy) -EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy) -	STORE	t0, UNIT(0)(dst) -	STORE	t1, UNIT(1)(dst) -EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy) -EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy) -	ADD	src, src, 8*NBYTES -	ADD	dst, dst, 8*NBYTES -	STORE	t2, UNIT(-6)(dst) -	STORE	t3, UNIT(-5)(dst) -	STORE	t4, UNIT(-4)(dst) -	STORE	t7, UNIT(-3)(dst) -	STORE	t0, UNIT(-2)(dst) -	STORE	t1, UNIT(-1)(dst) -	PREF(	0, 8*32(src) ) -	PREF(	1, 8*32(dst) ) -	bne	len, rem, 1b -	 nop - -	/* -	 * len == rem == the number of bytes left to copy < 8*NBYTES -	 */ -.Lcleanup_both_aligned: -	beqz	len, .Ldone -	 sltu	t0, len, 4*NBYTES -	bnez	t0, .Lless_than_4units -	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES -	/* -	 * len >= 4*NBYTES -	 */ -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) -	SUB	len, len, 4*NBYTES -	ADD	src, src, 4*NBYTES -	STORE	t0, UNIT(0)(dst) -	STORE	t1, UNIT(1)(dst) -	STORE	t2, UNIT(2)(dst) -	STORE	t3, UNIT(3)(dst) -	.set	reorder				/* DADDI_WAR */ -	ADD	dst, dst, 
4*NBYTES -	beqz	len, .Ldone -	.set	noreorder -.Lless_than_4units: -	/* -	 * rem = len % NBYTES -	 */ -	beq	rem, len, .Lcopy_bytes -	 nop -1: -EXC(	LOAD	t0, 0(src),		.Ll_exc) -	ADD	src, src, NBYTES -	SUB	len, len, NBYTES -	STORE	t0, 0(dst) -	.set	reorder				/* DADDI_WAR */ -	ADD	dst, dst, NBYTES -	bne	rem, len, 1b -	.set	noreorder - -	/* -	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES) -	 * A loop would do only a byte at a time with possible branch -	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE -	 * because can't assume read-access to dst.  Instead, use -	 * STREST dst, which doesn't require read access to dst. -	 * -	 * This code should perform better than a simple loop on modern, -	 * wide-issue mips processors because the code has fewer branches and -	 * more instruction-level parallelism. -	 */ -#define bits t2 -	beqz	len, .Ldone -	 ADD	t1, dst, len	# t1 is just past last byte of dst -	li	bits, 8*NBYTES -	SLL	rem, len, 3	# rem = number of bits to keep -EXC(	LOAD	t0, 0(src),		.Ll_exc) -	SUB	bits, bits, rem	# bits = number of bits to discard -	SHIFT_DISCARD t0, t0, bits -	STREST	t0, -1(t1) -	jr	ra -	 move	len, zero -.Ldst_unaligned: -	/* -	 * dst is unaligned -	 * t0 = src & ADDRMASK -	 * t1 = dst & ADDRMASK; T1 > 0 -	 * len >= NBYTES -	 * -	 * Copy enough bytes to align dst -	 * Set match = (src and dst have same alignment) -	 */ -#define match rem -EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc) -	ADD	t2, zero, NBYTES -EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy) -	SUB	t2, t2, t1	# t2 = number of bytes copied -	xor	match, t0, t1 -	STFIRST t3, FIRST(0)(dst) -	beq	len, t2, .Ldone -	 SUB	len, len, t2 -	ADD	dst, dst, t2 -	beqz	match, .Lboth_aligned -	 ADD	src, src, t2 - -.Lsrc_unaligned_dst_aligned: -	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter -	PREF(	0, 3*32(src) ) -	beqz	t0, .Lcleanup_src_unaligned -	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES -	PREF(	1, 3*32(dst) ) -1: -/* - * Avoid consecutive LD*'s to the same register since some mips - * implementations can't issue them in the same cycle. - * It's OK to load FIRST(N+1) before REST(N) because the two addresses - * are to the same unit (unless src is aligned, but it's not). 
- */ -EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy) -	SUB     len, len, 4*NBYTES -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) -EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy) -EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy) -EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy) -	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed) -	ADD	src, src, 4*NBYTES -#ifdef CONFIG_CPU_SB1 -	nop				# improves slotting -#endif -	STORE	t0, UNIT(0)(dst) -	STORE	t1, UNIT(1)(dst) -	STORE	t2, UNIT(2)(dst) -	STORE	t3, UNIT(3)(dst) -	PREF(	1, 9*32(dst) )     	# 1 is PREF_STORE (not streamed) -	.set	reorder				/* DADDI_WAR */ -	ADD	dst, dst, 4*NBYTES -	bne	len, rem, 1b -	.set	noreorder - -.Lcleanup_src_unaligned: -	beqz	len, .Ldone -	 and	rem, len, NBYTES-1  # rem = len % NBYTES -	beq	rem, len, .Lcopy_bytes -	 nop -1: -EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) -	ADD	src, src, NBYTES -	SUB	len, len, NBYTES -	STORE	t0, 0(dst) -	.set	reorder				/* DADDI_WAR */ -	ADD	dst, dst, NBYTES -	bne	len, rem, 1b -	.set	noreorder - -.Lcopy_bytes_checklen: -	beqz	len, .Ldone -	 nop -.Lcopy_bytes: -	/* 0 < len < NBYTES  */ -#define COPY_BYTE(N)			\ -EXC(	lb	t0, N(src), .Ll_exc);	\ -	SUB	len, len, 1;		\ -	beqz	len, .Ldone;		\ -	 sb	t0, N(dst) - -	COPY_BYTE(0) -	COPY_BYTE(1) -#ifdef USE_DOUBLE -	COPY_BYTE(2) -	COPY_BYTE(3) -	COPY_BYTE(4) -	COPY_BYTE(5) -#endif -EXC(	lb	t0, NBYTES-2(src), .Ll_exc) -	SUB	len, len, 1 -	jr	ra -	 sb	t0, NBYTES-2(dst) -.Ldone: -	jr	ra -	 nop -	END(__copy_user_inatomic) - -.Ll_exc_copy: -	/* -	 * Copy bytes from src until faulting load address (or until a -	 * lb faults) -	 * -	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28) -	 * may be more than a byte beyond the last address. -	 * Hence, the lb below may get an exception. -	 * -	 * Assumes src < THREAD_BUADDR($28) -	 */ -	LOAD	t0, TI_TASK($28) -	 nop -	LOAD	t0, THREAD_BUADDR(t0) -1: -EXC(	lb	t1, 0(src),	.Ll_exc) -	ADD	src, src, 1 -	sb	t1, 0(dst)	# can't fault -- we're copy_from_user -	.set	reorder				/* DADDI_WAR */ -	ADD	dst, dst, 1 -	bne	src, t0, 1b -	.set	noreorder -.Ll_exc: -	LOAD	t0, TI_TASK($28) -	 nop -	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address -	 nop -	SUB	len, AT, t0		# len number of uncopied bytes -	jr	ra -	 nop diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 56a1f85a1ce..c17ef80cf65 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -10,6 +10,7 @@   * Copyright (C) 2002 Broadcom, Inc.   *   memcpy/copy_user author: Mark Vandevoorde   * Copyright (C) 2007  Maciej W. Rozycki + * Copyright (C) 2014 Imagination Technologies Ltd.   *   * Mnemonic names for arguments to memcpy/__copy_user   */ @@ -85,11 +86,51 @@   * they're not protected.   */ -#define EXC(inst_reg,addr,handler)		\ -9:	inst_reg, addr;				\ -	.section __ex_table,"a";		\ -	PTR	9b, handler;			\ -	.previous +/* Instruction type */ +#define LD_INSN 1 +#define ST_INSN 2 +/* Pretech type */ +#define SRC_PREFETCH 1 +#define DST_PREFETCH 2 +#define LEGACY_MODE 1 +#define EVA_MODE    2 +#define USEROP   1 +#define KERNELOP 2 + +/* + * Wrapper to add an entry in the exception table + * in case the insn causes a memory exception. 
+ * Arguments: + * insn    : Load/store instruction + * type    : Instruction type + * reg     : Register + * addr    : Address + * handler : Exception handler + */ + +#define EXC(insn, type, reg, addr, handler)			\ +	.if \mode == LEGACY_MODE;				\ +9:		insn reg, addr;					\ +		.section __ex_table,"a";			\ +		PTR	9b, handler;				\ +		.previous;					\ +	/* This is assembled in EVA mode */			\ +	.else;							\ +		/* If loading from user or storing to user */	\ +		.if ((\from == USEROP) && (type == LD_INSN)) || \ +		    ((\to == USEROP) && (type == ST_INSN));	\ +9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\ +			.section __ex_table,"a";		\ +			PTR	9b, handler;			\ +			.previous;				\ +		.else;						\ +			/*					\ +			 *  Still in EVA, but no need for	\ +			 * exception handler or EVA insn	\ +			 */					\ +			insn reg, addr;				\ +		.endif;						\ +	.endif  /*   * Only on the 64-bit kernel we can made use of 64-bit registers. @@ -100,12 +141,13 @@  #ifdef USE_DOUBLE -#define LOAD   ld -#define LOADL  ldl -#define LOADR  ldr -#define STOREL sdl -#define STORER sdr -#define STORE  sd +#define LOADK ld /* No exception */ +#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler) +#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler) +#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler) +#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler) +#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler) +#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)  #define ADD    daddu  #define SUB    dsubu  #define SRL    dsrl @@ -136,12 +178,13 @@  #else -#define LOAD   lw -#define LOADL  lwl -#define LOADR  lwr -#define STOREL swl -#define STORER swr -#define STORE  sw +#define LOADK lw /* No exception */ +#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler) +#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler) +#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler) +#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler) +#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler) +#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)  #define ADD    addu  #define SUB    subu  #define SRL    srl @@ -154,17 +197,44 @@  #endif /* USE_DOUBLE */ +#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler) +#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler) + +#define _PREF(hint, addr, type)						\ +	.if \mode == LEGACY_MODE;					\ +		PREF(hint, addr);					\ +	.else;								\ +		.if ((\from == USEROP) && (type == SRC_PREFETCH)) ||	\ +		    ((\to == USEROP) && (type == DST_PREFETCH));	\ +			/*						\ +			 * PREFE has only 9 bits for the offset		\ +			 * compared to PREF which has 16, so it may	\ +			 * need to use the $at register but this	\ +			 * register should remain intact because it's	\ +			 * used later on. Therefore use $v1.		
\ +			 */						\ +			.set at=v1;					\ +			PREFE(hint, addr);				\ +			.set noat;					\ +		.else;							\ +			PREF(hint, addr);				\ +		.endif;							\ +	.endif + +#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH) +#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH) +  #ifdef CONFIG_CPU_LITTLE_ENDIAN  #define LDFIRST LOADR -#define LDREST  LOADL +#define LDREST	LOADL  #define STFIRST STORER -#define STREST  STOREL +#define STREST	STOREL  #define SHIFT_DISCARD SLLV  #else  #define LDFIRST LOADL -#define LDREST  LOADR +#define LDREST	LOADR  #define STFIRST STOREL -#define STREST  STORER +#define STREST	STORER  #define SHIFT_DISCARD SRLV  #endif @@ -182,17 +252,23 @@  	.set	at=v1  #endif -/* - * A combined memcpy/__copy_user - * __copy_user sets len to 0 for success; else to an upper bound of - * the number of uncopied bytes. - * memcpy sets v0 to dst. - */  	.align	5 -LEAF(memcpy)					/* a0=dst a1=src a2=len */ -	move	v0, dst				/* return value */ -.L__memcpy: -FEXPORT(__copy_user) + +	/* +	 * Macro to build the __copy_user common code +	 * Arguements: +	 * mode : LEGACY_MODE or EVA_MODE +	 * from : Source operand. USEROP or KERNELOP +	 * to   : Destination operand. USEROP or KERNELOP +	 */ +	.macro __BUILD_COPY_USER mode, from, to + +	/* initialize __memcpy if this the first time we execute this macro */ +	.ifnotdef __memcpy +	.set __memcpy, 1 +	.hidden __memcpy /* make sure it does not leak */ +	.endif +  	/*  	 * Note: dst & src may be unaligned, len may be 0  	 * Temps @@ -207,94 +283,94 @@ FEXPORT(__copy_user)  	 *  	 * If len < NBYTES use byte operations.  	 */ -	PREF(	0, 0(src) ) -	PREF(	1, 0(dst) ) +	PREFS(	0, 0(src) ) +	PREFD(	1, 0(dst) )  	sltu	t2, len, NBYTES  	and	t1, dst, ADDRMASK -	PREF(	0, 1*32(src) ) -	PREF(	1, 1*32(dst) ) -	bnez	t2, .Lcopy_bytes_checklen +	PREFS(	0, 1*32(src) ) +	PREFD(	1, 1*32(dst) ) +	bnez	t2, .Lcopy_bytes_checklen\@  	 and	t0, src, ADDRMASK -	PREF(	0, 2*32(src) ) -	PREF(	1, 2*32(dst) ) -	bnez	t1, .Ldst_unaligned +	PREFS(	0, 2*32(src) ) +	PREFD(	1, 2*32(dst) ) +	bnez	t1, .Ldst_unaligned\@  	 nop -	bnez	t0, .Lsrc_unaligned_dst_aligned +	bnez	t0, .Lsrc_unaligned_dst_aligned\@  	/*  	 * use delay slot for fall-through  	 * src and dst are aligned; need to compute rem  	 */ -.Lboth_aligned: -	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter -	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES +.Lboth_aligned\@: +	 SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter +	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES  	 and	rem, len, (8*NBYTES-1)	 # rem = len % (8*NBYTES) -	PREF(	0, 3*32(src) ) -	PREF(	1, 3*32(dst) ) +	PREFS(	0, 3*32(src) ) +	PREFD(	1, 3*32(dst) )  	.align	4  1:  	R10KCBARRIER(0(ra)) -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) +	LOAD(t0, UNIT(0)(src), .Ll_exc\@) +	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) +	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) +	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)  	SUB	len, len, 8*NBYTES -EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy) -EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy) -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u) -EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy) -EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy) +	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@) +	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@) +	STORE(t0, UNIT(0)(dst),	.Ls_exc_p8u\@) +	STORE(t1, UNIT(1)(dst),	.Ls_exc_p7u\@) +	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@) +	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)  	ADD	
src, src, 8*NBYTES  	ADD	dst, dst, 8*NBYTES -EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u) -EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u) -EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u) -EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u) -EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u) -EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u) -	PREF(	0, 8*32(src) ) -	PREF(	1, 8*32(dst) ) +	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@) +	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@) +	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@) +	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@) +	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@) +	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@) +	PREFS(	0, 8*32(src) ) +	PREFD(	1, 8*32(dst) )  	bne	len, rem, 1b  	 nop  	/*  	 * len == rem == the number of bytes left to copy < 8*NBYTES  	 */ -.Lcleanup_both_aligned: -	beqz	len, .Ldone +.Lcleanup_both_aligned\@: +	beqz	len, .Ldone\@  	 sltu	t0, len, 4*NBYTES -	bnez	t0, .Lless_than_4units +	bnez	t0, .Lless_than_4units\@  	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES  	/*  	 * len >= 4*NBYTES  	 */ -EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc) -EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy) -EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy) -EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy) +	LOAD( t0, UNIT(0)(src),	.Ll_exc\@) +	LOAD( t1, UNIT(1)(src),	.Ll_exc_copy\@) +	LOAD( t2, UNIT(2)(src),	.Ll_exc_copy\@) +	LOAD( t3, UNIT(3)(src),	.Ll_exc_copy\@)  	SUB	len, len, 4*NBYTES  	ADD	src, src, 4*NBYTES  	R10KCBARRIER(0(ra)) -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u) -EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u) -EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u) +	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@) +	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@) +	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@) +	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 4*NBYTES -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	.set	noreorder -.Lless_than_4units: +.Lless_than_4units\@:  	/*  	 * rem = len % NBYTES  	 */ -	beq	rem, len, .Lcopy_bytes +	beq	rem, len, .Lcopy_bytes\@  	 nop  1:  	R10KCBARRIER(0(ra)) -EXC(	LOAD	t0, 0(src),		.Ll_exc) +	LOAD(t0, 0(src), .Ll_exc\@)  	ADD	src, src, NBYTES  	SUB	len, len, NBYTES -EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u) +	STORE(t0, 0(dst), .Ls_exc_p1u\@)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, NBYTES  	bne	rem, len, 1b @@ -303,7 +379,7 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)  	/*  	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)  	 * A loop would do only a byte at a time with possible branch -	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE +	 * mispredicts.	 Can't do an explicit LOAD dst,mask,or,STORE  	 * because can't assume read-access to dst.  Instead, use  	 * STREST dst, which doesn't require read access to dst.  	 * @@ -312,17 +388,17 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)  	 * more instruction-level parallelism.  	 
*/  #define bits t2 -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	 ADD	t1, dst, len	# t1 is just past last byte of dst  	li	bits, 8*NBYTES  	SLL	rem, len, 3	# rem = number of bits to keep -EXC(	LOAD	t0, 0(src),		.Ll_exc) -	SUB	bits, bits, rem	# bits = number of bits to discard +	LOAD(t0, 0(src), .Ll_exc\@) +	SUB	bits, bits, rem # bits = number of bits to discard  	SHIFT_DISCARD t0, t0, bits -EXC(	STREST	t0, -1(t1),		.Ls_exc) +	STREST(t0, -1(t1), .Ls_exc\@)  	jr	ra  	 move	len, zero -.Ldst_unaligned: +.Ldst_unaligned\@:  	/*  	 * dst is unaligned  	 * t0 = src & ADDRMASK @@ -333,25 +409,25 @@ EXC(	STREST	t0, -1(t1),		.Ls_exc)  	 * Set match = (src and dst have same alignment)  	 */  #define match rem -EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc) +	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)  	ADD	t2, zero, NBYTES -EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy) +	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)  	SUB	t2, t2, t1	# t2 = number of bytes copied  	xor	match, t0, t1  	R10KCBARRIER(0(ra)) -EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc) -	beq	len, t2, .Ldone +	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@) +	beq	len, t2, .Ldone\@  	 SUB	len, len, t2  	ADD	dst, dst, t2 -	beqz	match, .Lboth_aligned +	beqz	match, .Lboth_aligned\@  	 ADD	src, src, t2 -.Lsrc_unaligned_dst_aligned: -	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter -	PREF(	0, 3*32(src) ) -	beqz	t0, .Lcleanup_src_unaligned -	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES -	PREF(	1, 3*32(dst) ) +.Lsrc_unaligned_dst_aligned\@: +	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter +	PREFS(	0, 3*32(src) ) +	beqz	t0, .Lcleanup_src_unaligned\@ +	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES +	PREFD(	1, 3*32(dst) )  1:  /*   * Avoid consecutive LD*'s to the same register since some mips @@ -360,58 +436,58 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)   * are to the same unit (unless src is aligned, but it's not).   
*/  	R10KCBARRIER(0(ra)) -EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy) -	SUB     len, len, 4*NBYTES -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) -EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy) -EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy) -EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy) -EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy) -	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed) +	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) +	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@) +	SUB	len, len, 4*NBYTES +	LDREST(t0, REST(0)(src), .Ll_exc_copy\@) +	LDREST(t1, REST(1)(src), .Ll_exc_copy\@) +	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@) +	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@) +	LDREST(t2, REST(2)(src), .Ll_exc_copy\@) +	LDREST(t3, REST(3)(src), .Ll_exc_copy\@) +	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)  	ADD	src, src, 4*NBYTES  #ifdef CONFIG_CPU_SB1  	nop				# improves slotting  #endif -EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u) -EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u) -EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u) -EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u) -	PREF(	1, 9*32(dst) )     	# 1 is PREF_STORE (not streamed) +	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@) +	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@) +	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@) +	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@) +	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 4*NBYTES  	bne	len, rem, 1b  	.set	noreorder -.Lcleanup_src_unaligned: -	beqz	len, .Ldone +.Lcleanup_src_unaligned\@: +	beqz	len, .Ldone\@  	 and	rem, len, NBYTES-1  # rem = len % NBYTES -	beq	rem, len, .Lcopy_bytes +	beq	rem, len, .Lcopy_bytes\@  	 nop  1:  	R10KCBARRIER(0(ra)) -EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc) -EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy) +	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) +	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)  	ADD	src, src, NBYTES  	SUB	len, len, NBYTES -EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u) +	STORE(t0, 0(dst), .Ls_exc_p1u\@)  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, NBYTES  	bne	len, rem, 1b  	.set	noreorder -.Lcopy_bytes_checklen: -	beqz	len, .Ldone +.Lcopy_bytes_checklen\@: +	beqz	len, .Ldone\@  	 nop -.Lcopy_bytes: +.Lcopy_bytes\@:  	/* 0 < len < NBYTES  */  	R10KCBARRIER(0(ra))  #define COPY_BYTE(N)			\ -EXC(	lb	t0, N(src), .Ll_exc);	\ +	LOADB(t0, N(src), .Ll_exc\@);	\  	SUB	len, len, 1;		\ -	beqz	len, .Ldone;		\ -EXC(	 sb	t0, N(dst), .Ls_exc_p1) +	beqz	len, .Ldone\@;		\ +	STOREB(t0, N(dst), .Ls_exc_p1\@)  	COPY_BYTE(0)  	COPY_BYTE(1) @@ -421,16 +497,19 @@ EXC(	 sb	t0, N(dst), .Ls_exc_p1)  	COPY_BYTE(4)  	COPY_BYTE(5)  #endif -EXC(	lb	t0, NBYTES-2(src), .Ll_exc) +	LOADB(t0, NBYTES-2(src), .Ll_exc\@)  	SUB	len, len, 1  	jr	ra -EXC(	 sb	t0, NBYTES-2(dst), .Ls_exc_p1) -.Ldone: +	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@) +.Ldone\@:  	jr	ra -	 nop +	.if __memcpy == 1  	END(memcpy) +	.set __memcpy, 0 +	.hidden __memcpy +	.endif -.Ll_exc_copy: +.Ll_exc_copy\@:  	/*  	 * Copy bytes from src until faulting load address (or until a  	 * lb faults) @@ -441,23 +520,24 @@ EXC(	 sb	t0, NBYTES-2(dst), .Ls_exc_p1)  	 *  	 * Assumes src < THREAD_BUADDR($28)  	 */ -	LOAD	t0, TI_TASK($28) +	LOADK	t0, TI_TASK($28)  	 nop -	LOAD	t0, THREAD_BUADDR(t0) +	LOADK	t0, THREAD_BUADDR(t0)  1: -EXC(	lb	t1, 0(src),	.Ll_exc) +	LOADB(t1, 0(src), .Ll_exc\@)  	ADD	src, src, 1  	sb	t1, 0(dst)	# can't fault -- we're copy_from_user  	.set	reorder				/* DADDI_WAR */  	ADD	dst, dst, 1  	bne	src, t0, 1b  	.set	
noreorder -.Ll_exc: -	LOAD	t0, TI_TASK($28) +.Ll_exc\@: +	LOADK	t0, TI_TASK($28)  	 nop -	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address +	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address  	 nop  	SUB	len, AT, t0		# len number of uncopied bytes +	bnez	t6, .Ldone\@	/* Skip the zeroing part if inatomic */  	/*  	 * Here's where we rely on src and dst being incremented in tandem,  	 *   See (3) above. @@ -471,7 +551,7 @@ EXC(	lb	t1, 0(src),	.Ll_exc)  	 */  	.set	reorder				/* DADDI_WAR */  	SUB	src, len, 1 -	beqz	len, .Ldone +	beqz	len, .Ldone\@  	.set	noreorder  1:	sb	zero, 0(dst)  	ADD	dst, dst, 1 @@ -491,8 +571,8 @@ EXC(	lb	t1, 0(src),	.Ll_exc)  #define SEXC(n)							\ -	.set	reorder;			/* DADDI_WAR */	\ -.Ls_exc_p ## n ## u:						\ +	.set	reorder;			/* DADDI_WAR */ \ +.Ls_exc_p ## n ## u\@:						\  	ADD	len, len, n*NBYTES;				\  	jr	ra;						\  	.set	noreorder @@ -506,14 +586,15 @@ SEXC(3)  SEXC(2)  SEXC(1) -.Ls_exc_p1: +.Ls_exc_p1\@:  	.set	reorder				/* DADDI_WAR */  	ADD	len, len, 1  	jr	ra  	.set	noreorder -.Ls_exc: +.Ls_exc\@:  	jr	ra  	 nop +	.endm  	.align	5  LEAF(memmove) @@ -564,3 +645,71 @@ LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */  	jr	ra  	 move	a2, zero  	END(__rmemcpy) + +/* + * t6 is used as a flag to note inatomic mode. + */ +LEAF(__copy_user_inatomic) +	b	__copy_user_common +	li	t6, 1 +	END(__copy_user_inatomic) + +/* + * A combined memcpy/__copy_user + * __copy_user sets len to 0 for success; else to an upper bound of + * the number of uncopied bytes. + * memcpy sets v0 to dst. + */ +	.align	5 +LEAF(memcpy)					/* a0=dst a1=src a2=len */ +	move	v0, dst				/* return value */ +.L__memcpy: +FEXPORT(__copy_user) +	li	t6, 0	/* not inatomic */ +__copy_user_common: +	/* Legacy Mode, user <-> user */ +	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP + +#ifdef CONFIG_EVA + +/* + * For EVA we need distinct symbols for reading and writing to user space. + * This is because we need to use specific EVA instructions to perform the + * virtual <-> physical translation when a virtual address is actually in user + * space + */ + +LEAF(__copy_user_inatomic_eva) +	b       __copy_from_user_common +	li	t6, 1 +	END(__copy_user_inatomic_eva) + +/* + * __copy_from_user (EVA) + */ + +LEAF(__copy_from_user_eva) +	li	t6, 0	/* not inatomic */ +__copy_from_user_common: +	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP +END(__copy_from_user_eva) + + + +/* + * __copy_to_user (EVA) + */ + +LEAF(__copy_to_user_eva) +__BUILD_COPY_USER EVA_MODE KERNELOP USEROP +END(__copy_to_user_eva) + +/* + * __copy_in_user (EVA) + */ + +LEAF(__copy_in_user_eva) +__BUILD_COPY_USER EVA_MODE USEROP USEROP +END(__copy_in_user_eva) + +#endif diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 77dc3b20110..7b0e5462ca5 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -5,7 +5,8 @@   *   * Copyright (C) 1998, 1999, 2000 by Ralf Baechle   * Copyright (C) 1999, 2000 Silicon Graphics, Inc. - * Copyright (C) 2007  Maciej W. Rozycki + * Copyright (C) 2007 by Maciej W. Rozycki + * Copyright (C) 2011, 2012 MIPS Technologies, Inc.   
*/  #include <asm/asm.h>  #include <asm/asm-offsets.h> @@ -19,162 +20,229 @@  #define LONG_S_R sdr  #endif +#ifdef CONFIG_CPU_MICROMIPS +#define STORSIZE (LONGSIZE * 2) +#define STORMASK (STORSIZE - 1) +#define FILL64RG t8 +#define FILLPTRG t7 +#undef  LONG_S +#define LONG_S LONG_SP +#else +#define STORSIZE LONGSIZE +#define STORMASK LONGMASK +#define FILL64RG a1 +#define FILLPTRG t0 +#endif + +#define LEGACY_MODE 1 +#define EVA_MODE    2 + +/* + * No need to protect it with EVA #ifdefery. The generated block of code + * will never be assembled if EVA is not enabled. + */ +#define __EVAFY(insn, reg, addr) __BUILD_EVA_INSN(insn##e, reg, addr) +#define ___BUILD_EVA_INSN(insn, reg, addr) __EVAFY(insn, reg, addr) +  #define EX(insn,reg,addr,handler)			\ -9:	insn	reg, addr;				\ -	.section __ex_table,"a"; 			\ -	PTR	9b, handler; 				\ +	.if \mode == LEGACY_MODE;			\ +9:		insn	reg, addr;			\ +	.else;						\ +9:		___BUILD_EVA_INSN(insn, reg, addr);	\ +	.endif;						\ +	.section __ex_table,"a";			\ +	PTR	9b, handler;				\  	.previous -	.macro	f_fill64 dst, offset, val, fixup -	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup) -#if LONGSIZE == 4 -	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) -	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) +	.macro	f_fill64 dst, offset, val, fixup, mode +	EX(LONG_S, \val, (\offset +  0 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  1 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  2 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  3 * STORSIZE)(\dst), \fixup) +#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS)) +	EX(LONG_S, \val, (\offset +  4 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  5 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  6 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  7 * STORSIZE)(\dst), \fixup) +#endif +#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) +	EX(LONG_S, \val, (\offset +  8 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset +  9 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup) +	EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)  #endif  	.endm -/* - * memset(void *s, int c, size_t n) - * - * a0: start of area to clear - * a1: char to fill with - * a2: size of area to clear - */  	.set	noreorder  	.align	5 -LEAF(memset) -	beqz		a1, 1f -	 move		v0, a0			/* result */ -	andi		a1, 0xff		/* spread fillword */ -	LONG_SLL		t1, a1, 8 -	or		a1, t1 -	LONG_SLL	
	t1, a1, 16 -#if LONGSIZE == 8 -	or		a1, t1 -	LONG_SLL		t1, a1, 32 +	/* +	 * Macro to generate the __bzero{,_user} symbol +	 * Arguments: +	 * mode: LEGACY_MODE or EVA_MODE +	 */ +	.macro __BUILD_BZERO mode +	/* Initialize __memset if this is the first time we call this macro */ +	.ifnotdef __memset +	.set __memset, 1 +	.hidden __memset /* Make sure it does not leak */ +	.endif + +	sltiu		t0, a2, STORSIZE	/* very small region? */ +	bnez		t0, .Lsmall_memset\@ +	andi		t0, a0, STORMASK	/* aligned? */ + +#ifdef CONFIG_CPU_MICROMIPS +	move		t8, a1			/* used by 'swp' instruction */ +	move		t9, a1  #endif -	or		a1, t1 -1: - -FEXPORT(__bzero) -	sltiu		t0, a2, LONGSIZE	/* very small region? */ -	bnez		t0, .Lsmall_memset -	 andi		t0, a0, LONGMASK	/* aligned? */ -  #ifndef CONFIG_CPU_DADDI_WORKAROUNDS  	beqz		t0, 1f -	 PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */ +	PTR_SUBU	t0, STORSIZE		/* alignment in bytes */  #else  	.set		noat -	li		AT, LONGSIZE +	li		AT, STORSIZE  	beqz		t0, 1f -	 PTR_SUBU	t0, AT			/* alignment in bytes */ +	PTR_SUBU	t0, AT			/* alignment in bytes */  	.set		at  #endif  	R10KCBARRIER(0(ra))  #ifdef __MIPSEB__ -	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */ +	EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@)	/* make word/dword aligned */  #endif  #ifdef __MIPSEL__ -	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */ +	EX(LONG_S_R, a1, (a0), .Lfirst_fixup\@)	/* make word/dword aligned */  #endif  	PTR_SUBU	a0, t0			/* long align ptr */  	PTR_ADDU	a2, t0			/* correct size */  1:	ori		t1, a2, 0x3f		/* # of full blocks */  	xori		t1, 0x3f -	beqz		t1, .Lmemset_partial	/* no block to fill */ -	 andi		t0, a2, 0x40-LONGSIZE +	beqz		t1, .Lmemset_partial\@	/* no block to fill */ +	andi		t0, a2, 0x40-STORSIZE  	PTR_ADDU	t1, a0			/* end address */  	.set		reorder  1:	PTR_ADDIU	a0, 64  	R10KCBARRIER(0(ra)) -	f_fill64 a0, -64, a1, .Lfwd_fixup +	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode  	bne		t1, a0, 1b  	.set		noreorder -.Lmemset_partial: +.Lmemset_partial\@:  	R10KCBARRIER(0(ra))  	PTR_LA		t1, 2f			/* where to start */ +#ifdef CONFIG_CPU_MICROMIPS +	LONG_SRL	t7, t0, 1 +#endif  #if LONGSIZE == 4 -	PTR_SUBU	t1, t0 +	PTR_SUBU	t1, FILLPTRG  #else  	.set		noat -	LONG_SRL		AT, t0, 1 +	LONG_SRL	AT, FILLPTRG, 1  	PTR_SUBU	t1, AT  	.set		at  #endif  	jr		t1 -	 PTR_ADDU	a0, t0			/* dest ptr */ +	PTR_ADDU	a0, t0			/* dest ptr */  	.set		push  	.set		noreorder  	.set		nomacro -	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */ +	/* ... but first do longs ... 
*/ +	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode  2:	.set		pop -	andi		a2, LONGMASK		/* At most one long to go */ +	andi		a2, STORMASK		/* At most one long to go */  	beqz		a2, 1f -	 PTR_ADDU	a0, a2			/* What's left */ +	PTR_ADDU	a0, a2			/* What's left */  	R10KCBARRIER(0(ra))  #ifdef __MIPSEB__ -	EX(LONG_S_R, a1, -1(a0), .Llast_fixup) +	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)  #endif  #ifdef __MIPSEL__ -	EX(LONG_S_L, a1, -1(a0), .Llast_fixup) +	EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)  #endif  1:	jr		ra -	 move		a2, zero +	move		a2, zero -.Lsmall_memset: +.Lsmall_memset\@:  	beqz		a2, 2f -	 PTR_ADDU	t1, a0, a2 +	PTR_ADDU	t1, a0, a2  1:	PTR_ADDIU	a0, 1			/* fill bytewise */  	R10KCBARRIER(0(ra))  	bne		t1, a0, 1b -	 sb		a1, -1(a0) +	sb		a1, -1(a0)  2:	jr		ra			/* done */ -	 move		a2, zero +	move		a2, zero +	.if __memset == 1  	END(memset) +	.set __memset, 0 +	.hidden __memset +	.endif -.Lfirst_fixup: +.Lfirst_fixup\@:  	jr	ra -	 nop +	nop -.Lfwd_fixup: +.Lfwd_fixup\@:  	PTR_L		t0, TI_TASK($28) -	LONG_L		t0, THREAD_BUADDR(t0)  	andi		a2, 0x3f +	LONG_L		t0, THREAD_BUADDR(t0)  	LONG_ADDU	a2, t1  	jr		ra -	 LONG_SUBU	a2, t0 +	LONG_SUBU	a2, t0 -.Lpartial_fixup: +.Lpartial_fixup\@:  	PTR_L		t0, TI_TASK($28) +	andi		a2, STORMASK  	LONG_L		t0, THREAD_BUADDR(t0) -	andi		a2, LONGMASK  	LONG_ADDU	a2, t1  	jr		ra -	 LONG_SUBU	a2, t0 +	LONG_SUBU	a2, t0 -.Llast_fixup: +.Llast_fixup\@:  	jr		ra -	 andi		v1, a2, LONGMASK +	andi		v1, a2, STORMASK + +	.endm + +/* + * memset(void *s, int c, size_t n) + * + * a0: start of area to clear + * a1: char to fill with + * a2: size of area to clear + */ + +LEAF(memset) +	beqz		a1, 1f +	move		v0, a0			/* result */ + +	andi		a1, 0xff		/* spread fillword */ +	LONG_SLL		t1, a1, 8 +	or		a1, t1 +	LONG_SLL		t1, a1, 16 +#if LONGSIZE == 8 +	or		a1, t1 +	LONG_SLL		t1, a1, 32 +#endif +	or		a1, t1 +1: +#ifndef CONFIG_EVA +FEXPORT(__bzero) +#endif +	__BUILD_BZERO LEGACY_MODE + +#ifdef CONFIG_EVA +LEAF(__bzero) +	__BUILD_BZERO EVA_MODE +END(__bzero) +#endif diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c new file mode 100644 index 00000000000..57bcdaf1f1c --- /dev/null +++ b/arch/mips/lib/mips-atomic.c @@ -0,0 +1,161 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003 by Ralf Baechle + * Copyright (C) 1996 by Paul M. Antoine + * Copyright (C) 1999 Silicon Graphics + * Copyright (C) 2000 MIPS Technologies, Inc. + */ +#include <asm/irqflags.h> +#include <asm/hazards.h> +#include <linux/compiler.h> +#include <linux/preempt.h> +#include <linux/export.h> +#include <linux/stringify.h> + +#ifndef CONFIG_CPU_MIPSR2 + +/* + * For cli() we have to insert nops to make sure that the new value + * has actually arrived in the status register before the end of this + * macro. + * R4000/R4400 need three nops, the R4600 two nops and the R10000 needs + * no nops at all. + */ +/* + * For TX49, operating only IE bit is not enough. + * + * If mfc0 $12 follows store and the mfc0 is last instruction of a + * page and fetching the next instruction causes TLB miss, the result + * of the mfc0 might wrongly contain EXL bit. + * + * ERT-TX49H2-027, ERT-TX49H3-012, ERT-TX49HL3-006, ERT-TX49H4-008 + * + * Workaround: mask EXL bit of the result or place a nop before mfc0. 
+ */ +notrace void arch_local_irq_disable(void) +{ +	preempt_disable(); + +	__asm__ __volatile__( +	"	.set	push						\n" +	"	.set	noat						\n" +#if   defined(CONFIG_CPU_MIPSR2) +	/* see irqflags.h for inline function */ +#else +	"	mfc0	$1,$12						\n" +	"	ori	$1,0x1f						\n" +	"	xori	$1,0x1f						\n" +	"	.set	noreorder					\n" +	"	mtc0	$1,$12						\n" +#endif +	"	" __stringify(__irq_disable_hazard) "			\n" +	"	.set	pop						\n" +	: /* no outputs */ +	: /* no inputs */ +	: "memory"); + +	preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_disable); + + +notrace unsigned long arch_local_irq_save(void) +{ +	unsigned long flags; + +	preempt_disable(); + +	__asm__ __volatile__( +	"	.set	push						\n" +	"	.set	reorder						\n" +	"	.set	noat						\n" +#if   defined(CONFIG_CPU_MIPSR2) +	/* see irqflags.h for inline function */ +#else +	"	mfc0	%[flags], $12					\n" +	"	ori	$1, %[flags], 0x1f				\n" +	"	xori	$1, 0x1f					\n" +	"	.set	noreorder					\n" +	"	mtc0	$1, $12						\n" +#endif +	"	" __stringify(__irq_disable_hazard) "			\n" +	"	.set	pop						\n" +	: [flags] "=r" (flags) +	: /* no inputs */ +	: "memory"); + +	preempt_enable(); + +	return flags; +} +EXPORT_SYMBOL(arch_local_irq_save); + +notrace void arch_local_irq_restore(unsigned long flags) +{ +	unsigned long __tmp1; + +	preempt_disable(); + +	__asm__ __volatile__( +	"	.set	push						\n" +	"	.set	noreorder					\n" +	"	.set	noat						\n" +#if   defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU) +	/* see irqflags.h for inline function */ +#elif defined(CONFIG_CPU_MIPSR2) +	/* see irqflags.h for inline function */ +#else +	"	mfc0	$1, $12						\n" +	"	andi	%[flags], 1					\n" +	"	ori	$1, 0x1f					\n" +	"	xori	$1, 0x1f					\n" +	"	or	%[flags], $1					\n" +	"	mtc0	%[flags], $12					\n" +#endif +	"	" __stringify(__irq_disable_hazard) "			\n" +	"	.set	pop						\n" +	: [flags] "=r" (__tmp1) +	: "0" (flags) +	: "memory"); + +	preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + + +notrace void __arch_local_irq_restore(unsigned long flags) +{ +	unsigned long __tmp1; + +	preempt_disable(); + +	__asm__ __volatile__( +	"	.set	push						\n" +	"	.set	noreorder					\n" +	"	.set	noat						\n" +#if   defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU) +	/* see irqflags.h for inline function */ +#elif defined(CONFIG_CPU_MIPSR2) +	/* see irqflags.h for inline function */ +#else +	"	mfc0	$1, $12						\n" +	"	andi	%[flags], 1					\n" +	"	ori	$1, 0x1f					\n" +	"	xori	$1, 0x1f					\n" +	"	or	%[flags], $1					\n" +	"	mtc0	%[flags], $12					\n" +#endif +	"	" __stringify(__irq_disable_hazard) "			\n" +	"	.set	pop						\n" +	: [flags] "=r" (__tmp1) +	: "0" (flags) +	: "memory"); + +	preempt_enable(); +} +EXPORT_SYMBOL(__arch_local_irq_restore); + +#endif /* !CONFIG_CPU_MIPSR2 */ diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 9cee907975a..91615c2ef0c 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -30,7 +30,7 @@ static void dump_tlb(int first, int last)  			"tlbr\n\t"  			"nop\n\t"  			".set\treorder"); -		entryhi  = read_c0_entryhi(); +		entryhi	 = read_c0_entryhi();  		entrylo0 = read_c0_entrylo0();  		/* Unused entries have a virtual address of KSEG0.  */ diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S index fdbb970f670..bef65c98df5 100644 --- a/arch/mips/lib/strlen_user.S +++ b/arch/mips/lib/strlen_user.S @@ -3,8 +3,9 @@   * License.  See the file "COPYING" in the main directory of this archive   * for more details.   
* - * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle - * Copyright (c) 1999 Silicon Graphics, Inc. + * Copyright (C) 1996, 1998, 1999, 2004 by Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2011 MIPS Technologies, Inc.   */  #include <asm/asm.h>  #include <asm/asm-offsets.h> @@ -21,19 +22,43 @@   *   * Return 0 for error   */ -LEAF(__strlen_user_asm) +	.macro __BUILD_STRLEN_ASM func +LEAF(__strlen_\func\()_asm)  	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?  	and		v0, a0 -	bnez		v0, .Lfault +	bnez		v0, .Lfault\@ -FEXPORT(__strlen_user_nocheck_asm) +FEXPORT(__strlen_\func\()_nocheck_asm)  	move		v0, a0 -1:	EX(lb, t0, (v0), .Lfault) +.ifeqs "\func", "kernel" +1:	EX(lbu, v1, (v0), .Lfault\@) +.else +1:	EX(lbue, v1, (v0), .Lfault\@) +.endif  	PTR_ADDIU	v0, 1 -	bnez		t0, 1b +	bnez		v1, 1b  	PTR_SUBU	v0, a0  	jr		ra -	END(__strlen_user_asm) +	END(__strlen_\func\()_asm) -.Lfault:	move		v0, zero +.Lfault\@:	move		v0, zero  	jr		ra +	.endm + +#ifndef CONFIG_EVA +	/* Set aliases */ +	.global __strlen_user_asm +	.global __strlen_user_nocheck_asm +	.set __strlen_user_asm, __strlen_kernel_asm +	.set __strlen_user_nocheck_asm, __strlen_kernel_nocheck_asm +#endif + +__BUILD_STRLEN_ASM kernel + +#ifdef CONFIG_EVA + +	.set push +	.set eva +__BUILD_STRLEN_ASM user +	.set pop +#endif diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 7201b2ff08c..3c32baf8b49 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -3,7 +3,8 @@   * License.  See the file "COPYING" in the main directory of this archive   * for more details.   * - * Copyright (c) 1996, 1999 by Ralf Baechle + * Copyright (C) 1996, 1999 by Ralf Baechle + * Copyright (C) 2011 MIPS Technologies, Inc.   */  #include <linux/errno.h>  #include <asm/asm.h> @@ -23,37 +24,61 @@  /*   * Ugly special case have to check: we might get passed a user space - * pointer which wraps into the kernel space.  We don't deal with that.  If + * pointer which wraps into the kernel space.  We don't deal with that.	 If   * it happens at most some bytes of the exceptions handlers will be copied.   */ -LEAF(__strncpy_from_user_asm) +	.macro __BUILD_STRNCPY_ASM func +LEAF(__strncpy_from_\func\()_asm)  	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?  	
and		v0, a1 -	bnez		v0, .Lfault +	bnez		v0, .Lfault\@ -FEXPORT(__strncpy_from_user_nocheck_asm) -	move		v0, zero +FEXPORT(__strncpy_from_\func\()_nocheck_asm) +	move		t0, zero  	move		v1, a1 -	.set		noreorder -1:	EX(lbu, t0, (v1), .Lfault) +.ifeqs "\func","kernel" +1:	EX(lbu, v0, (v1), .Lfault\@) +.else +1:	EX(lbue, v0, (v1), .Lfault\@) +.endif  	PTR_ADDIU	v1, 1  	R10KCBARRIER(0(ra)) -	beqz		t0, 2f -	 sb		t0, (a0) -	PTR_ADDIU	v0, 1 -	.set		reorder +	sb		v0, (a0) +	beqz		v0, 2f +	PTR_ADDIU	t0, 1  	PTR_ADDIU	a0, 1 -	bne		v0, a2, 1b -2:	PTR_ADDU	t0, a1, v0 -	xor		t0, a1 -	bltz		t0, .Lfault +	bne		t0, a2, 1b +2:	PTR_ADDU	v0, a1, t0 +	xor		v0, a1 +	bltz		v0, .Lfault\@ +	move		v0, t0  	jr		ra			# return n -	END(__strncpy_from_user_asm) +	END(__strncpy_from_\func\()_asm) -.Lfault:	li		v0, -EFAULT +.Lfault\@: +	li		v0, -EFAULT  	jr		ra  	.section	__ex_table,"a" -	PTR		1b, .Lfault +	PTR		1b, .Lfault\@  	.previous + +	.endm + +#ifndef CONFIG_EVA +	/* Set aliases */ +	.global __strncpy_from_user_asm +	.global __strncpy_from_user_nocheck_asm +	.set __strncpy_from_user_asm, __strncpy_from_kernel_asm +	.set __strncpy_from_user_nocheck_asm, __strncpy_from_kernel_nocheck_asm +#endif + +__BUILD_STRNCPY_ASM kernel + +#ifdef CONFIG_EVA +	.set push +	.set eva +__BUILD_STRNCPY_ASM user +	.set pop +#endif diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index c768e300061..f3af6995e2a 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -17,30 +17,54 @@  	.previous  /* - * Return the size of a string including the ending NUL character upto a + * Return the size of a string including the ending NUL character up to a   * maximum of a1 or 0 in case of error.   *   * Note: for performance reasons we deliberately accept that a user may - *       make strlen_user and strnlen_user access the first few KSEG0 - *       bytes.  There's nothing secret there.  On 64-bit accessing beyond - *       the maximum is a tad hairier ... + *	 make strlen_user and strnlen_user access the first few KSEG0 + *	 bytes.	 There's nothing secret there.	On 64-bit accessing beyond + *	 the maximum is a tad hairier ...   */ -LEAF(__strnlen_user_asm) +	.macro __BUILD_STRNLEN_ASM func +LEAF(__strnlen_\func\()_asm)  	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?  	and		v0, a0 -	bnez		v0, .Lfault +	bnez		v0, .Lfault\@ -FEXPORT(__strnlen_user_nocheck_asm) +FEXPORT(__strnlen_\func\()_nocheck_asm)  	move		v0, a0  	PTR_ADDU	a1, a0			# stop pointer  1:	beq		v0, a1, 1f		# limit reached? -	EX(lb, t0, (v0), .Lfault) -	PTR_ADDU	v0, 1 +.ifeqs "\func", "kernel" +	EX(lb, t0, (v0), .Lfault\@) +.else +	EX(lbe, t0, (v0), .Lfault\@) +.endif +	PTR_ADDIU	v0, 1  	bnez		t0, 1b  1:	PTR_SUBU	v0, a0  	jr		ra -	END(__strnlen_user_asm) +	END(__strnlen_\func\()_asm) -.Lfault: +.Lfault\@:  	move		v0, zero  	jr		ra +	.endm + +#ifndef CONFIG_EVA +	/* Set aliases */ +	.global __strnlen_user_asm +	.global __strnlen_user_nocheck_asm +	.set __strnlen_user_asm, __strnlen_kernel_asm +	.set __strnlen_user_nocheck_asm, __strnlen_kernel_nocheck_asm +#endif + +__BUILD_STRNLEN_ASM kernel + +#ifdef CONFIG_EVA + +	.set push +	.set eva +__BUILD_STRNLEN_ASM user +	.set pop +#endif diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index a6d1c77034d..09d5deea747 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -4,11 +4,10 @@   * for more details.   *   * Copyright (C) 2005 Thiemo Seufer - * Copyright (C) 2005  MIPS Technologies, Inc.  All rights reserved. + * Copyright (C) 2005  MIPS Technologies, Inc.	
	All rights reserved.   *	Author: Maciej W. Rozycki <macro@mips.com>   */ -#include <linux/init.h>  #include <asm/addrspace.h>  #include <asm/bug.h> @@ -36,7 +35,7 @@   * values, so we can avoid sharing the same stack area between a cached   * and the uncached mode.   */ -unsigned long __cpuinit run_uncached(void *func) +unsigned long run_uncached(void *func)  {  	register long sp __asm__("$sp");  	register long ret __asm__("$2");
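
The EX(), EXC(), LOAD()/STORE() and LOADB()/STOREB() macros changed throughout the hunks above all lean on the same mechanism: every user access that is allowed to fault is paired, through an entry emitted into the __ex_table section, with a fixup label (.Ll_exc\@, .Ls_exc_p1u\@, .Lfwd_fixup\@ and so on) that the trap handler resumes at, so the copy, fill or string routine can report a result such as an uncopied-byte count or -EFAULT instead of oopsing. The C fragment below is only a minimal user-space sketch of that table lookup; the structure name, the sample addresses and find_fixup() are invented for illustration and are not the kernel's real exception-table API.

/*
 * Minimal, self-contained sketch of the idea behind the __ex_table
 * entries emitted by the macros above.  Everything here (the struct
 * name, the addresses, find_fixup()) is invented for illustration.
 */
#include <stdio.h>

struct exc_entry {
	unsigned long insn;	/* address of an instruction allowed to fault */
	unsigned long fixup;	/* address to resume at if it does fault */
};

/* Stand-in for the linker-collected __ex_table section. */
static const struct exc_entry ex_table[] = {
	{ 0x80100010UL, 0x80100400UL },	/* e.g. a LOAD() in __copy_user  */
	{ 0x80100014UL, 0x80100420UL },	/* e.g. a STORE() in __copy_user */
};

/* Return the registered fixup for a faulting PC, or 0 if there is none. */
static unsigned long find_fixup(unsigned long epc)
{
	for (size_t i = 0; i < sizeof(ex_table) / sizeof(ex_table[0]); i++)
		if (ex_table[i].insn == epc)
			return ex_table[i].fixup;
	return 0;
}

int main(void)
{
	/* A fault at a registered address resumes at its fixup ... */
	printf("fixup for 0x80100014: 0x%lx\n", find_fixup(0x80100014UL));
	/* ... while a fault anywhere else is treated as a genuine bug. */
	printf("fixup for 0x80100018: 0x%lx\n", find_fixup(0x80100018UL));
	return 0;
}

In the real kernel the assembler collects these pairs into __ex_table, the fault handler searches that table for the faulting EPC, and the fixup code decides what to report; a linear search over a static array is enough here to show the pairing.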
