Diffstat (limited to 'arch/sparc/lib')
51 files changed, 1236 insertions, 3625 deletions
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 846d1c4374e..3269b023409 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -4,18 +4,16 @@  asflags-y := -ansi -DST_DIV0=0x02  ccflags-y := -Werror -lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o +lib-$(CONFIG_SPARC32) += ashrdi3.o  lib-$(CONFIG_SPARC32) += memcpy.o memset.o  lib-y                 += strlen.o  lib-y                 += checksum_$(BITS).o  lib-$(CONFIG_SPARC32) += blockops.o  lib-y                 += memscan_$(BITS).o memcmp.o strncmp_$(BITS).o -lib-y                 += strncpy_from_user_$(BITS).o strlen_user_$(BITS).o  lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o  lib-$(CONFIG_SPARC32) += copy_user.o locks.o -lib-y                 += atomic_$(BITS).o +lib-$(CONFIG_SPARC64) += atomic_64.o  lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o -lib-$(CONFIG_SPARC32) += rwsem_32.o  lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o  lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o @@ -32,16 +30,18 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o  lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o  lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o -lib-$(CONFIG_SPARC64) +=  NG2patch.o NG2page.o +lib-$(CONFIG_SPARC64) +=  NG2patch.o + +lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o +lib-$(CONFIG_SPARC64) +=  NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o  lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o  lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o  lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o -lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o +lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o -obj-y                 += iomap.o -obj-$(CONFIG_SPARC32) += atomic32.o +obj-$(CONFIG_SPARC64) += iomap.o +obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o  obj-y                 += ksyms.o  obj-$(CONFIG_SPARC64) += PeeCeeI.o -obj-y                 += usercopy.o diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 0aed75653b5..30eee6e8a81 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -14,7 +14,7 @@  #define FPRS_FEF  0x04  #ifdef MEMCPY_DEBUG  #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ -		     clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; +		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;  #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs  #else  #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs @@ -90,49 +90,49 @@  	faligndata	%x7, %x8, %f14;  #define FREG_MOVE_1(x0) \ -	fmovd		%x0, %f0; +	fsrc2		%x0, %f0;  #define FREG_MOVE_2(x0, x1) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2;  #define FREG_MOVE_3(x0, x1, x2) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, %f4; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4;  #define FREG_MOVE_4(x0, x1, x2, x3) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, %f4; \ -	fmovd		%x3, %f6; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4; \ +	fsrc2		%x3, %f6;  #define FREG_MOVE_5(x0, x1, x2, x3, x4) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, %f4; \ -	fmovd		%x3, %f6; \ -	fmovd		%x4, %f8; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4; \ +	fsrc2		%x3, %f6; \ +	fsrc2		%x4, %f8;  #define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, 
%f4; \ -	fmovd		%x3, %f6; \ -	fmovd		%x4, %f8; \ -	fmovd		%x5, %f10; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4; \ +	fsrc2		%x3, %f6; \ +	fsrc2		%x4, %f8; \ +	fsrc2		%x5, %f10;  #define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, %f4; \ -	fmovd		%x3, %f6; \ -	fmovd		%x4, %f8; \ -	fmovd		%x5, %f10; \ -	fmovd		%x6, %f12; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4; \ +	fsrc2		%x3, %f6; \ +	fsrc2		%x4, %f8; \ +	fsrc2		%x5, %f10; \ +	fsrc2		%x6, %f12;  #define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ -	fmovd		%x0, %f0; \ -	fmovd		%x1, %f2; \ -	fmovd		%x2, %f4; \ -	fmovd		%x3, %f6; \ -	fmovd		%x4, %f8; \ -	fmovd		%x5, %f10; \ -	fmovd		%x6, %f12; \ -	fmovd		%x7, %f14; +	fsrc2		%x0, %f0; \ +	fsrc2		%x1, %f2; \ +	fsrc2		%x2, %f4; \ +	fsrc2		%x3, %f6; \ +	fsrc2		%x4, %f8; \ +	fsrc2		%x5, %f10; \ +	fsrc2		%x6, %f12; \ +	fsrc2		%x7, %f14;  #define FREG_LOAD_1(base, x0) \  	EX_LD(LOAD(ldd, base + 0x00, %x0))  #define FREG_LOAD_2(base, x0, x1) \ @@ -182,13 +182,13 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	cmp		%g2, 0  	tne		%xcc, 5  	PREAMBLE -	mov		%o0, GLOBAL_SPARE +	mov		%o0, %o3  	cmp		%o2, 0  	be,pn		%XCC, 85f -	 or		%o0, %o1, %o3 +	 or		%o0, %o1, GLOBAL_SPARE  	cmp		%o2, 16  	blu,a,pn	%XCC, 80f -	 or		%o3, %o2, %o3 +	 or		GLOBAL_SPARE, %o2, GLOBAL_SPARE  	/* 2 blocks (128 bytes) is the minimum we can do the block  	 * copy with.  We need to ensure that we'll iterate at least @@ -202,7 +202,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	 */  	cmp		%o2, (4 * 64)  	blu,pt		%XCC, 75f -	 andcc		%o3, 0x7, %g0 +	 andcc		GLOBAL_SPARE, 0x7, %g0  	/* %o0:	dst  	 * %o1:	src @@ -236,6 +236,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	 */  	VISEntryHalf +	membar		#Sync  	alignaddr	%o1, %g0, %g0  	add		%o1, (64 - 1), %o4 @@ -404,13 +405,13 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	 * over. If anything is left, we copy it one byte at a time.  	 
*/  	brz,pt		%o2, 85f -	 sub		%o0, %o1, %o3 +	 sub		%o0, %o1, GLOBAL_SPARE  	ba,a,pt		%XCC, 90f  	.align		64  75: /* 16 < len <= 64 */  	bne,pn		%XCC, 75f -	 sub		%o0, %o1, %o3 +	 sub		%o0, %o1, GLOBAL_SPARE  72:  	andn		%o2, 0xf, %o4 @@ -420,9 +421,9 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	add		%o1, 0x08, %o1  	EX_LD(LOAD(ldx, %o1, %g1))  	sub		%o1, 0x08, %o1 -	EX_ST(STORE(stx, %o5, %o1 + %o3)) +	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))  	add		%o1, 0x8, %o1 -	EX_ST(STORE(stx, %g1, %o1 + %o3)) +	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))  	bgu,pt		%XCC, 1b  	 add		%o1, 0x8, %o1  73:	andcc		%o2, 0x8, %g0 @@ -430,14 +431,14 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	 nop  	sub		%o2, 0x8, %o2  	EX_LD(LOAD(ldx, %o1, %o5)) -	EX_ST(STORE(stx, %o5, %o1 + %o3)) +	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))  	add		%o1, 0x8, %o1  1:	andcc		%o2, 0x4, %g0  	be,pt		%XCC, 1f  	 nop  	sub		%o2, 0x4, %o2  	EX_LD(LOAD(lduw, %o1, %o5)) -	EX_ST(STORE(stw, %o5, %o1 + %o3)) +	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))  	add		%o1, 0x4, %o1  1:	cmp		%o2, 0  	be,pt		%XCC, 85f @@ -454,11 +455,11 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  1:	subcc		%g1, 1, %g1  	EX_LD(LOAD(ldub, %o1, %o5)) -	EX_ST(STORE(stb, %o5, %o1 + %o3)) +	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))  	bgu,pt		%icc, 1b  	 add		%o1, 1, %o1 -2:	add		%o1, %o3, %o0 +2:	add		%o1, GLOBAL_SPARE, %o0  	andcc		%o1, 0x7, %g1  	bne,pt		%icc, 8f  	 sll		%g1, 3, %g1 @@ -468,16 +469,16 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	 nop  	ba,a,pt		%xcc, 73b -8:	mov		64, %o3 +8:	mov		64, GLOBAL_SPARE  	andn		%o1, 0x7, %o1  	EX_LD(LOAD(ldx, %o1, %g2)) -	sub		%o3, %g1, %o3 +	sub		GLOBAL_SPARE, %g1, GLOBAL_SPARE  	andn		%o2, 0x7, %o4  	sllx		%g2, %g1, %g2  1:	add		%o1, 0x8, %o1  	EX_LD(LOAD(ldx, %o1, %g3))  	subcc		%o4, 0x8, %o4 -	srlx		%g3, %o3, %o5 +	srlx		%g3, GLOBAL_SPARE, %o5  	or		%o5, %g2, %o5  	EX_ST(STORE(stx, %o5, %o0))  	add		%o0, 0x8, %o0 @@ -489,32 +490,32 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */  	be,pn		%icc, 85f  	 add		%o1, %g1, %o1  	ba,pt		%xcc, 90f -	 sub		%o0, %o1, %o3 +	 sub		%o0, %o1, GLOBAL_SPARE  	.align		64  80: /* 0 < len <= 16 */ -	andcc		%o3, 0x3, %g0 +	andcc		GLOBAL_SPARE, 0x3, %g0  	bne,pn		%XCC, 90f -	 sub		%o0, %o1, %o3 +	 sub		%o0, %o1, GLOBAL_SPARE  1:  	subcc		%o2, 4, %o2  	EX_LD(LOAD(lduw, %o1, %g1)) -	EX_ST(STORE(stw, %g1, %o1 + %o3)) +	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))  	bgu,pt		%XCC, 1b  	 add		%o1, 4, %o1  85:	retl -	 mov		EX_RETVAL(GLOBAL_SPARE), %o0 +	 mov		EX_RETVAL(%o3), %o0  	.align		32  90:  	subcc		%o2, 1, %o2  	EX_LD(LOAD(ldub, %o1, %g1)) -	EX_ST(STORE(stb, %g1, %o1 + %o3)) +	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))  	bgu,pt		%XCC, 90b  	 add		%o1, 1, %o1  	retl -	 mov		EX_RETVAL(GLOBAL_SPARE), %o0 +	 mov		EX_RETVAL(%o3), %o0  	.size		FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG2page.S b/arch/sparc/lib/NG2page.S deleted file mode 100644 index 73b6b7c72cb..00000000000 --- a/arch/sparc/lib/NG2page.S +++ /dev/null @@ -1,61 +0,0 @@ -/* NG2page.S: Niagara-2 optimized clear and copy page. - * - * Copyright (C) 2007 (davem@davemloft.net) - */ - -#include <asm/asi.h> -#include <asm/page.h> -#include <asm/visasm.h> - -	.text -	.align	32 - -	/* This is heavily simplified from the sun4u variants -	 * because Niagara-2 does not have any D-cache aliasing issues. 
-	 */ -NG2copy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */ -	prefetch	[%o1 + 0x00], #one_read -	prefetch	[%o1 + 0x40], #one_read -	VISEntryHalf -	set		PAGE_SIZE, %g7 -	sub		%o0, %o1, %g3 -1:	stxa		%g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P -	subcc		%g7, 64, %g7 -	ldda		[%o1] ASI_BLK_P, %f0 -	stda		%f0, [%o1 + %g3] ASI_BLK_P -	add		%o1, 64, %o1 -	bne,pt		%xcc, 1b -	 prefetch	[%o1 + 0x40], #one_read -	membar		#Sync -	VISExitHalf -	retl -	 nop - -#define BRANCH_ALWAYS	0x10680000 -#define NOP		0x01000000 -#define NG_DO_PATCH(OLD, NEW)	\ -	sethi	%hi(NEW), %g1; \ -	or	%g1, %lo(NEW), %g1; \ -	sethi	%hi(OLD), %g2; \ -	or	%g2, %lo(OLD), %g2; \ -	sub	%g1, %g2, %g1; \ -	sethi	%hi(BRANCH_ALWAYS), %g3; \ -	sll	%g1, 11, %g1; \ -	srl	%g1, 11 + 2, %g1; \ -	or	%g3, %lo(BRANCH_ALWAYS), %g3; \ -	or	%g3, %g1, %g3; \ -	stw	%g3, [%g2]; \ -	sethi	%hi(NOP), %g3; \ -	or	%g3, %lo(NOP), %g3; \ -	stw	%g3, [%g2 + 0x4]; \ -	flush	%g2; - -	.globl	niagara2_patch_pageops -	.type	niagara2_patch_pageops,#function -niagara2_patch_pageops: -	NG_DO_PATCH(copy_user_page, NG2copy_user_page) -	NG_DO_PATCH(_clear_page, NGclear_page) -	NG_DO_PATCH(clear_user_page, NGclear_user_page) -	retl -	 nop -	.size	niagara2_patch_pageops,.-niagara2_patch_pageops diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S new file mode 100644 index 00000000000..e16c88204a4 --- /dev/null +++ b/arch/sparc/lib/NG4clear_page.S @@ -0,0 +1,29 @@ +/* NG4copy_page.S: Niagara-4 optimized clear page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + +	.text + +	.register	%g3, #scratch + +	.align		32 +	.globl		NG4clear_page +	.globl		NG4clear_user_page +NG4clear_page:		/* %o0=dest */ +NG4clear_user_page:	/* %o0=dest, %o1=vaddr */ +	set		PAGE_SIZE, %g7 +	mov		0x20, %g3 +1:	stxa		%g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P +	subcc		%g7, 0x40, %g7 +	stxa		%g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P +	bne,pt		%xcc, 1b +	 add		%o0, 0x40, %o0 +	membar		#StoreLoad|#StoreStore +	retl +	 nop +	.size		NG4clear_page,.-NG4clear_page +	.size		NG4clear_user_page,.-NG4clear_user_page
\ No newline at end of file diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 00000000000..fd9f903ffa3 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -0,0 +1,30 @@ +/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x)		\ +98:	x;			\ +	.section __ex_table,"a";\ +	.align 4;		\ +	.word 98b, __retl_one_asi;\ +	.text;			\ +	.align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS	0x11 +#endif + +#define FUNC_NAME		NG4copy_from_user +#define LOAD(type,addr,dest)	type##a [addr] %asi, dest +#define EX_RETVAL(x)		0 + +#ifdef __KERNEL__ +#define PREAMBLE					\ +	rd		%asi, %g1;			\ +	cmp		%g1, ASI_AIUS;			\ +	bne,pn		%icc, ___copy_in_user;		\ +	 nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 00000000000..28504e88c53 --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S @@ -0,0 +1,57 @@ +/* NG4copy_page.S: Niagara-4 optimized copy page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + +	.text +	.align		32 + +	.register	%g2, #scratch +	.register	%g3, #scratch + +	.globl		NG4copy_user_page +NG4copy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */ +	prefetch	[%o1 + 0x000], #n_reads_strong +	prefetch	[%o1 + 0x040], #n_reads_strong +	prefetch	[%o1 + 0x080], #n_reads_strong +	prefetch	[%o1 + 0x0c0], #n_reads_strong +	set		PAGE_SIZE, %g7 +	prefetch	[%o1 + 0x100], #n_reads_strong +	prefetch	[%o1 + 0x140], #n_reads_strong +	prefetch	[%o1 + 0x180], #n_reads_strong +	prefetch	[%o1 + 0x1c0], #n_reads_strong +1: +	ldx		[%o1 + 0x00], %o2 +	subcc		%g7, 0x40, %g7 +	ldx		[%o1 + 0x08], %o3 +	ldx		[%o1 + 0x10], %o4 +	ldx		[%o1 + 0x18], %o5 +	ldx		[%o1 + 0x20], %g1 +	stxa		%o2, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	ldx		[%o1 + 0x28], %g2 +	stxa		%o3, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	ldx		[%o1 + 0x30], %g3 +	stxa		%o4, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	ldx		[%o1 + 0x38], %o2 +	add		%o1, 0x40, %o1 +	stxa		%o5, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	stxa		%g1, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	stxa		%g2, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	stxa		%g3, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	stxa		%o2, [%o0] ASI_ST_BLKINIT_MRU_P +	add		%o0, 0x08, %o0 +	bne,pt		%icc, 1b +	 prefetch	[%o1 + 0x200], #n_reads_strong +	retl +	 membar		#StoreLoad | #StoreStore +	.size		NG4copy_user_page,.-NG4copy_user_page diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 00000000000..9744c4540a8 --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -0,0 +1,39 @@ +/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x)		\ +98:	x;			\ +	.section __ex_table,"a";\ +	.align 4;		\ +	.word 98b, __retl_one_asi;\ +	.text;			\ +	.align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS	0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME		NG4copy_to_user +#define STORE(type,src,addr)	type##a src, [addr] %asi +#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS +#define EX_RETVAL(x)		0 + +#ifdef __KERNEL__ +	/* Writing to %asi is _expensive_ so we hardcode it. +	 * Reading %asi to check for KERNEL_DS is comparatively +	 * cheap. 
+	 */ +#define PREAMBLE					\ +	rd		%asi, %g1;			\ +	cmp		%g1, ASI_AIUS;			\ +	bne,pn		%icc, ___copy_in_user;		\ +	 nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 00000000000..9cf2ee01cee --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S @@ -0,0 +1,360 @@ +/* NG4memcpy.S: Niagara-4 optimized memcpy. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include <asm/visasm.h> +#include <asm/asi.h> +#define GLOBAL_SPARE	%g7 +#else +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define FPRS_FEF  0x04 + +/* On T4 it is very expensive to access ASRs like %fprs and + * %asi, avoiding a read or a write can save ~50 cycles. + */ +#define FPU_ENTER			\ +	rd	%fprs, %o5;		\ +	andcc	%o5, FPRS_FEF, %g0;	\ +	be,a,pn	%icc, 999f;		\ +	 wr	%g0, FPRS_FEF, %fprs;	\ +	999: + +#ifdef MEMCPY_DEBUG +#define VISEntryHalf FPU_ENTER; \ +		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntryHalf FPU_ENTER +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif + +#define GLOBAL_SPARE	%g5 +#endif + +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI	0x80		/* ASI_P */ +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x)	x +#endif + +#ifndef EX_ST +#define EX_ST(x)	x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x)	x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest)	type [addr], dest +#endif + +#ifndef STORE +#ifndef MEMCPY_DEBUG +#define STORE(type,src,addr)	type src, [addr] +#else +#define STORE(type,src,addr)	type##a src, [addr] %asi +#endif +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME	NG4memcpy +#endif +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + +	.register	%g2,#scratch +	.register	%g3,#scratch + +	.text +	.align		64 + +	.globl	FUNC_NAME +	.type	FUNC_NAME,#function +FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */ +#ifdef MEMCPY_DEBUG +	wr		%g0, 0x80, %asi +#endif +	srlx		%o2, 31, %g2 +	cmp		%g2, 0 +	tne		%XCC, 5 +	PREAMBLE +	mov		%o0, %o3 +	brz,pn		%o2, .Lexit +	 cmp		%o2, 3 +	ble,pn		%icc, .Ltiny +	 cmp		%o2, 19 +	ble,pn		%icc, .Lsmall +	 or		%o0, %o1, %g2 +	cmp		%o2, 128 +	bl,pn		%icc, .Lmedium +	 nop + +.Llarge:/* len >= 0x80 */ +	/* First get dest 8 byte aligned.  */ +	sub		%g0, %o0, %g1 +	and		%g1, 0x7, %g1 +	brz,pt		%g1, 51f +	 sub		%o2, %g1, %o2 + +1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +	add		%o1, 1, %o1 +	subcc		%g1, 1, %g1 +	add		%o0, 1, %o0 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stb, %g2, %o0 - 0x01)) + +51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x080, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x100, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x140, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x180, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) +	LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + +	/* Check if we can use the straight fully aligned +	 * loop, or we require the alignaddr/faligndata variant. +	 */ +	andcc		%o1, 0x7, %o5 +	bne,pn		%icc, .Llarge_src_unaligned +	 sub		%g0, %o0, %g1 + +	/* Legitimize the use of initializing stores by getting dest +	 * to be 64-byte aligned. 
+	 */ +	and		%g1, 0x3f, %g1 +	brz,pt		%g1, .Llarge_aligned +	 sub		%o2, %g1, %o2 + +1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +	add		%o1, 8, %o1 +	subcc		%g1, 8, %g1 +	add		%o0, 8, %o0 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stx, %g2, %o0 - 0x08)) + +.Llarge_aligned: +	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ +	andn		%o2, 0x3f, %o4 +	sub		%o2, %o4, %o2 + +1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +	add		%o1, 0x40, %o1 +	EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) +	subcc		%o4, 0x40, %o4 +	EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) +	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) +	EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) +	EX_ST(STORE_INIT(%g1, %o0)) +	add		%o0, 0x08, %o0 +	EX_ST(STORE_INIT(%g2, %o0)) +	add		%o0, 0x08, %o0 +	EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) +	EX_ST(STORE_INIT(%g3, %o0)) +	add		%o0, 0x08, %o0 +	EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) +	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) +	add		%o0, 0x08, %o0 +	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) +	EX_ST(STORE_INIT(%o5, %o0)) +	add		%o0, 0x08, %o0 +	EX_ST(STORE_INIT(%g2, %o0)) +	add		%o0, 0x08, %o0 +	EX_ST(STORE_INIT(%g3, %o0)) +	add		%o0, 0x08, %o0 +	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) +	add		%o0, 0x08, %o0 +	bne,pt		%icc, 1b +	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + +	membar		#StoreLoad | #StoreStore + +	brz,pn		%o2, .Lexit +	 cmp		%o2, 19 +	ble,pn		%icc, .Lsmall_unaligned +	 nop +	ba,a,pt		%icc, .Lmedium_noprefetch + +.Lexit:	retl +	 mov		EX_RETVAL(%o3), %o0 + +.Llarge_src_unaligned: +	andn		%o2, 0x3f, %o4 +	sub		%o2, %o4, %o2 +	VISEntryHalf +	alignaddr	%o1, %g0, %g1 +	add		%o1, %o4, %o1 +	EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) +1:	EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) +	subcc		%o4, 0x40, %o4 +	EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) +	EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) +	EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) +	EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) +	EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) +	EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) +	faligndata	%f0, %f2, %f16 +	EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) +	faligndata	%f2, %f4, %f18 +	add		%g1, 0x40, %g1 +	faligndata	%f4, %f6, %f20 +	faligndata	%f6, %f8, %f22 +	faligndata	%f8, %f10, %f24 +	faligndata	%f10, %f12, %f26 +	faligndata	%f12, %f14, %f28 +	faligndata	%f14, %f0, %f30 +	EX_ST(STORE(std, %f16, %o0 + 0x00)) +	EX_ST(STORE(std, %f18, %o0 + 0x08)) +	EX_ST(STORE(std, %f20, %o0 + 0x10)) +	EX_ST(STORE(std, %f22, %o0 + 0x18)) +	EX_ST(STORE(std, %f24, %o0 + 0x20)) +	EX_ST(STORE(std, %f26, %o0 + 0x28)) +	EX_ST(STORE(std, %f28, %o0 + 0x30)) +	EX_ST(STORE(std, %f30, %o0 + 0x38)) +	add		%o0, 0x40, %o0 +	bne,pt		%icc, 1b +	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +	VISExitHalf + +	brz,pn		%o2, .Lexit +	 cmp		%o2, 19 +	ble,pn		%icc, .Lsmall_unaligned +	 nop +	ba,a,pt		%icc, .Lmedium_unaligned + +.Lmedium: +	LOAD(prefetch, %o1 + 0x40, #n_reads_strong) +	andcc		%g2, 0x7, %g0 +	bne,pn		%icc, .Lmedium_unaligned +	 nop +.Lmedium_noprefetch: +	andncc		%o2, 0x20 - 1, %o5 +	be,pn		%icc, 2f +	 sub		%o2, %o5, %o2 +1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +	EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) +	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) +	EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +	add		%o1, 0x20, %o1 +	subcc		%o5, 0x20, %o5 +	EX_ST(STORE(stx, %g1, %o0 + 0x00)) +	EX_ST(STORE(stx, %g2, %o0 + 0x08)) +	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) +	EX_ST(STORE(stx, %o4, %o0 + 0x18)) +	bne,pt		%icc, 1b +	 add		%o0, 0x20, %o0 +2:	andcc		%o2, 0x18, %o5 +	be,pt		%icc, 3f +	 sub		%o2, %o5, %o2 +1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +	add		%o1, 0x08, %o1 +	add		%o0, 0x08, %o0 +	subcc		%o5, 0x08, %o5 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stx, %g1, %o0 - 0x08)) +3:	brz,pt		%o2, .Lexit +	 
cmp		%o2, 0x04 +	bl,pn		%icc, .Ltiny +	 nop +	EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) +	add		%o1, 0x04, %o1 +	add		%o0, 0x04, %o0 +	subcc		%o2, 0x04, %o2 +	bne,pn		%icc, .Ltiny +	 EX_ST(STORE(stw, %g1, %o0 - 0x04)) +	ba,a,pt		%icc, .Lexit +.Lmedium_unaligned: +	/* First get dest 8 byte aligned.  */ +	sub		%g0, %o0, %g1 +	and		%g1, 0x7, %g1 +	brz,pt		%g1, 2f +	 sub		%o2, %g1, %o2 + +1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +	add		%o1, 1, %o1 +	subcc		%g1, 1, %g1 +	add		%o0, 1, %o0 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stb, %g2, %o0 - 0x01)) +2: +	and		%o1, 0x7, %g1 +	brz,pn		%g1, .Lmedium_noprefetch +	 sll		%g1, 3, %g1 +	mov		64, %g2 +	sub		%g2, %g1, %g2 +	andn		%o1, 0x7, %o1 +	EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) +	sllx		%o4, %g1, %o4 +	andn		%o2, 0x08 - 1, %o5 +	sub		%o2, %o5, %o2 +1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +	add		%o1, 0x08, %o1 +	subcc		%o5, 0x08, %o5 +	srlx		%g3, %g2, GLOBAL_SPARE +	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE +	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) +	add		%o0, 0x08, %o0 +	bne,pt		%icc, 1b +	 sllx		%g3, %g1, %o4 +	srl		%g1, 3, %g1 +	add		%o1, %g1, %o1 +	brz,pn		%o2, .Lexit +	 nop +	ba,pt		%icc, .Lsmall_unaligned + +.Ltiny: +	EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +	subcc		%o2, 1, %o2 +	be,pn		%icc, .Lexit +	 EX_ST(STORE(stb, %g1, %o0 + 0x00)) +	EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) +	subcc		%o2, 1, %o2 +	be,pn		%icc, .Lexit +	 EX_ST(STORE(stb, %g1, %o0 + 0x01)) +	EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) +	ba,pt		%icc, .Lexit +	 EX_ST(STORE(stb, %g1, %o0 + 0x02)) + +.Lsmall: +	andcc		%g2, 0x3, %g0 +	bne,pn		%icc, .Lsmall_unaligned +	 andn		%o2, 0x4 - 1, %o5 +	sub		%o2, %o5, %o2 +1: +	EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) +	add		%o1, 0x04, %o1 +	subcc		%o5, 0x04, %o5 +	add		%o0, 0x04, %o0 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stw, %g1, %o0 - 0x04)) +	brz,pt		%o2, .Lexit +	 nop +	ba,a,pt		%icc, .Ltiny + +.Lsmall_unaligned: +1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +	add		%o1, 1, %o1 +	add		%o0, 1, %o0 +	subcc		%o2, 1, %o2 +	bne,pt		%icc, 1b +	 EX_ST(STORE(stb, %g1, %o0 - 0x01)) +	ba,a,pt		%icc, .Lexit +	.size		FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S new file mode 100644 index 00000000000..41da4bdd95c --- /dev/null +++ b/arch/sparc/lib/NG4memset.S @@ -0,0 +1,105 @@ +/* NG4memset.S: Niagara-4 optimized memset/bzero. + * + * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) + */ + +#include <asm/asi.h> + +	.register	%g2, #scratch +	.register	%g3, #scratch + +	.text +	.align		32 +	.globl		NG4memset +NG4memset: +	andcc		%o1, 0xff, %o4 +	be,pt		%icc, 1f +	 mov		%o2, %o1 +	sllx		%o4, 8, %g1 +	or		%g1, %o4, %o2 +	sllx		%o2, 16, %g1 +	or		%g1, %o2, %o2 +	sllx		%o2, 32, %g1 +	ba,pt		%icc, 1f +	 or		%g1, %o2, %o4 +	.size		NG4memset,.-NG4memset + +	.align		32 +	.globl		NG4bzero +NG4bzero: +	clr		%o4 +1:	cmp		%o1, 16 +	ble		%icc, .Ltiny +	 mov		%o0, %o3 +	sub		%g0, %o0, %g1 +	and		%g1, 0x7, %g1 +	brz,pt		%g1, .Laligned8 +	 sub		%o1, %g1, %o1 +1:	stb		%o4, [%o0 + 0x00] +	subcc		%g1, 1, %g1 +	bne,pt		%icc, 1b +	 add		%o0, 1, %o0 +.Laligned8: +	cmp		%o1, 64 + (64 - 8) +	ble		.Lmedium +	 sub		%g0, %o0, %g1 +	andcc		%g1, (64 - 1), %g1 +	brz,pn		%g1, .Laligned64 +	 sub		%o1, %g1, %o1 +1:	stx		%o4, [%o0 + 0x00] +	subcc		%g1, 8, %g1 +	bne,pt		%icc, 1b +	 add		%o0, 0x8, %o0 +.Laligned64: +	andn		%o1, 64 - 1, %g1 +	sub		%o1, %g1, %o1 +	brnz,pn		%o4, .Lnon_bzero_loop +	 mov		0x20, %g2 +1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +	subcc		%g1, 0x40, %g1 +	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +	bne,pt		%icc, 1b +	 add		%o0, 0x40, %o0 +.Lpostloop: +	cmp		%o1, 8 +	bl,pn		%icc, .Ltiny +	 membar		#StoreStore|#StoreLoad +.Lmedium: +	andn		%o1, 0x7, %g1 +	sub		%o1, %g1, %o1 +1:	stx		%o4, [%o0 + 0x00] +	subcc		%g1, 0x8, %g1 +	bne,pt		%icc, 1b +	 add		%o0, 0x08, %o0 +	andcc		%o1, 0x4, %g1 +	be,pt		%icc, .Ltiny +	 sub		%o1, %g1, %o1 +	stw		%o4, [%o0 + 0x00] +	add		%o0, 0x4, %o0 +.Ltiny: +	cmp		%o1, 0 +	be,pn		%icc, .Lexit +1:	 subcc		%o1, 1, %o1 +	stb		%o4, [%o0 + 0x00] +	bne,pt		%icc, 1b +	 add		%o0, 1, %o0 +.Lexit: +	retl +	 mov		%o3, %o0 +.Lnon_bzero_loop: +	mov		0x08, %g3 +	mov		0x28, %o5 +1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +	subcc		%g1, 0x40, %g1 +	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P +	add		%o0, 0x10, %o0 +	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P +	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P +	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P +	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P +	bne,pt		%icc, 1b +	 add		%o0, 0x30, %o0 +	ba,a,pt		%icc, .Lpostloop +	.size		NG4bzero,.-NG4bzero diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 00000000000..a114cbcf2a4 --- /dev/null +++ b/arch/sparc/lib/NG4patch.S @@ -0,0 +1,54 @@ +/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. + * + * Copyright (C) 2012 David S. 
Miller <davem@davemloft.net> + */ + +#define BRANCH_ALWAYS	0x10680000 +#define NOP		0x01000000 +#define NG_DO_PATCH(OLD, NEW)	\ +	sethi	%hi(NEW), %g1; \ +	or	%g1, %lo(NEW), %g1; \ +	sethi	%hi(OLD), %g2; \ +	or	%g2, %lo(OLD), %g2; \ +	sub	%g1, %g2, %g1; \ +	sethi	%hi(BRANCH_ALWAYS), %g3; \ +	sll	%g1, 11, %g1; \ +	srl	%g1, 11 + 2, %g1; \ +	or	%g3, %lo(BRANCH_ALWAYS), %g3; \ +	or	%g3, %g1, %g3; \ +	stw	%g3, [%g2]; \ +	sethi	%hi(NOP), %g3; \ +	or	%g3, %lo(NOP), %g3; \ +	stw	%g3, [%g2 + 0x4]; \ +	flush	%g2; + +	.globl	niagara4_patch_copyops +	.type	niagara4_patch_copyops,#function +niagara4_patch_copyops: +	NG_DO_PATCH(memcpy, NG4memcpy) +	NG_DO_PATCH(___copy_from_user, NG4copy_from_user) +	NG_DO_PATCH(___copy_to_user, NG4copy_to_user) +	retl +	 nop +	.size	niagara4_patch_copyops,.-niagara4_patch_copyops + +	.globl	niagara4_patch_bzero +	.type	niagara4_patch_bzero,#function +niagara4_patch_bzero: +	NG_DO_PATCH(memset, NG4memset) +	NG_DO_PATCH(__bzero, NG4bzero) +	NG_DO_PATCH(__clear_user, NGclear_user) +	NG_DO_PATCH(tsb_init, NGtsb_init) +	retl +	 nop +	.size	niagara4_patch_bzero,.-niagara4_patch_bzero + +	.globl	niagara4_patch_pageops +	.type	niagara4_patch_pageops,#function +niagara4_patch_pageops: +	NG_DO_PATCH(copy_user_page, NG4copy_user_page) +	NG_DO_PATCH(_clear_page, NG4clear_page) +	NG_DO_PATCH(clear_user_page, NG4clear_user_page) +	retl +	 nop +	.size	niagara4_patch_pageops,.-niagara4_patch_pageops diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index 428920de05b..423d46e2258 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S @@ -16,55 +16,93 @@  	 */  NGcopy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */ -	prefetch	[%o1 + 0x00], #one_read -	mov		8, %g1 -	mov		16, %g2 -	mov		24, %g3 +	save		%sp, -192, %sp +	rd		%asi, %g3 +	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi  	set		PAGE_SIZE, %g7 +	prefetch	[%i1 + 0x00], #one_read +	prefetch	[%i1 + 0x40], #one_read -1:	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 -	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 -	prefetch	[%o1 + 0x40], #one_read -	add		%o1, 32, %o1 -	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P -	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 -	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P -	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 -	add		%o1, 32, %o1 -	add		%o0, 32, %o0 -	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P -	subcc		%g7, 64, %g7 +1:	prefetch	[%i1 + 0x80], #one_read +	prefetch	[%i1 + 0xc0], #one_read +	ldda		[%i1 + 0x00] %asi, %o2 +	ldda		[%i1 + 0x10] %asi, %o4 +	ldda		[%i1 + 0x20] %asi, %l2 +	ldda		[%i1 + 0x30] %asi, %l4 +	stxa		%o2, [%i0 + 0x00] %asi +	stxa		%o3, [%i0 + 0x08] %asi +	stxa		%o4, [%i0 + 0x10] %asi +	stxa		%o5, [%i0 + 0x18] %asi +	stxa		%l2, [%i0 + 0x20] %asi +	stxa		%l3, [%i0 + 0x28] %asi +	stxa		%l4, [%i0 + 0x30] %asi +	stxa		%l5, [%i0 + 0x38] %asi +	ldda		[%i1 + 0x40] %asi, %o2 +	ldda		[%i1 + 0x50] %asi, %o4 +	ldda		[%i1 + 0x60] %asi, %l2 +	ldda		[%i1 + 0x70] %asi, %l4 +	stxa		%o2, [%i0 + 0x40] %asi +	stxa		%o3, [%i0 + 0x48] %asi +	stxa		%o4, [%i0 + 0x50] %asi +	stxa		%o5, [%i0 + 0x58] %asi +	stxa		%l2, [%i0 + 0x60] %asi +	stxa		%l3, [%i0 + 0x68] %asi +	stxa		%l4, [%i0 + 0x70] %asi +	stxa		%l5, [%i0 + 0x78] %asi +	add		%i1, 128, %i1 +	subcc		%g7, 128, %g7  	bne,pt		%xcc, 1b -	 add		%o0, 32, %o0 +	 add		%i0, 128, %i0 +	wr		
%g3, 0x0, %asi  	membar		#Sync -	retl -	 nop +	ret +	 restore -	.globl		NGclear_page, NGclear_user_page +	.align		32 +	.globl		NGclear_page +	.globl		NGclear_user_page  NGclear_page:		/* %o0=dest */  NGclear_user_page:	/* %o0=dest, %o1=vaddr */ -	mov		8, %g1 -	mov		16, %g2 -	mov		24, %g3 +	rd		%asi, %g3 +	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi  	set		PAGE_SIZE, %g7 -1:	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P -	add		%o0, 32, %o0 -	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P -	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P -	subcc		%g7, 64, %g7 +1:	stxa		%g0, [%o0 + 0x00] %asi +	stxa		%g0, [%o0 + 0x08] %asi +	stxa		%g0, [%o0 + 0x10] %asi +	stxa		%g0, [%o0 + 0x18] %asi +	stxa		%g0, [%o0 + 0x20] %asi +	stxa		%g0, [%o0 + 0x28] %asi +	stxa		%g0, [%o0 + 0x30] %asi +	stxa		%g0, [%o0 + 0x38] %asi +	stxa		%g0, [%o0 + 0x40] %asi +	stxa		%g0, [%o0 + 0x48] %asi +	stxa		%g0, [%o0 + 0x50] %asi +	stxa		%g0, [%o0 + 0x58] %asi +	stxa		%g0, [%o0 + 0x60] %asi +	stxa		%g0, [%o0 + 0x68] %asi +	stxa		%g0, [%o0 + 0x70] %asi +	stxa		%g0, [%o0 + 0x78] %asi +	stxa		%g0, [%o0 + 0x80] %asi +	stxa		%g0, [%o0 + 0x88] %asi +	stxa		%g0, [%o0 + 0x90] %asi +	stxa		%g0, [%o0 + 0x98] %asi +	stxa		%g0, [%o0 + 0xa0] %asi +	stxa		%g0, [%o0 + 0xa8] %asi +	stxa		%g0, [%o0 + 0xb0] %asi +	stxa		%g0, [%o0 + 0xb8] %asi +	stxa		%g0, [%o0 + 0xc0] %asi +	stxa		%g0, [%o0 + 0xc8] %asi +	stxa		%g0, [%o0 + 0xd0] %asi +	stxa		%g0, [%o0 + 0xd8] %asi +	stxa		%g0, [%o0 + 0xe0] %asi +	stxa		%g0, [%o0 + 0xe8] %asi +	stxa		%g0, [%o0 + 0xf0] %asi +	stxa		%g0, [%o0 + 0xf8] %asi +	subcc		%g7, 256, %g7  	bne,pt		%xcc, 1b -	 add		%o0, 32, %o0 +	 add		%o0, 256, %o0 +	wr		%g3, 0x0, %asi  	membar		#Sync  	retl  	 nop diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index bafd2fc07ac..b67142b7768 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -109,7 +109,7 @@  #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\  	subcc			%left, 8, %left;	\  	bl,pn			%xcc, 95f;		\ -	 fsrc1			%f0, %f1; +	 fsrc2			%f0, %f1;  #define UNEVEN_VISCHUNK(dest, f0, f1, left)		\  	UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\ @@ -201,7 +201,7 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */  	andn		%o1, (0x40 - 1), %o1  	and		%g2, 7, %g2  	andncc		%g3, 0x7, %g3 -	fmovd		%f0, %f2 +	fsrc2		%f0, %f2  	sub		%g3, 0x8, %g3  	sub		%o2, %GLOBAL_SPARE, %o2 diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S index 17912e60871..86f60de07b0 100644 --- a/arch/sparc/lib/ashldi3.S +++ b/arch/sparc/lib/ashldi3.S @@ -5,10 +5,10 @@   * Copyright (C) 1999 David S. Miller (davem@redhat.com)   */ +#include <linux/linkage.h> +  	.text -	.align	4 -	.globl	__ashldi3 -__ashldi3: +ENTRY(__ashldi3)  	cmp	%o2, 0  	be	9f  	 mov	0x20, %g2 @@ -32,3 +32,4 @@ __ashldi3:  9:  	retl  	 nop +ENDPROC(__ashldi3) diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S index 85398fd6dcc..6eb8ba2dd50 100644 --- a/arch/sparc/lib/ashrdi3.S +++ b/arch/sparc/lib/ashrdi3.S @@ -5,10 +5,10 @@   * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu)   */ +#include <linux/linkage.h> +  	.text -	.align	4 -	.globl __ashrdi3 -__ashrdi3: +ENTRY(__ashrdi3)  	tst	%o2  	be	3f  	 or	%g0, 32, %g2 @@ -34,3 +34,4 @@ __ashrdi3:  3:  	jmpl	%o7 + 8, %g0  	 nop +ENDPROC(__ashrdi3) diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index cbddeb38ffd..1d32b54089a 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -7,7 +7,7 @@   * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf   */ -#include <asm/atomic.h> +#include <linux/atomic.h>  #include <linux/spinlock.h>  #include <linux/module.h> @@ -16,7 +16,7 @@  #define ATOMIC_HASH(a)	(&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])  spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { -	[0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED +	[0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)  };  #else /* SMP */ @@ -55,7 +55,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)  }  EXPORT_SYMBOL(atomic_cmpxchg); -int atomic_add_unless(atomic_t *v, int a, int u) +int __atomic_add_unless(atomic_t *v, int a, int u)  {  	int ret;  	unsigned long flags; @@ -65,9 +65,9 @@ int atomic_add_unless(atomic_t *v, int a, int u)  	if (ret != u)  		v->counter += a;  	spin_unlock_irqrestore(ATOMIC_HASH(v), flags); -	return ret != u; +	return ret;  } -EXPORT_SYMBOL(atomic_add_unless); +EXPORT_SYMBOL(__atomic_add_unless);  /* Atomic operations are already serializing */  void atomic_set(atomic_t *v, int i) diff --git a/arch/sparc/lib/atomic_32.S b/arch/sparc/lib/atomic_32.S deleted file mode 100644 index 178cbb8ae1b..00000000000 --- a/arch/sparc/lib/atomic_32.S +++ /dev/null @@ -1,99 +0,0 @@ -/* atomic.S: Move this stuff here for better ICACHE hit rates. - * - * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - -	.text -	.align	4 - -	.globl  __atomic_begin -__atomic_begin: - -#ifndef CONFIG_SMP -	.globl	___xchg32_sun4c -___xchg32_sun4c: -	rd	%psr, %g3 -	andcc	%g3, PSR_PIL, %g0 -	bne	1f -	 nop -	wr	%g3, PSR_PIL, %psr -	nop; nop; nop -1: -	andcc	%g3, PSR_PIL, %g0 -	ld	[%g1], %g7 -	bne	1f -	 st	%g2, [%g1] -	wr	%g3, 0x0, %psr -	nop; nop; nop -1: -	mov	%g7, %g2 -	jmpl	%o7 + 8, %g0 -	 mov	%g4, %o7 - -	.globl	___xchg32_sun4md -___xchg32_sun4md: -	swap	[%g1], %g2 -	jmpl	%o7 + 8, %g0 -	 mov	%g4, %o7 -#endif - -	/* Read asm-sparc/atomic.h carefully to understand how this works for SMP. -	 * Really, some things here for SMP are overly clever, go read the header. -	 */ -	.globl	___atomic24_add -___atomic24_add: -	rd	%psr, %g3		! Keep the code small, old way was stupid -	nop; nop; nop;			! Let the bits set -	or	%g3, PSR_PIL, %g7	! Disable interrupts -	wr	%g7, 0x0, %psr		! Set %psr -	nop; nop; nop;			! Let the bits set -#ifdef CONFIG_SMP -1:	ldstub	[%g1 + 3], %g7		! Spin on the byte lock for SMP. -	orcc	%g7, 0x0, %g0		! Did we get it? -	bne	1b			! Nope... -	 ld	[%g1], %g7		! Load locked atomic24_t -	sra	%g7, 8, %g7		! Get signed 24-bit integer -	add	%g7, %g2, %g2		! Add in argument -	sll	%g2, 8, %g7		! Transpose back to atomic24_t -	st	%g7, [%g1]		! Clever: This releases the lock as well. -#else -	ld	[%g1], %g7		! Load locked atomic24_t -	add	%g7, %g2, %g2		! Add in argument -	st	%g2, [%g1]		! Store it back -#endif -	wr	%g3, 0x0, %psr		! Restore original PSR_PIL -	nop; nop; nop;			! Let the bits set -	jmpl	%o7, %g0		! NOTE: not + 8, see callers in atomic.h -	 mov	%g4, %o7		! Restore %o7 - -	.globl	___atomic24_sub -___atomic24_sub: -	rd	%psr, %g3		! 
Keep the code small, old way was stupid -	nop; nop; nop;			! Let the bits set -	or	%g3, PSR_PIL, %g7	! Disable interrupts -	wr	%g7, 0x0, %psr		! Set %psr -	nop; nop; nop;			! Let the bits set -#ifdef CONFIG_SMP -1:	ldstub	[%g1 + 3], %g7		! Spin on the byte lock for SMP. -	orcc	%g7, 0x0, %g0		! Did we get it? -	bne	1b			! Nope... -	 ld	[%g1], %g7		! Load locked atomic24_t -	sra	%g7, 8, %g7		! Get signed 24-bit integer -	sub	%g7, %g2, %g2		! Subtract argument -	sll	%g2, 8, %g7		! Transpose back to atomic24_t -	st	%g7, [%g1]		! Clever: This releases the lock as well -#else -	ld	[%g1], %g7		! Load locked atomic24_t -	sub	%g7, %g2, %g2		! Subtract argument -	st	%g2, [%g1]		! Store it back -#endif -	wr	%g3, 0x0, %psr		! Restore original PSR_PIL -	nop; nop; nop;			! Let the bits set -	jmpl	%o7, %g0		! NOTE: not + 8, see callers in atomic.h -	 mov	%g4, %o7		! Restore %o7 - -	.globl  __atomic_end -__atomic_end: diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 59186e0fcf3..85c233d0a34 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -1,8 +1,9 @@  /* atomic.S: These things are too big to do inline.   * - * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)   */ +#include <linux/linkage.h>  #include <asm/asi.h>  #include <asm/backoff.h> @@ -13,9 +14,7 @@  	 * memory barriers, and a second which returns  	 * a value and does the barriers.  	 */ -	.globl	atomic_add -	.type	atomic_add,#function -atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic_add) /* %o0 = increment, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	lduw	[%o1], %g1  	add	%g1, %o0, %g7 @@ -26,11 +25,9 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */  	retl  	 nop  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic_add, .-atomic_add +ENDPROC(atomic_add) -	.globl	atomic_sub -	.type	atomic_sub,#function -atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic_sub) /* %o0 = decrement, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	lduw	[%o1], %g1  	sub	%g1, %o0, %g7 @@ -41,11 +38,9 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */  	retl  	 nop  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic_sub, .-atomic_sub +ENDPROC(atomic_sub) -	.globl	atomic_add_ret -	.type	atomic_add_ret,#function -atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic_add_ret) /* %o0 = increment, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	lduw	[%o1], %g1  	add	%g1, %o0, %g7 @@ -56,11 +51,9 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */  	retl  	 sra	%g1, 0, %o0  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic_add_ret, .-atomic_add_ret +ENDPROC(atomic_add_ret) -	.globl	atomic_sub_ret -	.type	atomic_sub_ret,#function -atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	lduw	[%o1], %g1  	sub	%g1, %o0, %g7 @@ -71,11 +64,9 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */  	retl  	 sra	%g1, 0, %o0  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic_sub_ret, .-atomic_sub_ret +ENDPROC(atomic_sub_ret) -	.globl	atomic64_add -	.type	atomic64_add,#function -atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic64_add) /* %o0 = increment, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	ldx	[%o1], %g1  	add	%g1, %o0, %g7 @@ -86,11 +77,9 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */  	retl  	 nop  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic64_add, .-atomic64_add +ENDPROC(atomic64_add) 
-	.globl	atomic64_sub -	.type	atomic64_sub,#function -atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic64_sub) /* %o0 = decrement, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	ldx	[%o1], %g1  	sub	%g1, %o0, %g7 @@ -101,11 +90,9 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */  	retl  	 nop  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic64_sub, .-atomic64_sub +ENDPROC(atomic64_sub) -	.globl	atomic64_add_ret -	.type	atomic64_add_ret,#function -atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic64_add_ret) /* %o0 = increment, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	ldx	[%o1], %g1  	add	%g1, %o0, %g7 @@ -116,11 +103,9 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */  	retl  	 add	%g1, %o0, %o0  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic64_add_ret, .-atomic64_add_ret +ENDPROC(atomic64_add_ret) -	.globl	atomic64_sub_ret -	.type	atomic64_sub_ret,#function -atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */  	BACKOFF_SETUP(%o2)  1:	ldx	[%o1], %g1  	sub	%g1, %o0, %g7 @@ -131,4 +116,18 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */  	retl  	 sub	%g1, %o0, %o0  2:	BACKOFF_SPIN(%o2, %o3, 1b) -	.size	atomic64_sub_ret, .-atomic64_sub_ret +ENDPROC(atomic64_sub_ret) + +ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */ +	BACKOFF_SETUP(%o2) +1:	ldx	[%o0], %g1 +	brlez,pn %g1, 3f +	 sub	%g1, 1, %g7 +	casx	[%o0], %g1, %g7 +	cmp	%g1, %g7 +	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b) +	 nop +3:	retl +	 sub	%g1, 1, %o0 +2:	BACKOFF_SPIN(%o2, %o3, 1b) +ENDPROC(atomic64_dec_if_positive) diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 764b3eb7b60..8ec4e9c0251 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -10,7 +10,7 @@   */  #include <linux/string.h> -#include <linux/bitops.h> +#include <linux/bitmap.h>  #include <asm/bitext.h> @@ -80,8 +80,7 @@ int bit_map_string_get(struct bit_map *t, int len, int align)  		while (test_bit(offset + i, t->map) == 0) {  			i++;  			if (i == len) { -				for (i = 0; i < len; i++) -					__set_bit(offset + i, t->map); +				bitmap_set(t->map, offset, len);  				if (offset == t->first_free)  					t->first_free = find_next_zero_bit  							(t->map, t->size, @@ -120,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len)  void bit_map_init(struct bit_map *t, unsigned long *map, int size)  { - -	if ((size & 07) != 0) -		BUG(); -	memset(map, 0, size>>3); - +	bitmap_zero(map, size);  	memset(t, 0, sizeof *t);  	spin_lock_init(&t->lock);  	t->map = map; diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S index 3dc61d5537c..36f72cc0e67 100644 --- a/arch/sparc/lib/bitops.S +++ b/arch/sparc/lib/bitops.S @@ -3,14 +3,13 @@   * Copyright (C) 2000, 2007 David S. 
Miller (davem@davemloft.net)   */ +#include <linux/linkage.h>  #include <asm/asi.h>  #include <asm/backoff.h>  	.text -	.globl	test_and_set_bit -	.type	test_and_set_bit,#function -test_and_set_bit:	/* %o0=nr, %o1=addr */ +ENTRY(test_and_set_bit)	/* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -29,11 +28,9 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	test_and_set_bit, .-test_and_set_bit +ENDPROC(test_and_set_bit) -	.globl	test_and_clear_bit -	.type	test_and_clear_bit,#function -test_and_clear_bit:	/* %o0=nr, %o1=addr */ +ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -52,11 +49,9 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	test_and_clear_bit, .-test_and_clear_bit +ENDPROC(test_and_clear_bit) -	.globl	test_and_change_bit -	.type	test_and_change_bit,#function -test_and_change_bit:	/* %o0=nr, %o1=addr */ +ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -75,11 +70,9 @@ test_and_change_bit:	/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	test_and_change_bit, .-test_and_change_bit +ENDPROC(test_and_change_bit) -	.globl	set_bit -	.type	set_bit,#function -set_bit:		/* %o0=nr, %o1=addr */ +ENTRY(set_bit) /* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -96,11 +89,9 @@ set_bit:		/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	set_bit, .-set_bit +ENDPROC(set_bit) -	.globl	clear_bit -	.type	clear_bit,#function -clear_bit:		/* %o0=nr, %o1=addr */ +ENTRY(clear_bit) /* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -117,11 +108,9 @@ clear_bit:		/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	clear_bit, .-clear_bit +ENDPROC(clear_bit) -	.globl	change_bit -	.type	change_bit,#function -change_bit:		/* %o0=nr, %o1=addr */ +ENTRY(change_bit) /* %o0=nr, %o1=addr */  	BACKOFF_SETUP(%o3)  	srlx	%o0, 6, %g1  	mov	1, %o2 @@ -138,4 +127,4 @@ change_bit:		/* %o0=nr, %o1=addr */  	retl  	 nop  2:	BACKOFF_SPIN(%o3, %o4, 1b) -	.size	change_bit, .-change_bit +ENDPROC(change_bit) diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S index 804be87f9a4..3c771011ff4 100644 --- a/arch/sparc/lib/blockops.S +++ b/arch/sparc/lib/blockops.S @@ -4,6 +4,7 @@   * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)   */ +#include <linux/linkage.h>  #include <asm/page.h>  	/* Zero out 64 bytes of memory at (buf + offset). @@ -44,10 +45,7 @@  	 */  	.text -	.align	4 -	.globl	bzero_1page, __copy_1page - -bzero_1page: +ENTRY(bzero_1page)  /* NOTE: If you change the number of insns of this routine, please check   * arch/sparc/mm/hypersparc.S */  	/* %o0 = buf */ @@ -65,8 +63,9 @@ bzero_1page:  	retl  	 nop +ENDPROC(bzero_1page) -__copy_1page: +ENTRY(__copy_1page)  /* NOTE: If you change the number of insns of this routine, please check   * arch/sparc/mm/hypersparc.S */  	/* %o0 = dst, %o1 = src */ @@ -87,3 +86,4 @@ __copy_1page:  	retl  	 nop +ENDPROC(__copy_1page) diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S index 615f401edf6..8c058114b64 100644 --- a/arch/sparc/lib/bzero.S +++ b/arch/sparc/lib/bzero.S @@ -4,11 +4,11 @@   * Copyright (C) 2005 David S. 
Miller <davem@davemloft.net>   */ +#include <linux/linkage.h> +  	.text -	.globl	memset -	.type	memset, #function -memset:			/* %o0=buf, %o1=pat, %o2=len */ +ENTRY(memset) /* %o0=buf, %o1=pat, %o2=len */  	and		%o1, 0xff, %o3  	mov		%o2, %o1  	sllx		%o3, 8, %g1 @@ -19,9 +19,7 @@ memset:			/* %o0=buf, %o1=pat, %o2=len */  	ba,pt		%xcc, 1f  	 or		%g1, %o2, %o2 -	.globl	__bzero -	.type	__bzero, #function -__bzero:		/* %o0=buf, %o1=len */ +ENTRY(__bzero) /* %o0=buf, %o1=len */  	clr		%o2  1:	mov		%o0, %o3  	brz,pn		%o1, __bzero_done @@ -78,8 +76,8 @@ __bzero_tiny:  __bzero_done:  	retl  	 mov		%o3, %o0 -	.size		__bzero, .-__bzero -	.size		memset, .-memset +ENDPROC(__bzero) +ENDPROC(memset)  #define EX_ST(x,y)		\  98:	x,y;			\ @@ -89,9 +87,7 @@ __bzero_done:  	.text;			\  	.align 4; -	.globl	__clear_user -	.type	__clear_user, #function -__clear_user:		/* %o0=buf, %o1=len */ +ENTRY(__clear_user) /* %o0=buf, %o1=len */  	brz,pn		%o1, __clear_user_done  	 cmp		%o1, 16  	bl,pn		%icc, __clear_user_tiny @@ -146,4 +142,4 @@ __clear_user_tiny:  __clear_user_done:  	retl  	 clr		%o0 -	.size		__clear_user, .-__clear_user +ENDPROC(__clear_user) diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 3632cb34e91..0084c3361e1 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -289,10 +289,16 @@ cc_end_cruft:  	/* Also, handle the alignment code out of band. */  cc_dword_align: -	cmp	%g1, 6 -	bl,a	ccte +	cmp	%g1, 16 +	bge	1f +	 srl	%g1, 1, %o3 +2:	cmp	%o3, 0 +	be,a	ccte  	 andcc	%g1, 0xf, %o3 -	andcc	%o0, 0x1, %g0 +	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits) +	be,a	2b +	 srl	%o3, 1, %o3 +1:	andcc	%o0, 0x1, %g0  	bne	ccslow  	 andcc	%o0, 0x2, %g0  	be	1f diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f6c2a..46272dfc26e 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -37,10 +37,10 @@ _clear_page:		/* %o0=dest */  	.globl		clear_user_page  clear_user_page:	/* %o0=dest, %o1=vaddr */  	lduw		[%g6 + TI_PRE_COUNT], %o2 -	sethi		%uhi(PAGE_OFFSET), %g2 +	sethi		%hi(PAGE_OFFSET), %g2  	sethi		%hi(PAGE_SIZE), %o4 -	sllx		%g2, 32, %g2 +	ldx		[%g2 + %lo(PAGE_OFFSET)], %g2  	sethi		%hi(PAGE_KERNEL_LOCKED), %g3  	ldx		[%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index b243d3b606b..dd16c61f326 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -34,10 +34,10 @@  #endif  #define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7)	\ -	fmovd	%reg0, %f48; 	fmovd	%reg1, %f50;		\ -	fmovd	%reg2, %f52; 	fmovd	%reg3, %f54;		\ -	fmovd	%reg4, %f56; 	fmovd	%reg5, %f58;		\ -	fmovd	%reg6, %f60; 	fmovd	%reg7, %f62; +	fsrc2	%reg0, %f48; 	fsrc2	%reg1, %f50;		\ +	fsrc2	%reg2, %f52; 	fsrc2	%reg3, %f54;		\ +	fsrc2	%reg4, %f56; 	fsrc2	%reg5, %f58;		\ +	fsrc2	%reg6, %f60; 	fsrc2	%reg7, %f62;  	.text @@ -46,10 +46,10 @@  	.type		copy_user_page,#function  copy_user_page:		/* %o0=dest, %o1=src, %o2=vaddr */  	lduw		[%g6 + TI_PRE_COUNT], %o4 -	sethi		%uhi(PAGE_OFFSET), %g2 +	sethi		%hi(PAGE_OFFSET), %g2  	sethi		%hi(PAGE_SIZE), %o3 -	sllx		%g2, 32, %g2 +	ldx		[%g2 + %lo(PAGE_OFFSET)], %g2  	sethi		%hi(PAGE_KERNEL_LOCKED), %g3  	ldx		[%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 @@ -104,60 +104,60 @@ cheetah_copy_page_insn:  	prefetch	[%o1 + 0x140], #one_read  	ldd		[%o1 + 0x010], %f4  	prefetch	[%o1 + 0x180], #one_read -	fmovd		%f0, %f16 +	fsrc2		%f0, %f16  	ldd		[%o1 + 0x018], %f6 -	fmovd		%f2, %f18 +	fsrc2		%f2, %f18  	ldd		[%o1 + 
0x020], %f8 -	fmovd		%f4, %f20 +	fsrc2		%f4, %f20  	ldd		[%o1 + 0x028], %f10 -	fmovd		%f6, %f22 +	fsrc2		%f6, %f22  	ldd		[%o1 + 0x030], %f12 -	fmovd		%f8, %f24 +	fsrc2		%f8, %f24  	ldd		[%o1 + 0x038], %f14 -	fmovd		%f10, %f26 +	fsrc2		%f10, %f26  	ldd		[%o1 + 0x040], %f0  1:	ldd		[%o1 + 0x048], %f2 -	fmovd		%f12, %f28 +	fsrc2		%f12, %f28  	ldd		[%o1 + 0x050], %f4 -	fmovd		%f14, %f30 +	fsrc2		%f14, %f30  	stda		%f16, [%o0] ASI_BLK_P  	ldd		[%o1 + 0x058], %f6 -	fmovd		%f0, %f16 +	fsrc2		%f0, %f16  	ldd		[%o1 + 0x060], %f8 -	fmovd		%f2, %f18 +	fsrc2		%f2, %f18  	ldd		[%o1 + 0x068], %f10 -	fmovd		%f4, %f20 +	fsrc2		%f4, %f20  	ldd		[%o1 + 0x070], %f12 -	fmovd		%f6, %f22 +	fsrc2		%f6, %f22  	ldd		[%o1 + 0x078], %f14 -	fmovd		%f8, %f24 +	fsrc2		%f8, %f24  	ldd		[%o1 + 0x080], %f0  	prefetch	[%o1 + 0x180], #one_read -	fmovd		%f10, %f26 +	fsrc2		%f10, %f26  	subcc		%o2, 1, %o2  	add		%o0, 0x40, %o0  	bne,pt		%xcc, 1b  	 add		%o1, 0x40, %o1  	ldd		[%o1 + 0x048], %f2 -	fmovd		%f12, %f28 +	fsrc2		%f12, %f28  	ldd		[%o1 + 0x050], %f4 -	fmovd		%f14, %f30 +	fsrc2		%f14, %f30  	stda		%f16, [%o0] ASI_BLK_P  	ldd		[%o1 + 0x058], %f6 -	fmovd		%f0, %f16 +	fsrc2		%f0, %f16  	ldd		[%o1 + 0x060], %f8 -	fmovd		%f2, %f18 +	fsrc2		%f2, %f18  	ldd		[%o1 + 0x068], %f10 -	fmovd		%f4, %f20 +	fsrc2		%f4, %f20  	ldd		[%o1 + 0x070], %f12 -	fmovd		%f6, %f22 +	fsrc2		%f6, %f22  	add		%o0, 0x40, %o0  	ldd		[%o1 + 0x078], %f14 -	fmovd		%f8, %f24 -	fmovd		%f10, %f26 -	fmovd		%f12, %f28 -	fmovd		%f14, %f30 +	fsrc2		%f8, %f24 +	fsrc2		%f10, %f26 +	fsrc2		%f12, %f28 +	fsrc2		%f14, %f30  	stda		%f16, [%o0] ASI_BLK_P  	membar		#Sync  	VISExitHalf diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index 681b3683da9..9614b48b6ef 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -17,21 +17,6 @@ along with GNU CC; see the file COPYING.  If not, write to  the Free Software Foundation, 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.  
*/ -	.data -	.align 8 -	.globl	__clz_tab -__clz_tab: -	.byte	0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 -	.byte	6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 -	.byte	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -	.byte	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 -	.byte	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 -	.byte	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 -	.byte	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 -	.byte	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 -	.size	 __clz_tab,256 -	.global .udiv -  	.text  	.align 4  	.globl __divdi3 @@ -97,8 +82,9 @@ __divdi3:  	bne .LL85  	mov %i0,%o2  	mov 1,%o0 -	call .udiv,0  	mov 0,%o1 +	wr %g0, 0, %y +	udiv %o0, %o1, %o0  	mov %o0,%o4  	mov %i0,%o2  .LL85: diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S new file mode 100644 index 00000000000..b39389f6989 --- /dev/null +++ b/arch/sparc/lib/ffs.S @@ -0,0 +1,84 @@ +#include <linux/linkage.h> + +	.register	%g2,#scratch + +	.text +	.align	32 + +ENTRY(ffs) +	brnz,pt	%o0, 1f +	 mov	1, %o1 +	retl +	 clr	%o0 +	nop +	nop +ENTRY(__ffs) +	sllx	%o0, 32, %g1		/* 1  */ +	srlx	%o0, 32, %g2 + +	clr	%o1			/* 2  */ +	movrz	%g1, %g2, %o0 + +	movrz	%g1, 32, %o1		/* 3  */ +1:	clr	%o2 + +	sllx	%o0, (64 - 16), %g1	/* 4  */ +	srlx	%o0, 16, %g2 + +	movrz	%g1, %g2, %o0		/* 5  */ +	clr	%o3 + +	movrz	%g1, 16, %o2		/* 6  */ +	clr	%o4 + +	and	%o0, 0xff, %g1		/* 7  */ +	srlx	%o0, 8, %g2 + +	movrz	%g1, %g2, %o0		/* 8  */ +	clr	%o5 + +	movrz	%g1, 8, %o3		/* 9  */ +	add	%o2, %o1, %o2 + +	and	%o0, 0xf, %g1		/* 10 */ +	srlx	%o0, 4, %g2 + +	movrz	%g1, %g2, %o0		/* 11 */ +	add	%o2, %o3, %o2 + +	movrz	%g1, 4, %o4		/* 12 */ + +	and	%o0, 0x3, %g1		/* 13 */ +	srlx	%o0, 2, %g2 + +	movrz	%g1, %g2, %o0		/* 14 */ +	add	%o2, %o4, %o2 + +	movrz	%g1, 2, %o5		/* 15 */ + +	and	%o0, 0x1, %g1		/* 16 */ + +	add	%o2, %o5, %o2		/* 17 */ +	xor	%g1, 0x1, %g1 + +	retl				/* 18 */ +	 add	%o2, %g1, %o0 +ENDPROC(ffs) +ENDPROC(__ffs) + +	.section	.popc_6insn_patch, "ax" +	.word		ffs +	brz,pn	%o0, 98f +	 neg	%o0, %g1 +	xnor	%o0, %g1, %o1 +	popc	%o1, %o0 +98:	retl +	 nop +	.word		__ffs +	neg	%o0, %g1 +	xnor	%o0, %g1, %o1 +	popc	%o1, %o0 +	retl +	 sub	%o0, 1, %o0 +	nop +	.previous diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S new file mode 100644 index 00000000000..95414e0a680 --- /dev/null +++ b/arch/sparc/lib/hweight.S @@ -0,0 +1,51 @@ +#include <linux/linkage.h> + +	.text +	.align	32 +ENTRY(__arch_hweight8) +	ba,pt	%xcc, __sw_hweight8 +	 nop +	nop +ENDPROC(__arch_hweight8) +	.section	.popc_3insn_patch, "ax" +	.word		__arch_hweight8 +	sllx		%o0, 64-8, %g1 +	retl +	 popc		%g1, %o0 +	.previous + +ENTRY(__arch_hweight16) +	ba,pt	%xcc, __sw_hweight16 +	 nop +	nop +ENDPROC(__arch_hweight16) +	.section	.popc_3insn_patch, "ax" +	.word		__arch_hweight16 +	sllx		%o0, 64-16, %g1 +	retl +	 popc		%g1, %o0 +	.previous + +ENTRY(__arch_hweight32) +	ba,pt	%xcc, __sw_hweight32 +	 nop +	nop +ENDPROC(__arch_hweight32) +	.section	.popc_3insn_patch, "ax" +	.word		__arch_hweight32 +	sllx		%o0, 64-32, %g1 +	retl +	 popc		%g1, %o0 +	.previous + +ENTRY(__arch_hweight64) +	ba,pt	%xcc, __sw_hweight64 +	 nop +	nop +ENDPROC(__arch_hweight64) +	.section	.popc_3insn_patch, "ax" +	.word		__arch_hweight64 +	retl +	 popc		%o0, %o0 +	nop +	.previous diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index 9ef37e13a92..c4d42a50ebc 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ 
-18,31 +18,8 @@ void ioport_unmap(void __iomem *addr)  EXPORT_SYMBOL(ioport_map);  EXPORT_SYMBOL(ioport_unmap); -/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ -	resource_size_t start = pci_resource_start(dev, bar); -	resource_size_t len = pci_resource_len(dev, bar); -	unsigned long flags = pci_resource_flags(dev, bar); - -	if (!len || !start) -		return NULL; -	if (maxlen && len > maxlen) -		len = maxlen; -	if (flags & IORESOURCE_IO) -		return ioport_map(start, len); -	if (flags & IORESOURCE_MEM) { -		if (flags & IORESOURCE_CACHEABLE) -			return ioremap(start, len); -		return ioremap_nocache(start, len); -	} -	/* What? */ -	return NULL; -} -  void pci_iounmap(struct pci_dev *dev, void __iomem * addr)  {  	/* nothing to do */  } -EXPORT_SYMBOL(pci_iomap);  EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S index 58ca5b9a877..4742d59029e 100644 --- a/arch/sparc/lib/ipcsum.S +++ b/arch/sparc/lib/ipcsum.S @@ -1,8 +1,7 @@ +#include <linux/linkage.h> +  	.text -	.align	32 -	.globl	ip_fast_csum -	.type	ip_fast_csum,#function -ip_fast_csum:	/* %o0 = iph, %o1 = ihl */ +ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */  	sub	%o1, 4, %g7  	lduw	[%o0 + 0x00], %o2  	lduw	[%o0 + 0x04], %g2 @@ -31,4 +30,4 @@ ip_fast_csum:	/* %o0 = iph, %o1 = ihl */  	set	0xffff, %o1  	retl  	 and	%o2, %o1, %o0 -	.size	ip_fast_csum, .-ip_fast_csum +ENDPROC(ip_fast_csum) diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1b30bb3bfdb..323335b9cd2 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -15,8 +15,6 @@  /* string functions */  EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(__strlen_user); -EXPORT_SYMBOL(__strnlen_user);  EXPORT_SYMBOL(strncmp);  /* mem* functions */ @@ -33,9 +31,6 @@ EXPORT_SYMBOL(memset);  EXPORT_SYMBOL(memmove);  EXPORT_SYMBOL(__bzero); -/* Moving data to/from/in userspace. */ -EXPORT_SYMBOL(__strncpy_from_user); -  /* Networking helper routines. */  EXPORT_SYMBOL(csum_partial); @@ -56,24 +51,10 @@ extern int __divdi3(int, int);  extern void (*__copy_1page)(void *, const void *);  extern void (*bzero_1page)(void *); -extern int __strncmp(const char *, const char *, __kernel_size_t); -  extern void ___rw_read_enter(void);  extern void ___rw_read_try(void);  extern void ___rw_read_exit(void);  extern void ___rw_write_enter(void); -extern void ___atomic24_add(void); -extern void ___atomic24_sub(void); - -/* Alias functions whose names begin with "." and export the aliases. - * The module references will be fixed up by module_frob_arch_sections. - */ -extern int _Div(int, int); -extern int _Mul(int, int); -extern int _Rem(int, int); -extern unsigned _Udiv(unsigned, unsigned); -extern unsigned _Umul(unsigned, unsigned); -extern unsigned _Urem(unsigned, unsigned);  /* Networking helper routines. */  EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); @@ -83,9 +64,6 @@ EXPORT_SYMBOL(__copy_1page);  EXPORT_SYMBOL(__memmove);  EXPORT_SYMBOL(bzero_1page); -/* string functions */ -EXPORT_SYMBOL(__strncmp); -  /* Moving data to/from/in userspace. */  EXPORT_SYMBOL(__copy_user); @@ -97,22 +75,11 @@ EXPORT_SYMBOL(___rw_read_exit);  EXPORT_SYMBOL(___rw_write_enter);  #endif -/* Atomic operations. 
*/ -EXPORT_SYMBOL(___atomic24_add); -EXPORT_SYMBOL(___atomic24_sub); -  EXPORT_SYMBOL(__ashrdi3);  EXPORT_SYMBOL(__ashldi3);  EXPORT_SYMBOL(__lshrdi3);  EXPORT_SYMBOL(__muldi3);  EXPORT_SYMBOL(__divdi3); - -EXPORT_SYMBOL(_Rem); -EXPORT_SYMBOL(_Urem); -EXPORT_SYMBOL(_Mul); -EXPORT_SYMBOL(_Umul); -EXPORT_SYMBOL(_Div); -EXPORT_SYMBOL(_Udiv);  #endif  /* @@ -131,15 +98,6 @@ EXPORT_SYMBOL(___copy_from_user);  EXPORT_SYMBOL(___copy_in_user);  EXPORT_SYMBOL(__clear_user); -/* RW semaphores */ -EXPORT_SYMBOL(__down_read); -EXPORT_SYMBOL(__down_read_trylock); -EXPORT_SYMBOL(__down_write); -EXPORT_SYMBOL(__down_write_trylock); -EXPORT_SYMBOL(__up_read); -EXPORT_SYMBOL(__up_write); -EXPORT_SYMBOL(__downgrade_write); -  /* Atomic counter implementation. */  EXPORT_SYMBOL(atomic_add);  EXPORT_SYMBOL(atomic_add_ret); @@ -149,6 +107,7 @@ EXPORT_SYMBOL(atomic64_add);  EXPORT_SYMBOL(atomic64_add_ret);  EXPORT_SYMBOL(atomic64_sub);  EXPORT_SYMBOL(atomic64_sub_ret); +EXPORT_SYMBOL(atomic64_dec_if_positive);  /* Atomic bit operations. */  EXPORT_SYMBOL(test_and_set_bit); @@ -167,6 +126,10 @@ EXPORT_SYMBOL(copy_user_page);  void VISenter(void);  EXPORT_SYMBOL(VISenter); +/* CRYPTO code needs this */ +void VISenterhalf(void); +EXPORT_SYMBOL(VISenterhalf); +  extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);  extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,  		unsigned long *); diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S index 47a1354c160..60ebc7cdbee 100644 --- a/arch/sparc/lib/lshrdi3.S +++ b/arch/sparc/lib/lshrdi3.S @@ -1,6 +1,6 @@ +#include <linux/linkage.h> -	.globl	__lshrdi3 -__lshrdi3: +ENTRY(__lshrdi3)  	cmp	%o2, 0  	be	3f  	 mov	0x20, %g2 @@ -24,3 +24,4 @@ __lshrdi3:  3:  	retl   	 nop  +ENDPROC(__lshrdi3) diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S index 34fe6575173..4d8c497517b 100644 --- a/arch/sparc/lib/memcpy.S +++ b/arch/sparc/lib/memcpy.S @@ -7,40 +7,12 @@   * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)   */ -#ifdef __KERNEL__ - -#define FUNC(x) 											\ +#define FUNC(x) 		\  	.globl	x;		\  	.type	x,@function;	\ -	.align	4;											\ +	.align	4;		\  x: -#undef FASTER_REVERSE -#undef FASTER_NONALIGNED -#define FASTER_ALIGNED - -/* In kernel these functions don't return a value. - * One should use macros in asm/string.h for that purpose. - * We return 0, so that bugs are more apparent. - */ -#define SETUP_RETL -#define RETL_INSN	clr	%o0 - -#else - -/* libc */ - -#include "DEFS.h" - -#define FASTER_REVERSE -#define FASTER_NONALIGNED -#define FASTER_ALIGNED - -#define SETUP_RETL	mov	%o0, %g6 -#define RETL_INSN	mov	%g6, %o0 - -#endif -  /* Both these macros have to start with exactly the same insn */  #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \  	ldd	[%src + (offset) + 0x00], %t0; \ @@ -164,30 +136,6 @@ x:  	.text  	.align	4 -#ifdef FASTER_REVERSE - -70:	/* rdword_align */ - -	andcc		%o1, 1, %g0 -	be		4f -	 andcc		%o1, 2, %g0 - -	ldub		[%o1 - 1], %g2 -	sub		%o1, 1, %o1 -	stb		%g2, [%o0 - 1] -	sub		%o2, 1, %o2 -	be		3f -	 sub		%o0, 1, %o0 -4: -	lduh		[%o1 - 2], %g2 -	sub		%o1, 2, %o1 -	sth		%g2, [%o0 - 2] -	sub		%o2, 2, %o2 -	b		3f -	 sub		%o0, 2, %o0 - -#endif /* FASTER_REVERSE */ -  0:  	retl  	 nop		! Only bcopy returns here and it retuns void... 
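
__lshrdi3, exported just above, is the libgcc-style 64-bit logical right shift built from 32-bit halves. A simplified C rendering of the word-splitting logic (names and the 0..63 shift-count assumption are for illustration only):

#include <stdint.h>
#include <assert.h>

/* 64-bit logical right shift assembled from 32-bit halves. */
static uint64_t lshrdi3_c(uint64_t v, unsigned int shift)
{
	uint32_t hi = (uint32_t)(v >> 32);
	uint32_t lo = (uint32_t)v;
	uint32_t rhi, rlo;

	if (shift == 0) {
		rhi = hi;
		rlo = lo;
	} else if (shift < 32) {
		rhi = hi >> shift;
		rlo = (lo >> shift) | (hi << (32 - shift));
	} else {				/* 32..63: high result word is zero */
		rhi = 0;
		rlo = hi >> (shift - 32);
	}
	return ((uint64_t)rhi << 32) | rlo;
}

int main(void)
{
	assert(lshrdi3_c(0x123456789abcdef0ULL,  8) == 0x00123456789abcdeULL);
	assert(lshrdi3_c(0x8000000000000000ULL, 33) == 0x40000000ULL);
	return 0;
}
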
@@ -198,7 +146,7 @@ FUNC(__memmove)  #endif  FUNC(memmove)  	cmp		%o0, %o1 -	SETUP_RETL +	mov		%o0, %g7  	bleu		9f  	 sub		%o0, %o1, %o4 @@ -207,8 +155,6 @@ FUNC(memmove)  	bleu		0f  	 andcc		%o4, 3, %o5 -#ifndef FASTER_REVERSE -  	add		%o1, %o2, %o1  	add		%o0, %o2, %o0  	sub		%o1, 1, %o1 @@ -224,295 +170,7 @@ FUNC(memmove)  	 sub		%o0, 1, %o0  	retl -	 RETL_INSN - -#else /* FASTER_REVERSE */ - -	add		%o1, %o2, %o1 -	add		%o0, %o2, %o0 -	bne		77f -	 cmp		%o2, 15 -	bleu		91f -	 andcc		%o1, 3, %g0 -	bne		70b -3: -	 andcc		%o1, 4, %g0 - -	be		2f -	 mov		%o2, %g1 - -	ld		[%o1 - 4], %o4 -	sub		%g1, 4, %g1 -	st		%o4, [%o0 - 4] -	sub		%o1, 4, %o1 -	sub		%o0, 4, %o0 -2: -	andcc		%g1, 0xffffff80, %g7 -	be		3f -	 andcc		%o0, 4, %g0 - -	be		74f + 4 -5: -	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -	subcc		%g7, 128, %g7 -	sub		%o1, 128, %o1 -	bne		5b -	 sub		%o0, 128, %o0 -3: -	andcc		%g1, 0x70, %g7 -	be		72f -	 andcc		%g1, 8, %g0 - -	sethi		%hi(72f), %o5 -	srl		%g7, 1, %o4 -	add		%g7, %o4, %o4 -	sub		%o1, %g7, %o1 -	sub		%o5, %o4, %o5 -	jmpl		%o5 + %lo(72f), %g0 -	 sub		%o0, %g7, %o0 - -71:	/* rmemcpy_table */ -	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) -	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) - -72:	/* rmemcpy_table_end */ - -	be		73f -	 andcc		%g1, 4, %g0 - -	ldd		[%o1 - 0x08], %g2 -	sub		%o0, 8, %o0 -	sub		%o1, 8, %o1 -	st		%g2, [%o0] -	st		%g3, [%o0 + 0x04] - -73:	/* rmemcpy_last7 */ - -	be		1f -	 andcc		%g1, 2, %g0 - -	ld		[%o1 - 4], %g2 -	sub		%o1, 4, %o1 -	st		%g2, [%o0 - 4] -	sub		%o0, 4, %o0 -1: -	be		1f -	 andcc		%g1, 1, %g0 - -	lduh		[%o1 - 2], %g2 -	sub		%o1, 2, %o1 -	sth		%g2, [%o0 - 2] -	sub		%o0, 2, %o0 -1: -	be		1f -	 nop - -	ldub		[%o1 - 1], %g2 -	stb		%g2, [%o0 - 1] -1: -	retl - 	 RETL_INSN - -74:	/* rldd_std */ -	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) -	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -	subcc		%g7, 128, %g7 -	sub		%o1, 128, %o1 -	bne		74b -	 sub		%o0, 128, %o0 - -	andcc		%g1, 0x70, %g7 -	be		72b -	 andcc		%g1, 8, %g0 - -	sethi		%hi(72b), %o5 -	srl		%g7, 1, %o4 -	add		%g7, %o4, %o4 -	sub		%o1, %g7, %o1 -	sub		%o5, %o4, %o5 -	jmpl		%o5 + %lo(72b), %g0 -	 sub		%o0, %g7, %o0 - -75:	/* rshort_end */ - -	and		%o2, 0xe, %o3 -2: -	sethi		%hi(76f), %o5 -	sll		%o3, 3, %o4 -	sub		%o0, %o3, %o0 -	sub		%o5, %o4, %o5 -	sub		%o1, %o3, %o1 -	jmpl		%o5 + %lo(76f), %g0 -	 andcc		%o2, 1, %g0 - -	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) -	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) - -76:	/* rshort_table_end */ - -	be		1f -	 nop -	ldub		[%o1 - 1], %g2 -	stb		%g2, [%o0 - 1] -1: -	retl - 	 RETL_INSN - -91:	/* rshort_aligned_end */ - -	bne		75b -	 andcc		%o2, 8, %g0 - -	be		1f -	 andcc		%o2, 4, %g0 - -	ld		[%o1 - 0x08], %g2 -	ld		[%o1 - 0x04], %g3 -	sub		%o1, 8, %o1 
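
The memmove entry above picks a copy direction from the cmp %o0, %o1 / bleu pair: forward (falling into the memcpy path) when the destination lies below the source, byte-by-byte backwards otherwise. A plain C sketch of that overlap rule (memmove_c is an illustrative name, not the kernel routine):

#include <stddef.h>
#include <string.h>
#include <assert.h>

/* Copy forward when dst is below src, backward otherwise, so overlapping
 * regions are handled in either direction. */
static void *memmove_c(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (d <= s) {
		while (n--)
			*d++ = *s++;
	} else {
		d += n;
		s += n;
		while (n--)
			*--d = *--s;
	}
	return dst;
}

int main(void)
{
	char buf[16] = "abcdef";

	memmove_c(buf + 2, buf, 4);	/* overlap: must run backwards */
	assert(memcmp(buf, "ababcd", 6) == 0);
	return 0;
}
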
-	st		%g2, [%o0 - 0x08] -	st		%g3, [%o0 - 0x04] -	sub		%o0, 8, %o0 -1: -	b		73b -	 mov		%o2, %g1 - -77:	/* rnon_aligned */ -	cmp		%o2, 15 -	bleu		75b -	 andcc		%o0, 3, %g0 -	be		64f -	 andcc		%o0, 1, %g0 -	be		63f -	 andcc		%o0, 2, %g0 -	ldub		[%o1 - 1], %g5 -	sub		%o1, 1, %o1 -	stb		%g5, [%o0 - 1] -	sub		%o0, 1, %o0 -	be		64f -	 sub		%o2, 1, %o2 -63: -	ldub		[%o1 - 1], %g5 -	sub		%o1, 2, %o1 -	stb		%g5, [%o0 - 1] -	sub		%o0, 2, %o0 -	ldub		[%o1], %g5 -	sub		%o2, 2, %o2 -	stb		%g5, [%o0] -64:	 -	and		%o1, 3, %g2 -	and		%o1, -4, %o1 -	and		%o2, 0xc, %g3 -	add		%o1, 4, %o1 -	cmp		%g3, 4 -	sll		%g2, 3, %g4 -	mov		32, %g2 -	be		4f -	 sub		%g2, %g4, %g7 - -	blu		3f -	 cmp		%g3, 8 - -	be		2f -	 srl		%o2, 2, %g3 - -	ld		[%o1 - 4], %o3 -	add		%o0, -8, %o0 -	ld		[%o1 - 8], %o4 -	add		%o1, -16, %o1 -	b		7f -	 add		%g3, 1, %g3 -2: -	ld		[%o1 - 4], %o4 -	add		%o0, -4, %o0 -	ld		[%o1 - 8], %g1 -	add		%o1, -12, %o1 -	b		8f -	 add		%g3, 2, %g3 -3: -	ld		[%o1 - 4], %o5 -	add		%o0, -12, %o0 -	ld		[%o1 - 8], %o3 -	add		%o1, -20, %o1 -	b		6f -	 srl		%o2, 2, %g3 -4: -	ld		[%o1 - 4], %g1 -	srl		%o2, 2, %g3 -	ld		[%o1 - 8], %o5 -	add		%o1, -24, %o1 -	add		%o0, -16, %o0 -	add		%g3, -1, %g3 - -	ld		[%o1 + 12], %o3 -5: -	sll		%o5, %g4, %g2 -	srl		%g1, %g7, %g5 -	or		%g2, %g5, %g2 -	st		%g2, [%o0 + 12] -6: -	ld		[%o1 + 8], %o4 -	sll		%o3, %g4, %g2 -	srl		%o5, %g7, %g5 -	or		%g2, %g5, %g2 -	st		%g2, [%o0 + 8] -7: -	ld		[%o1 + 4], %g1 -	sll		%o4, %g4, %g2 -	srl		%o3, %g7, %g5 -	or		%g2, %g5, %g2 -	st		%g2, [%o0 + 4] -8: -	ld		[%o1], %o5 -	sll		%g1, %g4, %g2 -	srl		%o4, %g7, %g5 -	addcc		%g3, -4, %g3 -	or		%g2, %g5, %g2 -	add		%o1, -16, %o1 -	st		%g2, [%o0] -	add		%o0, -16, %o0 -	bne,a		5b	 -	 ld		[%o1 + 12], %o3 -	sll		%o5, %g4, %g2 -	srl		%g1, %g7, %g5 -	srl		%g4, 3, %g3 -	or		%g2, %g5, %g2 -	add		%o1, %g3, %o1 -	andcc		%o2, 2, %g0 -	st		%g2, [%o0 + 12] -	be		1f -	 andcc		%o2, 1, %g0 -	 -	ldub		[%o1 + 15], %g5 -	add		%o1, -2, %o1 -	stb		%g5, [%o0 + 11] -	add		%o0, -2, %o0 -	ldub		[%o1 + 16], %g5 -	stb		%g5, [%o0 + 12] -1: -	be		1f -	 nop -	ldub		[%o1 + 15], %g5 -	stb		%g5, [%o0 + 11] -1: -	retl -	 RETL_INSN - -#endif /* FASTER_REVERSE */ +	 mov		%g7, %o0  /* NOTE: This code is executed just for the cases,           where %src (=%o1) & 3 is != 0. 
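
The NOTE above introduces the path taken when the source is not word aligned: each destination word is assembled from two adjacent aligned loads with an sll/srl/or triplet, whose shift counts are off*8 and 32 - off*8 (%g4 and %l0 in the rewritten code). A small C sketch of that merge on register values, assuming SPARC's big-endian view and a byte offset of 1..3:

#include <stdint.h>
#include <assert.h>

/* Merge two adjacent aligned words into the unaligned word that begins
 * 'off' bytes into w0: the earlier word supplies its low 4-off bytes,
 * the later word its high off bytes. */
static uint32_t merge_words(uint32_t w0, uint32_t w1, unsigned int off)
{
	unsigned int lsh = off * 8;		/* %g4 in the assembly */
	unsigned int rsh = 32 - lsh;		/* %l0 */

	return (w0 << lsh) | (w1 >> rsh);
}

int main(void)
{
	/* bytes AA BB CC DD | EE FF 11 22: the word at offset 1 is BB CC DD EE */
	assert(merge_words(0xAABBCCDDu, 0xEEFF1122u, 1) == 0xBBCCDDEEu);
	assert(merge_words(0xAABBCCDDu, 0xEEFF1122u, 3) == 0xDDEEFF11u);
	return 0;
}
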
@@ -546,7 +204,7 @@ FUNC(memmove)  FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	sub		%o0, %o1, %o4 -	SETUP_RETL +	mov		%o0, %g7  9:  	andcc		%o4, 3, %o5  0: @@ -569,7 +227,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	add		%o1, 4, %o1  	add		%o0, 4, %o0  2: -	andcc		%g1, 0xffffff80, %g7 +	andcc		%g1, 0xffffff80, %g0  	be		3f  	 andcc		%o0, 4, %g0 @@ -579,22 +237,23 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)  	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)  	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -	subcc		%g7, 128, %g7 +	sub		%g1, 128, %g1  	add		%o1, 128, %o1 -	bne		5b +	cmp		%g1, 128 +	bge		5b  	 add		%o0, 128, %o0  3: -	andcc		%g1, 0x70, %g7 +	andcc		%g1, 0x70, %g4  	be		80f  	 andcc		%g1, 8, %g0  	sethi		%hi(80f), %o5 -	srl		%g7, 1, %o4 -	add		%g7, %o4, %o4 -	add		%o1, %g7, %o1 +	srl		%g4, 1, %o4 +	add		%g4, %o4, %o4 +	add		%o1, %g4, %o1  	sub		%o5, %o4, %o5  	jmpl		%o5 + %lo(80f), %g0 -	 add		%o0, %g7, %o0 +	 add		%o0, %g4, %o0  79:	/* memcpy_table */ @@ -641,43 +300,28 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	stb		%g2, [%o0]  1:  	retl - 	 RETL_INSN +	 mov		%g7, %o0  82:	/* ldd_std */  	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)  	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)  	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)  	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) -	subcc		%g7, 128, %g7 +	subcc		%g1, 128, %g1  	add		%o1, 128, %o1 -	bne		82b +	cmp		%g1, 128 +	bge		82b  	 add		%o0, 128, %o0 -#ifndef FASTER_ALIGNED - -	andcc		%g1, 0x70, %g7 -	be		80b -	 andcc		%g1, 8, %g0 - -	sethi		%hi(80b), %o5 -	srl		%g7, 1, %o4 -	add		%g7, %o4, %o4 -	add		%o1, %g7, %o1 -	sub		%o5, %o4, %o5 -	jmpl		%o5 + %lo(80b), %g0 -	 add		%o0, %g7, %o0 - -#else /* FASTER_ALIGNED */ - -	andcc		%g1, 0x70, %g7 +	andcc		%g1, 0x70, %g4  	be		84f  	 andcc		%g1, 8, %g0  	sethi		%hi(84f), %o5 -	add		%o1, %g7, %o1 -	sub		%o5, %g7, %o5 +	add		%o1, %g4, %o1 +	sub		%o5, %g4, %o5  	jmpl		%o5 + %lo(84f), %g0 -	 add		%o0, %g7, %o0 +	 add		%o0, %g4, %o0  83:	/* amemcpy_table */ @@ -721,382 +365,132 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	stb		%g2, [%o0]  1:  	retl - 	 RETL_INSN - -#endif /* FASTER_ALIGNED */ +	 mov		%g7, %o0  86:	/* non_aligned */  	cmp		%o2, 6  	bleu		88f +	 nop -#ifdef FASTER_NONALIGNED - -	 cmp		%o2, 256 -	bcc		87f - -#endif /* FASTER_NONALIGNED */ - -	 andcc		%o0, 3, %g0 +	save		%sp, -96, %sp +	andcc		%i0, 3, %g0  	be		61f -	 andcc		%o0, 1, %g0 +	 andcc		%i0, 1, %g0  	be		60f -	 andcc		%o0, 2, %g0 +	 andcc		%i0, 2, %g0 -	ldub		[%o1], %g5 -	add		%o1, 1, %o1 -	stb		%g5, [%o0] -	sub		%o2, 1, %o2 +	ldub		[%i1], %g5 +	add		%i1, 1, %i1 +	stb		%g5, [%i0] +	sub		%i2, 1, %i2  	bne		61f -	 add		%o0, 1, %o0 +	 add		%i0, 1, %i0  60: -	ldub		[%o1], %g3 -	add		%o1, 2, %o1 -	stb		%g3, [%o0] -	sub		%o2, 2, %o2 -	ldub		[%o1 - 1], %g3 -	add		%o0, 2, %o0 -	stb		%g3, [%o0 - 1] +	ldub		[%i1], %g3 +	add		%i1, 2, %i1 +	stb		%g3, [%i0] +	sub		%i2, 2, %i2 +	ldub		[%i1 - 1], %g3 +	add		%i0, 2, %i0 +	stb		%g3, [%i0 - 1]  61: -	and		%o1, 3, %g2 -	and		%o2, 0xc, %g3 -	and		%o1, -4, %o1 +	and		%i1, 3, %g2 +	and		%i2, 0xc, %g3 +	and		%i1, -4, %i1  	cmp		%g3, 4  	sll		%g2, 3, %g4  	mov		32, %g2  	be		4f -	 sub		%g2, %g4, %g7 +	 sub		%g2, %g4, %l0  	blu		3f  	 cmp		%g3, 0x8  	be		2f -	 srl		%o2, 2, %g3 +	 srl		%i2, 2, %g3 -	ld		[%o1], %o3 -	add		%o0, -8, %o0 -	ld		[%o1 + 4], %o4 +	ld		[%i1], %i3 +	add		%i0, -8, %i0 +	ld		[%i1 + 4], %i4  	b		
8f  	 add		%g3, 1, %g3  2: -	ld		[%o1], %o4 -	add		%o0, -12, %o0 -	ld		[%o1 + 4], %o5 +	ld		[%i1], %i4 +	add		%i0, -12, %i0 +	ld		[%i1 + 4], %i5  	add		%g3, 2, %g3  	b		9f -	 add		%o1, -4, %o1 +	 add		%i1, -4, %i1  3: -	ld		[%o1], %g1 -	add		%o0, -4, %o0 -	ld		[%o1 + 4], %o3 -	srl		%o2, 2, %g3 +	ld		[%i1], %g1 +	add		%i0, -4, %i0 +	ld		[%i1 + 4], %i3 +	srl		%i2, 2, %g3  	b		7f -	 add		%o1, 4, %o1 +	 add		%i1, 4, %i1  4: -	ld		[%o1], %o5 -	cmp		%o2, 7 -	ld		[%o1 + 4], %g1 -	srl		%o2, 2, %g3 +	ld		[%i1], %i5 +	cmp		%i2, 7 +	ld		[%i1 + 4], %g1 +	srl		%i2, 2, %g3  	bleu		10f -	 add		%o1, 8, %o1 +	 add		%i1, 8, %i1 -	ld		[%o1], %o3 +	ld		[%i1], %i3  	add		%g3, -1, %g3  5: -	sll		%o5, %g4, %g2 -	srl		%g1, %g7, %g5 +	sll		%i5, %g4, %g2 +	srl		%g1, %l0, %g5  	or		%g2, %g5, %g2 -	st		%g2, [%o0] +	st		%g2, [%i0]  7: -	ld		[%o1 + 4], %o4 +	ld		[%i1 + 4], %i4  	sll		%g1, %g4, %g2 -	srl		%o3, %g7, %g5 +	srl		%i3, %l0, %g5  	or		%g2, %g5, %g2 -	st		%g2, [%o0 + 4] +	st		%g2, [%i0 + 4]  8: -	ld		[%o1 + 8], %o5 -	sll		%o3, %g4, %g2 -	srl		%o4, %g7, %g5 +	ld		[%i1 + 8], %i5 +	sll		%i3, %g4, %g2 +	srl		%i4, %l0, %g5  	or		%g2, %g5, %g2 -	st		%g2, [%o0 + 8] +	st		%g2, [%i0 + 8]  9: -	ld		[%o1 + 12], %g1 -	sll		%o4, %g4, %g2 -	srl		%o5, %g7, %g5 +	ld		[%i1 + 12], %g1 +	sll		%i4, %g4, %g2 +	srl		%i5, %l0, %g5  	addcc		%g3, -4, %g3  	or		%g2, %g5, %g2 -	add		%o1, 16, %o1 -	st		%g2, [%o0 + 12] -	add		%o0, 16, %o0 +	add		%i1, 16, %i1 +	st		%g2, [%i0 + 12] +	add		%i0, 16, %i0  	bne,a		5b -	 ld		[%o1], %o3 +	 ld		[%i1], %i3  10: -	sll		%o5, %g4, %g2 -	srl		%g1, %g7, %g5 -	srl		%g7, 3, %g3 +	sll		%i5, %g4, %g2 +	srl		%g1, %l0, %g5 +	srl		%l0, 3, %g3  	or		%g2, %g5, %g2 -	sub		%o1, %g3, %o1 -	andcc		%o2, 2, %g0 -	st		%g2, [%o0] +	sub		%i1, %g3, %i1 +	andcc		%i2, 2, %g0 +	st		%g2, [%i0]  	be		1f -	 andcc		%o2, 1, %g0 - -	ldub		[%o1], %g2 -	add		%o1, 2, %o1 -	stb		%g2, [%o0 + 4] -	add		%o0, 2, %o0 -	ldub		[%o1 - 1], %g2 -	stb		%g2, [%o0 + 3] +	 andcc		%i2, 1, %g0 + +	ldub		[%i1], %g2 +	add		%i1, 2, %i1 +	stb		%g2, [%i0 + 4] +	add		%i0, 2, %i0 +	ldub		[%i1 - 1], %g2 +	stb		%g2, [%i0 + 3]  1:  	be		1f  	 nop -	ldub		[%o1], %g2 -	stb		%g2, [%o0 + 4] -1: -	retl -	 RETL_INSN - -#ifdef FASTER_NONALIGNED - -87:	/* faster_nonaligned */ - -	andcc		%o1, 3, %g0 -	be		3f -	 andcc		%o1, 1, %g0 - -	be		4f -	 andcc		%o1, 2, %g0 - -	ldub		[%o1], %g2 -	add		%o1, 1, %o1 -	stb		%g2, [%o0] -	sub		%o2, 1, %o2 -	bne		3f -	 add		%o0, 1, %o0 -4: -	lduh		[%o1], %g2 -	add		%o1, 2, %o1 -	srl		%g2, 8, %g3 -	sub		%o2, 2, %o2 -	stb		%g3, [%o0] -	add		%o0, 2, %o0 -	stb		%g2, [%o0 - 1] -3: -	 andcc		%o1, 4, %g0 - -	bne		2f -	 cmp		%o5, 1 - -	ld		[%o1], %o4 -	srl		%o4, 24, %g2 -	stb		%g2, [%o0] -	srl		%o4, 16, %g3 -	stb		%g3, [%o0 + 1] -	srl		%o4, 8, %g2 -	stb		%g2, [%o0 + 2] -	sub		%o2, 4, %o2 -	stb		%o4, [%o0 + 3] -	add		%o1, 4, %o1 -	add		%o0, 4, %o0 -2: -	be		33f -	 cmp		%o5, 2 -	be		32f -	 sub		%o2, 4, %o2 -31: -	ld		[%o1], %g2 -	add		%o1, 4, %o1 -	srl		%g2, 24, %g3 -	and		%o0, 7, %g5 -	stb		%g3, [%o0] -	cmp		%g5, 7 -	sll		%g2, 8, %g1 -	add		%o0, 4, %o0 -	be		41f -	 and		%o2, 0xffffffc0, %o3 -	ld		[%o0 - 7], %o4 -4: -	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		4b -	 add		%o0, 64, %o0 - -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 16, %g2 -4: -	SMOVE_CHUNK(o1, 
o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 16, %g2 -1: -	st		%o4, [%o0 - 7] -	sth		%g2, [%o0 - 3] -	srl		%g1, 8, %g4 -	b		88f -	 stb		%g4, [%o0 - 1] -32: -	ld		[%o1], %g2 -	add		%o1, 4, %o1 -	srl		%g2, 16, %g3 -	and		%o0, 7, %g5 -	sth		%g3, [%o0] -	cmp		%g5, 6 -	sll		%g2, 16, %g1 -	add		%o0, 4, %o0 -	be		42f -	 and		%o2, 0xffffffc0, %o3 -	ld		[%o0 - 6], %o4 -4: -	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		4b -	 add		%o0, 64, %o0 - -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 16, %g2 -4: -	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 16, %g2 -1: -	st		%o4, [%o0 - 6] -	b		88f -	 sth		%g2, [%o0 - 2] -33: -	ld		[%o1], %g2 -	sub		%o2, 4, %o2 -	srl		%g2, 24, %g3 -	and		%o0, 7, %g5 -	stb		%g3, [%o0] -	cmp		%g5, 5 -	srl		%g2, 8, %g4 -	sll		%g2, 24, %g1 -	sth		%g4, [%o0 + 1] -	add		%o1, 4, %o1 -	be		43f -	 and		%o2, 0xffffffc0, %o3 - -	ld		[%o0 - 1], %o4 -	add		%o0, 4, %o0 -4: -	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) -	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) -	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) -	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		4b -	 add		%o0, 64, %o0 - -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 24, %g2 -4: -	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 24, %g2 -1: -	st		%o4, [%o0 - 5] -	b		88f -	 stb		%g2, [%o0 - 1] -41: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		41b -	 add		%o0, 64, %o0 -	  -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 16, %g2 -4: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 16, %g2 +	ldub		[%i1], %g2 +	stb		%g2, [%i0 + 4]  1: -	sth		%g2, [%o0 - 3] -	srl		%g1, 8, %g4 -	b		88f -	 stb		%g4, [%o0 - 1] -43: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) -	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		43b -	 add		%o0, 64, %o0 - -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 24, %g2 -4: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 24, %g2 -1: -	stb		%g2, [%o0 + 3] -	b		88f -	 add		%o0, 4, %o0 -42: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, 
g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	subcc		%o3, 64, %o3 -	add		%o1, 64, %o1 -	bne		42b -	 add		%o0, 64, %o0 -	  -	andcc		%o2, 0x30, %o3 -	be,a		1f -	 srl		%g1, 16, %g2 -4: -	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2) -	subcc		%o3, 16, %o3 -	add		%o1, 16, %o1 -	bne		4b -	 add		%o0, 16, %o0 - -	srl		%g1, 16, %g2 -1: -	sth		%g2, [%o0 - 2] - -	/* Fall through */ -	  -#endif /* FASTER_NONALIGNED */ +	ret +	 restore	%g7, %g0, %o0  88:	/* short_end */ @@ -1127,7 +521,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */  	stb		%g2, [%o0]  1:  	retl - 	 RETL_INSN +	 mov		%g7, %o0  90:	/* short_aligned_end */  	bne		88b diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index 97395802c23..b7f6334e159 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -4,11 +4,10 @@   * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)   */ +#include <linux/linkage.h> +  	.text -	.align		32 -	.globl		memmove -	.type		memmove,#function -memmove:		/* o0=dst o1=src o2=len */ +ENTRY(memmove) /* o0=dst o1=src o2=len */  	mov		%o0, %g1  	cmp		%o0, %o1  	bleu,pt		%xcc, memcpy @@ -28,4 +27,4 @@ memmove:		/* o0=dst o1=src o2=len */  	retl  	 mov		%g1, %o0 -	.size		memmove, .-memmove +ENDPROC(memmove) diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S deleted file mode 100644 index c45470d0b0c..00000000000 --- a/arch/sparc/lib/mul.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * mul.S:       This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - -/* - * Signed multiply, from Appendix E of the Sparc Version 8 - * Architecture Manual. - */ - -/* - * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of - * the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. - */ - -	.globl .mul -	.globl _Mul -.mul: -_Mul:	/* needed for export */ -	mov	%o0, %y		! multiplier -> Y -	andncc	%o0, 0xfff, %g0	! test bits 12..31 -	be	Lmul_shortway	! if zero, can do it the short way -	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V - -	/* -	 * Long multiply.  32 steps, followed by a final shift step. -	 */ -	mulscc	%o4, %o1, %o4	! 1 -	mulscc	%o4, %o1, %o4	! 2 -	mulscc	%o4, %o1, %o4	! 3 -	mulscc	%o4, %o1, %o4	! 4 -	mulscc	%o4, %o1, %o4	! 5 -	mulscc	%o4, %o1, %o4	! 6 -	mulscc	%o4, %o1, %o4	! 7 -	mulscc	%o4, %o1, %o4	! 8 -	mulscc	%o4, %o1, %o4	! 9 -	mulscc	%o4, %o1, %o4	! 10 -	mulscc	%o4, %o1, %o4	! 11 -	mulscc	%o4, %o1, %o4	! 12 -	mulscc	%o4, %o1, %o4	! 13 -	mulscc	%o4, %o1, %o4	! 14 -	mulscc	%o4, %o1, %o4	! 15 -	mulscc	%o4, %o1, %o4	! 16 -	mulscc	%o4, %o1, %o4	! 17 -	mulscc	%o4, %o1, %o4	! 18 -	mulscc	%o4, %o1, %o4	! 19 -	mulscc	%o4, %o1, %o4	! 20 -	mulscc	%o4, %o1, %o4	! 21 -	mulscc	%o4, %o1, %o4	! 22 -	mulscc	%o4, %o1, %o4	! 23 -	mulscc	%o4, %o1, %o4	! 24 -	mulscc	%o4, %o1, %o4	! 25 -	mulscc	%o4, %o1, %o4	! 26 -	mulscc	%o4, %o1, %o4	! 27 -	mulscc	%o4, %o1, %o4	! 28 -	mulscc	%o4, %o1, %o4	! 29 -	mulscc	%o4, %o1, %o4	! 30 -	mulscc	%o4, %o1, %o4	! 31 -	mulscc	%o4, %o1, %o4	! 32 -	mulscc	%o4, %g0, %o4	! final shift - -	! If %o0 was negative, the result is -	!	(%o0 * %o1) + (%o1 << 32)) -	! We fix that here. - -#if 0 -	tst	%o0 -	bge	1f -	 rd	%y, %o0 - -	! %o0 was indeed negative; fix upper 32 bits of result by subtracting  -	! %o1 (i.e., return %o4 - %o1 in %o1). 
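
The comment above explains the fixup in the deleted .mul: the 32-step mulscc loop effectively multiplies the unsigned value of %o0 by the sign-extended %o1, so when %o0 was negative the raw result is too large by %o1 << 32 and the high word gets %o1 subtracted once, as the mask-and-subtract epilogue that follows does. A C sketch of why that correction yields the signed product (mul_signed_sketch is an illustrative name):

#include <stdint.h>
#include <assert.h>

/* raw = (uint32_t)a * sign-extended b; for negative a this overshoots the
 * signed product by exactly b << 32, so subtract b from the high word. */
static int64_t mul_signed_sketch(int32_t a, int32_t b)
{
	uint64_t raw = (uint64_t)(uint32_t)a * (uint64_t)(int64_t)b;
	uint32_t hi = (uint32_t)(raw >> 32);
	uint32_t lo = (uint32_t)raw;

	if (a < 0)			/* assembly: hi -= b & (a >> 31) */
		hi -= (uint32_t)b;

	return (int64_t)(((uint64_t)hi << 32) | lo);
}

int main(void)
{
	assert(mul_signed_sketch(-3, 5) == -15);
	assert(mul_signed_sketch(-70000, 70000) == -4900000000LL);
	assert(mul_signed_sketch(12345, -6789) == 12345LL * -6789);
	return 0;
}
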
-	retl -	 sub	%o4, %o1, %o1 - -1: -	retl -	 mov	%o4, %o1 -#else -	/* Faster code adapted from tege@sics.se's code for umul.S.  */ -	sra	%o0, 31, %o2	! make mask from sign bit -	and	%o1, %o2, %o2	! %o2 = 0 or %o1, depending on sign of %o0 -	rd	%y, %o0		! get lower half of product -	retl -	 sub	%o4, %o2, %o1	! subtract compensation  -				!  and put upper half in place -#endif - -Lmul_shortway: -	/* -	 * Short multiply.  12 steps, followed by a final shift step. -	 * The resulting bits are off by 12 and (32-12) = 20 bit positions, -	 * but there is no problem with %o0 being negative (unlike above). -	 */ -	mulscc	%o4, %o1, %o4	! 1 -	mulscc	%o4, %o1, %o4	! 2 -	mulscc	%o4, %o1, %o4	! 3 -	mulscc	%o4, %o1, %o4	! 4 -	mulscc	%o4, %o1, %o4	! 5 -	mulscc	%o4, %o1, %o4	! 6 -	mulscc	%o4, %o1, %o4	! 7 -	mulscc	%o4, %o1, %o4	! 8 -	mulscc	%o4, %o1, %o4	! 9 -	mulscc	%o4, %o1, %o4	! 10 -	mulscc	%o4, %o1, %o4	! 11 -	mulscc	%o4, %o1, %o4	! 12 -	mulscc	%o4, %g0, %o4	! final shift - -	/* -	 *  %o4 has 20 of the bits that should be in the low part of the -	 * result; %y has the bottom 12 (as %y's top 12).  That is: -	 * -	 *	  %o4		    %y -	 * +----------------+----------------+ -	 * | -12- |   -20-  | -12- |   -20-  | -	 * +------(---------+------)---------+ -	 *  --hi-- ----low-part---- -	 * -	 * The upper 12 bits of %o4 should be sign-extended to form the -	 * high part of the product (i.e., highpart = %o4 >> 20). -	 */ - -	rd	%y, %o5 -	sll	%o4, 12, %o0	! shift middle bits left 12 -	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left -	or	%o5, %o0, %o0	! construct low part of result -	retl -	 sra	%o4, 20, %o1	! ... and extract high part of result - -	.globl	.mul_patch -.mul_patch: -	smul	%o0, %o1, %o0 -	retl -	 rd	%y, %o1 -	nop diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S index 7f17872d060..9794939d1c1 100644 --- a/arch/sparc/lib/muldi3.S +++ b/arch/sparc/lib/muldi3.S @@ -63,12 +63,12 @@ __muldi3:  	rd  %y, %o1  	mov  %o1, %l3  	mov  %i1, %o0 -	call  .umul  	mov  %i2, %o1 +	umul %o0, %o1, %o0  	mov  %o0, %l0  	mov  %i0, %o0 -	call  .umul  	mov  %i3, %o1 +	umul %o0, %o1, %o0  	add  %l0, %o0, %l0  	mov  %l2, %i0  	add  %l2, %l0, %i0 diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S deleted file mode 100644 index 42fb8625281..00000000000 --- a/arch/sparc/lib/rem.S +++ /dev/null @@ -1,384 +0,0 @@ -/* - * rem.S:       This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - *  .rem	name of function to generate - *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - *  true		true=true => signed; true=false => unsigned - * - * Algorithm parameters: - *  N		how many bits per iteration we try to get (4) - *  WORDSIZE	total number of bits (32) - * - * Derived constants: - *  TOPBITS	number of bits in the top decade of a number - * - * Important variables: - *  Q		the partial quotient under development (initially 0) - *  R		the remainder so far, initially the dividend - *  ITER	number of main division loop iterations required; - *		equal to ceil(log2(quotient) / N).  Note that this - *		is the log base (2^N) of the quotient. 
- *  V		the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - *  Current estimate for non-large dividend is - *	ceil(log2(quotient) / N) * (10 + 7N/2) + C - *  A large dividend is one greater than 2^(31-TOPBITS) and takes a - *  different path, as the upper bits of the quotient must be developed - *  one bit at a time. - */ - - -	.globl .rem -	.globl _Rem -.rem: -_Rem:	/* needed for export */ -	! compute sign of result; if neither is negative, no problem -	orcc	%o1, %o0, %g0	! either negative? -	bge	2f			! no, go do the divide -	 mov	%o0, %g2	! compute sign in any case - -	tst	%o1 -	bge	1f -	 tst	%o0 -	! %o1 is definitely negative; %o0 might also be negative -	bge	2f			! if %o0 not negative... -	 sub	%g0, %o1, %o1	! in any case, make %o1 nonneg -1:	! %o0 is negative, %o1 is nonnegative -	sub	%g0, %o0, %o0	! make %o0 nonnegative -2: - -	! Ready to divide.  Compute size of quotient; scale comparand. -	orcc	%o1, %g0, %o5 -	bne	1f -	 mov	%o0, %o3 - -		! Divide by zero trap.  If it returns, return 0 (about as -		! wrong as possible, but that is what SunOS does...). -		ta	ST_DIV0 -		retl -		 clr	%o0 - -1: -	cmp	%o3, %o5			! if %o1 exceeds %o0, done -	blu	Lgot_result		! (and algorithm fails otherwise) -	 clr	%o2 - -	sethi	%hi(1 << (32 - 4 - 1)), %g1 - -	cmp	%o3, %g1 -	blu	Lnot_really_big -	 clr	%o4 - -	! Here the dividend is >= 2**(31-N) or so.  We must be careful here, -	! as our usual N-at-a-shot divide step will cause overflow and havoc. -	! The number of bits in the result here is N*ITER+SC, where SC <= N. -	! Compute ITER in an unorthodox manner: know we need to shift V into -	! the top decade: so do not even bother to compare to R. -	1: -		cmp	%o5, %g1 -		bgeu	3f -		 mov	1, %g7 - -		sll	%o5, 4, %o5 - -		b	1b -		 add	%o4, 1, %o4 - -	! Now compute %g7. -	2: -		addcc	%o5, %o5, %o5 - -		bcc	Lnot_too_big -		 add	%g7, 1, %g7 - -		! We get here if the %o1 overflowed while shifting. -		! This means that %o3 has the high-order bit set. -		! Restore %o5 and subtract from %o3. -		sll	%g1, 4, %g1	! high order bit -		srl	%o5, 1, %o5		! rest of %o5 -		add	%o5, %g1, %o5 - -		b	Ldo_single_div -		 sub	%g7, 1, %g7 - -	Lnot_too_big: -	3: -		cmp	%o5, %o3 -		blu	2b -		 nop - -		be	Ldo_single_div -		 nop -	/* NB: these are commented out in the V8-Sparc manual as well */ -	/* (I do not understand this) */ -	! %o5 > %o3: went too far: back up 1 step -	!	srl	%o5, 1, %o5 -	!	dec	%g7 -	! do single-bit divide steps -	! -	! We have to be careful here.  We know that %o3 >= %o5, so we can do the -	! first divide step without thinking.  BUT, the others are conditional, -	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high- -	! order bit set in the first step, just falling into the regular -	! division loop will mess up the first time around. -	! So we unroll slightly... -	Ldo_single_div: -		subcc	%g7, 1, %g7 -		bl	Lend_regular_divide -		 nop - -		sub	%o3, %o5, %o3 -		mov	1, %o2 - -		b	Lend_single_divloop -		 nop -	Lsingle_divloop: -		sll	%o2, 1, %o2 - -		bl	1f -		 srl	%o5, 1, %o5 -		! %o3 >= 0 -		sub	%o3, %o5, %o3 - -		b	2f -		 add	%o2, 1, %o2 -	1:	! %o3 < 0 -		add	%o3, %o5, %o3 -		sub	%o2, 1, %o2 -	2: -	Lend_single_divloop: -		subcc	%g7, 1, %g7 -		bge	Lsingle_divloop -		 tst	%o3 - -		b,a	Lend_regular_divide - -Lnot_really_big: -1: -	sll	%o5, 4, %o5 -	cmp	%o5, %o3 -	bleu	1b -	 addcc	%o4, 1, %o4 -	be	Lgot_result -	 sub	%o4, 1, %o4 - -	tst	%o3	! set up for initial iteration -Ldivloop: -	sll	%o2, 4, %o2 -		! depth 1, accumulated bits 0 -	bl	L.1.16 -	 srl	%o5,1,%o5 -	! 
remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 2, accumulated bits 1 -	bl	L.2.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 3 -	bl	L.3.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 7 -	bl	L.4.23 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 - -	b	9f -	 add	%o2, (7*2+1), %o2 -	 -L.4.23: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2-1), %o2 -	 -L.3.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 5 -	bl	L.4.21 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2+1), %o2 -	 -L.4.21: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2-1), %o2 -	 -L.2.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 1 -	bl	L.3.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 3 -	bl	L.4.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2+1), %o2 - -L.4.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2-1), %o2 - -L.3.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 1 -	bl	L.4.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2+1), %o2 - -L.4.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2-1), %o2 - -L.1.16: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 2, accumulated bits -1 -	bl	L.2.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -1 -	bl	L.3.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -1 -	bl	L.4.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2+1), %o2 - -L.4.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2-1), %o2 - -L.3.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -3 -	bl	L.4.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2+1), %o2 - -L.4.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2-1), %o2 - -L.2.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -3 -	bl	L.3.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -5 -	bl	L.4.11 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2+1), %o2 - -L.4.11: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2-1), %o2 - - -L.3.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -7 -	bl	L.4.9 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2+1), %o2 - -L.4.9: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2-1), %o2 - -	9: -Lend_regular_divide: -	subcc	%o4, 1, %o4 -	bge	Ldivloop -	 tst	%o3 - -	bl,a	Lgot_result -	! non-restoring fixup here (one instruction only!) -	add	%o3, %o1, %o3 - -Lgot_result: -	! 
check to see if answer should be < 0 -	tst	%g2 -	bl,a	1f -	 sub %g0, %o3, %o3 -1: -	retl -	 mov %o3, %o0 - -	.globl	.rem_patch -.rem_patch: -	sra	%o0, 0x1f, %o4 -	wr	%o4, 0x0, %y -	nop -	nop -	nop -	sdivcc	%o0, %o1, %o2 -	bvs,a	1f -	 xnor	%o2, %g0, %o2 -1:	smul	%o2, %o1, %o2 -	retl -	 sub	%o0, %o2, %o0 -	nop diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S deleted file mode 100644 index 9675268e7fd..00000000000 --- a/arch/sparc/lib/rwsem_32.S +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Assembly part of rw semaphores. - * - * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - -	.section .sched.text, "ax" -	.align	4 - -	.globl		___down_read -___down_read: -	rd		%psr, %g3 -	nop -	nop -	nop -	or		%g3, PSR_PIL, %g7 -	wr		%g7, 0, %psr -	nop -	nop -	nop -#ifdef CONFIG_SMP -1:	ldstub		[%g1 + 4], %g7 -	tst		%g7 -	bne		1b -	 ld		[%g1], %g7 -	sub		%g7, 1, %g7 -	st		%g7, [%g1] -	stb		%g0, [%g1 + 4] -#else -	ld		[%g1], %g7 -	sub		%g7, 1, %g7 -	st		%g7, [%g1] -#endif -	wr		%g3, 0, %psr -	add		%g7, 1, %g7 -	nop -	nop -	subcc		%g7, 1, %g7 -	bneg		3f -	 nop -2:	jmpl		%o7, %g0 -	 mov		%g4, %o7 -3:	save		%sp, -64, %sp -	mov		%g1, %l1 -	mov		%g4, %l4 -	bcs		4f -	 mov		%g5, %l5 -	call		down_read_failed -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		___down_read -	 restore	%l5, %g0, %g5 -4:	call		down_read_failed_biased -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		2b -	 restore	%l5, %g0, %g5 - -	.globl		___down_write -___down_write: -	rd		%psr, %g3 -	nop -	nop -	nop -	or		%g3, PSR_PIL, %g7 -	wr		%g7, 0, %psr -	sethi		%hi(0x01000000), %g2 -	nop -	nop -#ifdef CONFIG_SMP -1:	ldstub		[%g1 + 4], %g7 -	tst		%g7 -	bne		1b -	 ld		[%g1], %g7 -	sub		%g7, %g2, %g7 -	st		%g7, [%g1] -	stb		%g0, [%g1 + 4] -#else -	ld		[%g1], %g7 -	sub		%g7, %g2, %g7 -	st		%g7, [%g1] -#endif -	wr		%g3, 0, %psr -	add		%g7, %g2, %g7 -	nop -	nop -	subcc		%g7, %g2, %g7 -	bne		3f -	 nop -2:	jmpl		%o7, %g0 -	 mov		%g4, %o7 -3:	save		%sp, -64, %sp -	mov		%g1, %l1 -	mov		%g4, %l4 -	bcs		4f -	 mov		%g5, %l5 -	call		down_write_failed -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		___down_write -	 restore	%l5, %g0, %g5 -4:	call		down_write_failed_biased -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		2b -	 restore	%l5, %g0, %g5 - -	.text -	.globl		___up_read -___up_read: -	rd		%psr, %g3 -	nop -	nop -	nop -	or		%g3, PSR_PIL, %g7 -	wr		%g7, 0, %psr -	nop -	nop -	nop -#ifdef CONFIG_SMP -1:	ldstub		[%g1 + 4], %g7 -	tst		%g7 -	bne		1b -	 ld		[%g1], %g7 -	add		%g7, 1, %g7 -	st		%g7, [%g1] -	stb		%g0, [%g1 + 4] -#else -	ld		[%g1], %g7 -	add		%g7, 1, %g7 -	st		%g7, [%g1] -#endif -	wr		%g3, 0, %psr -	nop -	nop -	nop -	cmp		%g7, 0 -	be		3f -	 nop -2:	jmpl		%o7, %g0 -	 mov		%g4, %o7 -3:	save		%sp, -64, %sp -	mov		%g1, %l1 -	mov		%g4, %l4 -	mov		%g5, %l5 -	clr		%o1 -	call		__rwsem_wake -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		2b -	 restore	%l5, %g0, %g5 - -	.globl		___up_write -___up_write: -	rd		%psr, %g3 -	nop -	nop -	nop -	or		%g3, PSR_PIL, %g7 -	wr		%g7, 0, %psr -	sethi		%hi(0x01000000), %g2 -	nop -	nop -#ifdef CONFIG_SMP -1:	ldstub		[%g1 + 4], %g7 -	tst		%g7 -	bne		1b -	 ld		[%g1], %g7 -	add		%g7, %g2, %g7 -	st		%g7, [%g1] -	stb		%g0, [%g1 + 4] -#else -	ld		[%g1], %g7 -	add		%g7, %g2, %g7 -	st		%g7, [%g1] -#endif -	wr		%g3, 0, %psr -	sub		%g7, %g2, %g7 -	nop -	nop -	addcc		%g7, %g2, %g7 -	bcs		3f -	 nop -2:	jmpl		%o7, %g0 -	 mov		%g4, %o7 -3:	save		%sp, -64, %sp -	mov		%g1, %l1 -	mov		%g4, %l4 -	mov		%g5, %l5 -	mov		%g7, %o1 -	call		
__rwsem_wake -	 mov		%l1, %o0 -	mov		%l1, %g1 -	mov		%l4, %g4 -	ba		2b -	 restore	%l5, %g0, %g5 diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S deleted file mode 100644 index f0a0d4e4db7..00000000000 --- a/arch/sparc/lib/sdiv.S +++ /dev/null @@ -1,381 +0,0 @@ -/* - * sdiv.S:      This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - *  .div	name of function to generate - *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1 - *  true		true=true => signed; true=false => unsigned - * - * Algorithm parameters: - *  N		how many bits per iteration we try to get (4) - *  WORDSIZE	total number of bits (32) - * - * Derived constants: - *  TOPBITS	number of bits in the top decade of a number - * - * Important variables: - *  Q		the partial quotient under development (initially 0) - *  R		the remainder so far, initially the dividend - *  ITER	number of main division loop iterations required; - *		equal to ceil(log2(quotient) / N).  Note that this - *		is the log base (2^N) of the quotient. - *  V		the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - *  Current estimate for non-large dividend is - *	ceil(log2(quotient) / N) * (10 + 7N/2) + C - *  A large dividend is one greater than 2^(31-TOPBITS) and takes a - *  different path, as the upper bits of the quotient must be developed - *  one bit at a time. - */ - - -	.globl .div -	.globl _Div -.div: -_Div:	/* needed for export */ -	! compute sign of result; if neither is negative, no problem -	orcc	%o1, %o0, %g0	! either negative? -	bge	2f			! no, go do the divide -	 xor	%o1, %o0, %g2	! compute sign in any case - -	tst	%o1 -	bge	1f -	 tst	%o0 -	! %o1 is definitely negative; %o0 might also be negative -	bge	2f			! if %o0 not negative... -	 sub	%g0, %o1, %o1	! in any case, make %o1 nonneg -1:	! %o0 is negative, %o1 is nonnegative -	sub	%g0, %o0, %o0	! make %o0 nonnegative -2: - -	! Ready to divide.  Compute size of quotient; scale comparand. -	orcc	%o1, %g0, %o5 -	bne	1f -	 mov	%o0, %o3 - -		! Divide by zero trap.  If it returns, return 0 (about as -		! wrong as possible, but that is what SunOS does...). -		ta	ST_DIV0 -		retl -		 clr	%o0 - -1: -	cmp	%o3, %o5			! if %o1 exceeds %o0, done -	blu	Lgot_result		! (and algorithm fails otherwise) -	 clr	%o2 - -	sethi	%hi(1 << (32 - 4 - 1)), %g1 - -	cmp	%o3, %g1 -	blu	Lnot_really_big -	 clr	%o4 - -	! Here the dividend is >= 2**(31-N) or so.  We must be careful here, -	! as our usual N-at-a-shot divide step will cause overflow and havoc. -	! The number of bits in the result here is N*ITER+SC, where SC <= N. -	! Compute ITER in an unorthodox manner: know we need to shift V into -	! the top decade: so do not even bother to compare to R. -	1: -		cmp	%o5, %g1 -		bgeu	3f -		 mov	1, %g7 - -		sll	%o5, 4, %o5 - -		b	1b -		 add	%o4, 1, %o4 - -	! Now compute %g7. -	2: -		addcc	%o5, %o5, %o5 -		bcc	Lnot_too_big -		 add	%g7, 1, %g7 - -		! We get here if the %o1 overflowed while shifting. -		! This means that %o3 has the high-order bit set. -		! Restore %o5 and subtract from %o3. -		sll	%g1, 4, %g1	! high order bit -		srl	%o5, 1, %o5		! 
rest of %o5 -		add	%o5, %g1, %o5 - -		b	Ldo_single_div -		 sub	%g7, 1, %g7 - -	Lnot_too_big: -	3: -		cmp	%o5, %o3 -		blu	2b -		 nop - -		be	Ldo_single_div -		 nop -	/* NB: these are commented out in the V8-Sparc manual as well */ -	/* (I do not understand this) */ -	! %o5 > %o3: went too far: back up 1 step -	!	srl	%o5, 1, %o5 -	!	dec	%g7 -	! do single-bit divide steps -	! -	! We have to be careful here.  We know that %o3 >= %o5, so we can do the -	! first divide step without thinking.  BUT, the others are conditional, -	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high- -	! order bit set in the first step, just falling into the regular -	! division loop will mess up the first time around. -	! So we unroll slightly... -	Ldo_single_div: -		subcc	%g7, 1, %g7 -		bl	Lend_regular_divide -		 nop - -		sub	%o3, %o5, %o3 -		mov	1, %o2 - -		b	Lend_single_divloop -		 nop -	Lsingle_divloop: -		sll	%o2, 1, %o2 - -		bl	1f -		 srl	%o5, 1, %o5 -		! %o3 >= 0 -		sub	%o3, %o5, %o3 - -		b	2f -		 add	%o2, 1, %o2 -	1:	! %o3 < 0 -		add	%o3, %o5, %o3 -		sub	%o2, 1, %o2 -	2: -	Lend_single_divloop: -		subcc	%g7, 1, %g7 -		bge	Lsingle_divloop -		 tst	%o3 - -		b,a	Lend_regular_divide - -Lnot_really_big: -1: -	sll	%o5, 4, %o5 -	cmp	%o5, %o3 -	bleu	1b -	 addcc	%o4, 1, %o4 - -	be	Lgot_result -	 sub	%o4, 1, %o4 - -	tst	%o3	! set up for initial iteration -Ldivloop: -	sll	%o2, 4, %o2 -		! depth 1, accumulated bits 0 -	bl	L.1.16 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 2, accumulated bits 1 -	bl	L.2.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 3 -	bl	L.3.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 7 -	bl	L.4.23 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2+1), %o2 - -L.4.23: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2-1), %o2 - -L.3.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 5 -	bl	L.4.21 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2+1), %o2 - -L.4.21: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2-1), %o2 - -L.2.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 1 -	bl	L.3.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 3 -	bl	L.4.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2+1), %o2 - -L.4.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2-1), %o2 -	 -	 -L.3.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 1 -	bl	L.4.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2+1), %o2 - -L.4.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2-1), %o2 - -L.1.16: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 2, accumulated bits -1 -	bl	L.2.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -1 -	bl	L.3.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -1 -	bl	L.4.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2+1), %o2 - -L.4.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2-1), %o2 - -L.3.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! 
depth 4, accumulated bits -3 -	bl	L.4.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2+1), %o2 - -L.4.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2-1), %o2 - -L.2.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -3 -	bl	L.3.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -5 -	bl	L.4.11 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2+1), %o2 - -L.4.11: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2-1), %o2 - -L.3.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -7 -	bl	L.4.9 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2+1), %o2 - -L.4.9: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2-1), %o2 - -	9: -Lend_regular_divide: -	subcc	%o4, 1, %o4 -	bge	Ldivloop -	 tst	%o3 - -	bl,a	Lgot_result -	! non-restoring fixup here (one instruction only!) -	sub	%o2, 1, %o2 - -Lgot_result: -	! check to see if answer should be < 0 -	tst	%g2 -	bl,a	1f -	 sub %g0, %o2, %o2 -1: -	retl -	 mov %o2, %o0 - -	.globl	.div_patch -.div_patch: -	sra	%o0, 0x1f, %o2 -	wr	%o2, 0x0, %y -	nop -	nop -	nop -	sdivcc	%o0, %o1, %o0 -	bvs,a	1f -	 xnor	%o0, %g0, %o0 -1:	retl -	 nop diff --git a/arch/sparc/lib/strlen_user_32.S b/arch/sparc/lib/strlen_user_32.S deleted file mode 100644 index 8c8a371df3c..00000000000 --- a/arch/sparc/lib/strlen_user_32.S +++ /dev/null @@ -1,109 +0,0 @@ -/* strlen_user.S: Sparc optimized strlen_user code - * - * Return length of string in userspace including terminating 0 - * or 0 for error - * - * Copyright (C) 1991,1996 Free Software Foundation - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#define LO_MAGIC 0x01010101 -#define HI_MAGIC 0x80808080 - -10: -	ldub	[%o0], %o5 -	cmp	%o5, 0 -	be	1f -	 add	%o0, 1, %o0 -	andcc	%o0, 3, %g0 -	be	4f -	 or	%o4, %lo(HI_MAGIC), %o3 -11: -	ldub	[%o0], %o5 -	cmp	%o5, 0 -	be	2f -	 add	%o0, 1, %o0 -	andcc	%o0, 3, %g0 -	be	5f -	 sethi	%hi(LO_MAGIC), %o4 -12: -	ldub	[%o0], %o5 -	cmp	%o5, 0 -	be	3f -	 add	%o0, 1, %o0 -	b	13f -	 or	%o4, %lo(LO_MAGIC), %o2 -1: -	retl -	 mov	1, %o0 -2: -	retl -	 mov	2, %o0 -3: -	retl -	 mov	3, %o0 - -	.align 4 -	.global __strlen_user, __strnlen_user -__strlen_user: -	sethi	%hi(32768), %o1 -__strnlen_user: -	mov	%o1, %g1 -	mov	%o0, %o1 -	andcc	%o0, 3, %g0 -	bne	10b -	 sethi	%hi(HI_MAGIC), %o4 -	or	%o4, %lo(HI_MAGIC), %o3 -4: -	sethi	%hi(LO_MAGIC), %o4 -5: -	or	%o4, %lo(LO_MAGIC), %o2 -13: -	ld	[%o0], %o5 -2: -	sub	%o5, %o2, %o4 -	andcc	%o4, %o3, %g0 -	bne	82f -	 add	%o0, 4, %o0 -	sub	%o0, %o1, %g2 -81:	cmp	%g2, %g1 -	blu	13b -	 mov	%o0, %o4 -	ba,a	1f - -	/* Check every byte. 
*/ -82:	srl	%o5, 24, %g5 -	andcc	%g5, 0xff, %g0 -	be	1f -	 add	%o0, -3, %o4 -	srl	%o5, 16, %g5 -	andcc	%g5, 0xff, %g0 -	be	1f -	 add	%o4, 1, %o4 -	srl	%o5, 8, %g5 -	andcc	%g5, 0xff, %g0 -	be	1f -	 add	%o4, 1, %o4 -	andcc	%o5, 0xff, %g0 -	bne	81b -	 sub	%o0, %o1, %g2 - -	add	%o4, 1, %o4 -1: -	retl -	 sub	%o4, %o1, %o0 - -	.section .fixup,#alloc,#execinstr -	.align	4 -9: -	retl -	 clr	%o0 - -	.section __ex_table,#alloc -	.align	4 - -	.word	10b, 9b -	.word	11b, 9b -	.word	12b, 9b -	.word	13b, 9b diff --git a/arch/sparc/lib/strlen_user_64.S b/arch/sparc/lib/strlen_user_64.S deleted file mode 100644 index 114ed111e25..00000000000 --- a/arch/sparc/lib/strlen_user_64.S +++ /dev/null @@ -1,95 +0,0 @@ -/* strlen_user.S: Sparc64 optimized strlen_user code - * - * Return length of string in userspace including terminating 0 - * or 0 for error - * - * Copyright (C) 1991,1996 Free Software Foundation - * Copyright (C) 1996,1999 David S. Miller (davem@redhat.com) - * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <asm/asi.h> - -#define LO_MAGIC 0x01010101 -#define HI_MAGIC 0x80808080 - -	.align 4 -	.global __strlen_user, __strnlen_user -__strlen_user: -	sethi	%hi(32768), %o1 -__strnlen_user:	 -	mov	%o1, %g1 -	mov	%o0, %o1 -	andcc	%o0, 3, %g0 -	be,pt	%icc, 9f -	 sethi	%hi(HI_MAGIC), %o4 -10:	lduba	[%o0] %asi, %o5 -	brz,pn	%o5, 21f -	 add	%o0, 1, %o0 -	andcc	%o0, 3, %g0 -	be,pn	%icc, 4f -	 or	%o4, %lo(HI_MAGIC), %o3 -11:	lduba	[%o0] %asi, %o5 -	brz,pn	%o5, 22f -	 add	%o0, 1, %o0 -	andcc	%o0, 3, %g0 -	be,pt	%icc, 13f -	 srl	%o3, 7, %o2 -12:	lduba	[%o0] %asi, %o5 -	brz,pn	%o5, 23f -	 add	%o0, 1, %o0 -	ba,pt	%icc, 2f -15:	 lda	[%o0] %asi, %o5 -9:	or	%o4, %lo(HI_MAGIC), %o3 -4:	srl	%o3, 7, %o2 -13:	lda	[%o0] %asi, %o5 -2:	sub	%o5, %o2, %o4 -	andcc	%o4, %o3, %g0 -	bne,pn	%icc, 82f -	 add	%o0, 4, %o0 -	sub	%o0, %o1, %g2 -81:	cmp	%g2, %g1 -	blu,pt	%icc, 13b -	 mov	%o0, %o4 -	ba,a,pt	%xcc, 1f - -	/* Check every byte. */ -82:	srl	%o5, 24, %g7 -	andcc	%g7, 0xff, %g0 -	be,pn	%icc, 1f -	 add	%o0, -3, %o4 -	srl	%o5, 16, %g7 -	andcc	%g7, 0xff, %g0 -	be,pn	%icc, 1f -	 add	%o4, 1, %o4 -	srl	%o5, 8, %g7 -	andcc	%g7, 0xff, %g0 -	be,pn	%icc, 1f -	 add	%o4, 1, %o4 -	andcc	%o5, 0xff, %g0 -	bne,pt	%icc, 81b -	 sub	%o0, %o1, %g2 -	add	%o4, 1, %o4 -1:	retl -	 sub	%o4, %o1, %o0 -21:	retl -	 mov	1, %o0 -22:	retl -	 mov	2, %o0 -23:	retl -	 mov	3, %o0 - -        .section .fixup,#alloc,#execinstr -        .align  4 -30: -        retl -         clr    %o0 - -	.section __ex_table,"a" -	.align	4 - -	.word	10b, 30b -	.word	11b, 30b -	.word	12b, 30b -	.word	15b, 30b -	.word	13b, 30b diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S index 494ec664537..c0d1b568c1c 100644 --- a/arch/sparc/lib/strncmp_32.S +++ b/arch/sparc/lib/strncmp_32.S @@ -3,11 +3,10 @@   *            generic strncmp routine.   
*/ +#include <linux/linkage.h> +  	.text -	.align 4 -	.global __strncmp, strncmp -__strncmp: -strncmp: +ENTRY(strncmp)  	mov	%o0, %g3  	mov	0, %o3 @@ -116,3 +115,4 @@ strncmp:  	and	%g2, 0xff, %o0  	retl  	 sub	%o3, %o0, %o0 +ENDPROC(strncmp) diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S index 980e8375155..0656627166f 100644 --- a/arch/sparc/lib/strncmp_64.S +++ b/arch/sparc/lib/strncmp_64.S @@ -4,13 +4,11 @@   * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)   */ +#include <linux/linkage.h>  #include <asm/asi.h>  	.text -	.align	32 -	.globl	strncmp -	.type	strncmp,#function -strncmp: +ENTRY(strncmp)  	brlez,pn %o2, 3f  	 lduba	[%o0] (ASI_PNF), %o3  1: @@ -29,4 +27,4 @@ strncmp:  3:  	retl  	 clr	%o0 -	.size	strncmp, .-strncmp +ENDPROC(strncmp) diff --git a/arch/sparc/lib/strncpy_from_user_32.S b/arch/sparc/lib/strncpy_from_user_32.S deleted file mode 100644 index d77198976a6..00000000000 --- a/arch/sparc/lib/strncpy_from_user_32.S +++ /dev/null @@ -1,47 +0,0 @@ -/* strncpy_from_user.S: Sparc strncpy from userspace. - * - *  Copyright(C) 1996 David S. Miller - */ - -#include <asm/ptrace.h> -#include <asm/errno.h> - -	.text -	.align	4 - -	/* Must return: -	 * -	 * -EFAULT		for an exception -	 * count		if we hit the buffer limit -	 * bytes copied		if we hit a null byte -	 */ - -	.globl	__strncpy_from_user -__strncpy_from_user: -	/* %o0=dest, %o1=src, %o2=count */ -	mov	%o2, %o3 -1: -	subcc	%o2, 1, %o2 -	bneg	2f -	 nop -10: -	ldub	[%o1], %o4 -	add	%o0, 1, %o0 -	cmp	%o4, 0 -	add	%o1, 1, %o1 -	bne	1b -	 stb	%o4, [%o0 - 1] -2: -	add	%o2, 1, %o0 -	retl -	 sub	%o3, %o0, %o0 - -	.section .fixup,#alloc,#execinstr -	.align	4 -4: -	retl -	 mov	-EFAULT, %o0 - -	.section __ex_table,#alloc -	.align	4 -	.word	10b, 4b diff --git a/arch/sparc/lib/strncpy_from_user_64.S b/arch/sparc/lib/strncpy_from_user_64.S deleted file mode 100644 index 511c8f136f9..00000000000 --- a/arch/sparc/lib/strncpy_from_user_64.S +++ /dev/null @@ -1,135 +0,0 @@ -/* - * strncpy_from_user.S: Sparc64 strncpy from userspace. - * - *  Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/asi.h> -#include <asm/errno.h> - -	.data -	.align	8 -0:	.xword	0x0101010101010101 - -	.text -	.align	32 - -	/* Must return: -	 * -	 * -EFAULT		for an exception -	 * count		if we hit the buffer limit -	 * bytes copied		if we hit a null byte -	 * (without the null byte) -	 * -	 * This implementation assumes: -	 * %o1 is 8 aligned => !(%o2 & 7) -	 * %o0 is 8 aligned (if not, it will be slooooow, but will work) -	 * -	 * This is optimized for the common case: -	 * in my stats, 90% of src are 8 aligned (even on sparc32) -	 * and average length is 18 or so. -	 */ - -	.globl	__strncpy_from_user -	.type	__strncpy_from_user,#function -__strncpy_from_user: -	/* %o0=dest, %o1=src, %o2=count */ -	andcc	%o1, 7, %g0		! IEU1	Group -	bne,pn	%icc, 30f		! CTI -	 add	%o0, %o2, %g3		! IEU0 -60:	ldxa	[%o1] %asi, %g1		! Load	Group -	brlez,pn %o2, 10f		! CTI -	 mov	%o0, %o3		! IEU0 -50:	sethi	%hi(0b), %o4		! IEU0	Group -	ldx	[%o4 + %lo(0b)], %o4	! Load -	sllx	%o4, 7, %o5		! IEU1	Group -1:	sub	%g1, %o4, %g2		! IEU0	Group -	stx	%g1, [%o0]		! Store -	add	%o0, 8, %o0		! IEU1 -	andcc	%g2, %o5, %g0		! IEU1	Group -	bne,pn	%xcc, 5f		! CTI -	 add	%o1, 8, %o1		! IEU0 -	cmp	%o0, %g3		! IEU1	Group -	bl,a,pt %xcc, 1b		! CTI -61:	 ldxa	[%o1] %asi, %g1		! Load -10:	retl				! CTI	Group -	 mov	%o2, %o0		! IEU0 -5:	srlx	%g2, 32, %g7		! IEU0	Group -	sethi	%hi(0xff00), %o4	! IEU1 -	andcc	%g7, %o5, %g0		! 
IEU1	Group -	be,pn	%icc, 2f		! CTI -	 or	%o4, %lo(0xff00), %o4	! IEU0 -	srlx	%g1, 48, %g7		! IEU0	Group -	andcc	%g7, %o4, %g0		! IEU1	Group -	be,pn	%icc, 50f		! CTI -	 andcc	%g7, 0xff, %g0		! IEU1	Group -	be,pn	%icc, 51f		! CTI -	 srlx	%g1, 32, %g7		! IEU0 -	andcc	%g7, %o4, %g0		! IEU1	Group -	be,pn	%icc, 52f		! CTI -	 andcc	%g7, 0xff, %g0		! IEU1	Group -	be,pn	%icc, 53f		! CTI -2:	 andcc	%g2, %o5, %g0		! IEU1	Group -	be,pn	%icc, 2f		! CTI -	 srl	%g1, 16, %g7		! IEU0 -	andcc	%g7, %o4, %g0		! IEU1	Group -	be,pn	%icc, 54f		! CTI -	 andcc	%g7, 0xff, %g0		! IEU1	Group -	be,pn	%icc, 55f		! CTI -	 andcc	%g1, %o4, %g0		! IEU1	Group -	be,pn	%icc, 56f		! CTI -	 andcc	%g1, 0xff, %g0		! IEU1	Group -	be,a,pn	%icc, 57f		! CTI -	 sub	%o0, %o3, %o0		! IEU0 -2:	cmp	%o0, %g3		! IEU1	Group -	bl,a,pt	%xcc, 50b		! CTI -62:	 ldxa	[%o1] %asi, %g1		! Load -	retl				! CTI	Group -	 mov	%o2, %o0		! IEU0 -50:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 8, %o0 -51:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 7, %o0 -52:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 6, %o0 -53:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 5, %o0 -54:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 4, %o0 -55:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 3, %o0 -56:	sub	%o0, %o3, %o0 -	retl -	 sub	%o0, 2, %o0 -57:	retl -	 sub	%o0, 1, %o0 -30:	brlez,pn %o2, 3f -	 sub	%g0, %o2, %o3 -	add	%o0, %o2, %o0 -63:	lduba	[%o1] %asi, %o4 -1:	add	%o1, 1, %o1 -	brz,pn	%o4, 2f -	 stb	%o4, [%o0 + %o3] -	addcc	%o3, 1, %o3 -	bne,pt	%xcc, 1b -64:	 lduba	[%o1] %asi, %o4 -3:	retl -	 mov	%o2, %o0 -2:	retl -	 add	%o2, %o3, %o0 -	.size	__strncpy_from_user, .-__strncpy_from_user - -	.section __ex_table,"a" -	.align	4 -	.word	60b, __retl_efault -	.word	61b, __retl_efault -	.word	62b, __retl_efault -	.word	63b, __retl_efault -	.word	64b, __retl_efault -	.previous diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c new file mode 100644 index 00000000000..1e06ed50068 --- /dev/null +++ b/arch/sparc/lib/ucmpdi2.c @@ -0,0 +1,19 @@ +#include <linux/module.h> +#include "libgcc.h" + +word_type __ucmpdi2(unsigned long long a, unsigned long long b) +{ +	const DWunion au = {.ll = a}; +	const DWunion bu = {.ll = b}; + +	if ((unsigned int) au.s.high < (unsigned int) bu.s.high) +		return 0; +	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high) +		return 2; +	if ((unsigned int) au.s.low < (unsigned int) bu.s.low) +		return 0; +	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) +		return 2; +	return 1; +} +EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S deleted file mode 100644 index 2101405bdfc..00000000000 --- a/arch/sparc/lib/udiv.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * udiv.S:      This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. 
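
The deleted strlen_user and strncpy_from_user routines above all lean on the same word-at-a-time trick: with LO_MAGIC = 0x01...01 and HI_MAGIC = 0x80...80 (LO_MAGIC shifted left by 7), (w - LO_MAGIC) & HI_MAGIC is non-zero whenever w contains a zero byte; it can also fire when no byte is zero (for example a byte >= 0x81), which is why the assembly drops into its "check every byte" path after a hit. A small C illustration at 32-bit width (the 64-bit variants use 0x0101010101010101 the same way):

#include <stdint.h>
#include <assert.h>

#define LO_MAGIC 0x01010101u
#define HI_MAGIC 0x80808080u	/* LO_MAGIC << 7 */

/* Non-zero whenever w contains a zero byte; may report false positives,
 * hence the byte-by-byte re-check after a hit. */
static int may_have_zero_byte(uint32_t w)
{
	return ((w - LO_MAGIC) & HI_MAGIC) != 0;
}

int main(void)
{
	assert(may_have_zero_byte(0x41420043u));	/* real zero byte */
	assert(!may_have_zero_byte(0x41424344u));	/* "ABCD": no hit */
	assert(may_have_zero_byte(0x90414243u));	/* false positive, no zero byte */
	return 0;
}
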
- * - * m4 parameters: - *  .udiv	name of function to generate - *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1 - *  false		false=true => signed; false=false => unsigned - * - * Algorithm parameters: - *  N		how many bits per iteration we try to get (4) - *  WORDSIZE	total number of bits (32) - * - * Derived constants: - *  TOPBITS	number of bits in the top decade of a number - * - * Important variables: - *  Q		the partial quotient under development (initially 0) - *  R		the remainder so far, initially the dividend - *  ITER	number of main division loop iterations required; - *		equal to ceil(log2(quotient) / N).  Note that this - *		is the log base (2^N) of the quotient. - *  V		the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - *  Current estimate for non-large dividend is - *	ceil(log2(quotient) / N) * (10 + 7N/2) + C - *  A large dividend is one greater than 2^(31-TOPBITS) and takes a - *  different path, as the upper bits of the quotient must be developed - *  one bit at a time. - */ - - -	.globl .udiv -	.globl _Udiv -.udiv: -_Udiv:	/* needed for export */ - -	! Ready to divide.  Compute size of quotient; scale comparand. -	orcc	%o1, %g0, %o5 -	bne	1f -	 mov	%o0, %o3 - -		! Divide by zero trap.  If it returns, return 0 (about as -		! wrong as possible, but that is what SunOS does...). -		ta	ST_DIV0 -		retl -		 clr	%o0 - -1: -	cmp	%o3, %o5			! if %o1 exceeds %o0, done -	blu	Lgot_result		! (and algorithm fails otherwise) -	 clr	%o2 - -	sethi	%hi(1 << (32 - 4 - 1)), %g1 - -	cmp	%o3, %g1 -	blu	Lnot_really_big -	 clr	%o4 - -	! Here the dividend is >= 2**(31-N) or so.  We must be careful here, -	! as our usual N-at-a-shot divide step will cause overflow and havoc. -	! The number of bits in the result here is N*ITER+SC, where SC <= N. -	! Compute ITER in an unorthodox manner: know we need to shift V into -	! the top decade: so do not even bother to compare to R. -	1: -		cmp	%o5, %g1 -		bgeu	3f -		 mov	1, %g7 - -		sll	%o5, 4, %o5 - -		b	1b -		 add	%o4, 1, %o4 - -	! Now compute %g7. -	2: -		addcc	%o5, %o5, %o5 -		bcc	Lnot_too_big -		 add	%g7, 1, %g7 - -		! We get here if the %o1 overflowed while shifting. -		! This means that %o3 has the high-order bit set. -		! Restore %o5 and subtract from %o3. -		sll	%g1, 4, %g1	! high order bit -		srl	%o5, 1, %o5		! rest of %o5 -		add	%o5, %g1, %o5 - -		b	Ldo_single_div -		 sub	%g7, 1, %g7 - -	Lnot_too_big: -	3: -		cmp	%o5, %o3 -		blu	2b -		 nop - -		be	Ldo_single_div -		 nop -	/* NB: these are commented out in the V8-Sparc manual as well */ -	/* (I do not understand this) */ -	! %o5 > %o3: went too far: back up 1 step -	!	srl	%o5, 1, %o5 -	!	dec	%g7 -	! do single-bit divide steps -	! -	! We have to be careful here.  We know that %o3 >= %o5, so we can do the -	! first divide step without thinking.  BUT, the others are conditional, -	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high- -	! order bit set in the first step, just falling into the regular -	! division loop will mess up the first time around. -	! So we unroll slightly... -	Ldo_single_div: -		subcc	%g7, 1, %g7 -		bl	Lend_regular_divide -		 nop - -		sub	%o3, %o5, %o3 -		mov	1, %o2 - -		b	Lend_single_divloop -		 nop -	Lsingle_divloop: -		sll	%o2, 1, %o2 -		bl	1f -		 srl	%o5, 1, %o5 -		! %o3 >= 0 -		sub	%o3, %o5, %o3 -		b	2f -		 add	%o2, 1, %o2 -	1:	! 
%o3 < 0 -		add	%o3, %o5, %o3 -		sub	%o2, 1, %o2 -	2: -	Lend_single_divloop: -		subcc	%g7, 1, %g7 -		bge	Lsingle_divloop -		 tst	%o3 - -		b,a	Lend_regular_divide - -Lnot_really_big: -1: -	sll	%o5, 4, %o5 - -	cmp	%o5, %o3 -	bleu	1b -	 addcc	%o4, 1, %o4 - -	be	Lgot_result -	 sub	%o4, 1, %o4 - -	tst	%o3	! set up for initial iteration -Ldivloop: -	sll	%o2, 4, %o2 -		! depth 1, accumulated bits 0 -	bl	L.1.16 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 2, accumulated bits 1 -	bl	L.2.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 3 -	bl	L.3.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 7 -	bl	L.4.23 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2+1), %o2 - -L.4.23: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2-1), %o2 - -L.3.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 5 -	bl	L.4.21 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2+1), %o2 - -L.4.21: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2-1), %o2 - -L.2.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 1 -	bl	L.3.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 3 -	bl	L.4.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2+1), %o2 - -L.4.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2-1), %o2 - -L.3.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 1 -	bl	L.4.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2+1), %o2 - -L.4.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2-1), %o2 - -L.1.16: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 2, accumulated bits -1 -	bl	L.2.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -1 -	bl	L.3.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -1 -	bl	L.4.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2+1), %o2 - -L.4.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2-1), %o2 - -L.3.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -3 -	bl	L.4.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2+1), %o2 - -L.4.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2-1), %o2 - -L.2.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -3 -	bl	L.3.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -5 -	bl	L.4.11 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2+1), %o2 - -L.4.11: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2-1), %o2 - -L.3.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -7 -	bl	L.4.9 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2+1), %o2 - -L.4.9: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2-1), %o2 - -	9: -Lend_regular_divide: -	subcc	%o4, 1, %o4 -	bge	Ldivloop -	 tst	%o3 - -	bl,a	Lgot_result -	! non-restoring fixup here (one instruction only!) 
-	sub	%o2, 1, %o2 - -Lgot_result: - -	retl -	 mov %o2, %o0 - -	.globl	.udiv_patch -.udiv_patch: -	wr	%g0, 0x0, %y -	nop -	nop -	retl -	 udiv	%o0, %o1, %o0 -	nop diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S index b430f1f0ef6..24e0a355e2e 100644 --- a/arch/sparc/lib/udivdi3.S +++ b/arch/sparc/lib/udivdi3.S @@ -60,8 +60,9 @@ __udivdi3:  	bne .LL77  	mov %i0,%o2  	mov 1,%o0 -	call .udiv,0  	mov 0,%o1 +	wr %g0, 0, %y +	udiv %o0, %o1, %o0  	mov %o0,%o3  	mov %i0,%o2  .LL77: diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S deleted file mode 100644 index 1f36ae68252..00000000000 --- a/arch/sparc/lib/umul.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * umul.S:      This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - - -/* - * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies.  Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - *	call	.umul - *	nop - *	bnz	overflow	(or tnz) - */ - -	.globl .umul -	.globl _Umul -.umul: -_Umul:	/* needed for export */ -	or	%o0, %o1, %o4 -	mov	%o0, %y		! multiplier -> Y - -	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args -	be	Lmul_shortway	! if zero, can do it the short way -	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V - -	/* -	 * Long multiply.  32 steps, followed by a final shift step. -	 */ -	mulscc	%o4, %o1, %o4	! 1 -	mulscc	%o4, %o1, %o4	! 2 -	mulscc	%o4, %o1, %o4	! 3 -	mulscc	%o4, %o1, %o4	! 4 -	mulscc	%o4, %o1, %o4	! 5 -	mulscc	%o4, %o1, %o4	! 6 -	mulscc	%o4, %o1, %o4	! 7 -	mulscc	%o4, %o1, %o4	! 8 -	mulscc	%o4, %o1, %o4	! 9 -	mulscc	%o4, %o1, %o4	! 10 -	mulscc	%o4, %o1, %o4	! 11 -	mulscc	%o4, %o1, %o4	! 12 -	mulscc	%o4, %o1, %o4	! 13 -	mulscc	%o4, %o1, %o4	! 14 -	mulscc	%o4, %o1, %o4	! 15 -	mulscc	%o4, %o1, %o4	! 16 -	mulscc	%o4, %o1, %o4	! 17 -	mulscc	%o4, %o1, %o4	! 18 -	mulscc	%o4, %o1, %o4	! 19 -	mulscc	%o4, %o1, %o4	! 20 -	mulscc	%o4, %o1, %o4	! 21 -	mulscc	%o4, %o1, %o4	! 22 -	mulscc	%o4, %o1, %o4	! 23 -	mulscc	%o4, %o1, %o4	! 24 -	mulscc	%o4, %o1, %o4	! 25 -	mulscc	%o4, %o1, %o4	! 26 -	mulscc	%o4, %o1, %o4	! 27 -	mulscc	%o4, %o1, %o4	! 28 -	mulscc	%o4, %o1, %o4	! 29 -	mulscc	%o4, %o1, %o4	! 30 -	mulscc	%o4, %o1, %o4	! 31 -	mulscc	%o4, %o1, %o4	! 32 -	mulscc	%o4, %g0, %o4	! final shift - - -	/* -	 * Normally, with the shift-and-add approach, if both numbers are -	 * positive you get the correct result.  With 32-bit two's-complement -	 * numbers, -x is represented as -	 * -	 *		  x		    32 -	 *	( 2  -  ------ ) mod 2  *  2 -	 *		   32 -	 *		  2 -	 * -	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s, -	 * we can treat this as if the radix point were just to the left -	 * of the sign bit (multiply by 2^32), and get -	 * -	 *	-x  =  (2 - x) mod 2 -	 * -	 * Then, ignoring the `mod 2's for convenience: -	 * -	 *   x *  y	= xy -	 *  -x *  y	= 2y - xy -	 *   x * -y	= 2x - xy -	 *  -x * -y	= 4 - 2x - 2y + xy -	 * -	 * For signed multiplies, we subtract (x << 32) from the partial -	 * product to fix this problem for negative multipliers (see mul.s). 
-	 * Because of the way the shift into the partial product is calculated -	 * (N xor V), this term is automatically removed for the multiplicand, -	 * so we don't have to adjust. -	 * -	 * But for unsigned multiplies, the high order bit wasn't a sign bit, -	 * and the correction is wrong.  So for unsigned multiplies where the -	 * high order bit is one, we end up with xy - (y << 32).  To fix it -	 * we add y << 32. -	 */ -#if 0 -	tst	%o1 -	bl,a	1f		! if %o1 < 0 (high order bit = 1), -	 add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half) - -1: -	rd	%y, %o0		! get lower half of product -	retl -	 addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0 -#else -	/* Faster code from tege@sics.se.  */ -	sra	%o1, 31, %o2	! make mask from sign bit -	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1 -	rd	%y, %o0		! get lower half of product -	retl -	 addcc	%o4, %o2, %o1	! add compensation and put upper half in place -#endif - -Lmul_shortway: -	/* -	 * Short multiply.  12 steps, followed by a final shift step. -	 * The resulting bits are off by 12 and (32-12) = 20 bit positions, -	 * but there is no problem with %o0 being negative (unlike above), -	 * and overflow is impossible (the answer is at most 24 bits long). -	 */ -	mulscc	%o4, %o1, %o4	! 1 -	mulscc	%o4, %o1, %o4	! 2 -	mulscc	%o4, %o1, %o4	! 3 -	mulscc	%o4, %o1, %o4	! 4 -	mulscc	%o4, %o1, %o4	! 5 -	mulscc	%o4, %o1, %o4	! 6 -	mulscc	%o4, %o1, %o4	! 7 -	mulscc	%o4, %o1, %o4	! 8 -	mulscc	%o4, %o1, %o4	! 9 -	mulscc	%o4, %o1, %o4	! 10 -	mulscc	%o4, %o1, %o4	! 11 -	mulscc	%o4, %o1, %o4	! 12 -	mulscc	%o4, %g0, %o4	! final shift - -	/* -	 * %o4 has 20 of the bits that should be in the result; %y has -	 * the bottom 12 (as %y's top 12).  That is: -	 * -	 *	  %o4		    %y -	 * +----------------+----------------+ -	 * | -12- |   -20-  | -12- |   -20-  | -	 * +------(---------+------)---------+ -	 *	   -----result----- -	 * -	 * The 12 bits of %o4 left of the `result' area are all zero; -	 * in fact, all top 20 bits of %o4 are zero. -	 */ - -	rd	%y, %o5 -	sll	%o4, 12, %o0	! shift middle bits left 12 -	srl	%o5, 20, %o5	! shift low bits right 20 -	or	%o5, %o0, %o0 -	retl -	 addcc	%g0, %g0, %o1	! %o1 = zero, and set Z - -	.globl	.umul_patch -.umul_patch: -	umul	%o0, %o1, %o0 -	retl -	 rd	%y, %o1 -	nop diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S deleted file mode 100644 index 77123eb83c4..00000000000 --- a/arch/sparc/lib/urem.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * urem.S:      This routine was taken from glibc-1.09 and is covered - *              by the GNU Library General Public License Version 2. - */ - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - *  .urem	name of function to generate - *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - *  false		false=true => signed; false=false => unsigned - * - * Algorithm parameters: - *  N		how many bits per iteration we try to get (4) - *  WORDSIZE	total number of bits (32) - * - * Derived constants: - *  TOPBITS	number of bits in the top decade of a number - * - * Important variables: - *  Q		the partial quotient under development (initially 0) - *  R		the remainder so far, initially the dividend - *  ITER	number of main division loop iterations required; - *		equal to ceil(log2(quotient) / N).  
Note that this - *		is the log base (2^N) of the quotient. - *  V		the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - *  Current estimate for non-large dividend is - *	ceil(log2(quotient) / N) * (10 + 7N/2) + C - *  A large dividend is one greater than 2^(31-TOPBITS) and takes a - *  different path, as the upper bits of the quotient must be developed - *  one bit at a time. - */ - -	.globl .urem -	.globl _Urem -.urem: -_Urem:	/* needed for export */ - -	! Ready to divide.  Compute size of quotient; scale comparand. -	orcc	%o1, %g0, %o5 -	bne	1f -	 mov	%o0, %o3 - -		! Divide by zero trap.  If it returns, return 0 (about as -		! wrong as possible, but that is what SunOS does...). -		ta	ST_DIV0 -		retl -		 clr	%o0 - -1: -	cmp	%o3, %o5			! if %o1 exceeds %o0, done -	blu	Lgot_result		! (and algorithm fails otherwise) -	 clr	%o2 - -	sethi	%hi(1 << (32 - 4 - 1)), %g1 - -	cmp	%o3, %g1 -	blu	Lnot_really_big -	 clr	%o4 - -	! Here the dividend is >= 2**(31-N) or so.  We must be careful here, -	! as our usual N-at-a-shot divide step will cause overflow and havoc. -	! The number of bits in the result here is N*ITER+SC, where SC <= N. -	! Compute ITER in an unorthodox manner: know we need to shift V into -	! the top decade: so do not even bother to compare to R. -	1: -		cmp	%o5, %g1 -		bgeu	3f -		 mov	1, %g7 - -		sll	%o5, 4, %o5 - -		b	1b -		 add	%o4, 1, %o4 - -	! Now compute %g7. -	2: -		addcc	%o5, %o5, %o5 -		bcc	Lnot_too_big -		 add	%g7, 1, %g7 - -		! We get here if the %o1 overflowed while shifting. -		! This means that %o3 has the high-order bit set. -		! Restore %o5 and subtract from %o3. -		sll	%g1, 4, %g1	! high order bit -		srl	%o5, 1, %o5		! rest of %o5 -		add	%o5, %g1, %o5 - -		b	Ldo_single_div -		 sub	%g7, 1, %g7 - -	Lnot_too_big: -	3: -		cmp	%o5, %o3 -		blu	2b -		 nop - -		be	Ldo_single_div -		 nop -	/* NB: these are commented out in the V8-Sparc manual as well */ -	/* (I do not understand this) */ -	! %o5 > %o3: went too far: back up 1 step -	!	srl	%o5, 1, %o5 -	!	dec	%g7 -	! do single-bit divide steps -	! -	! We have to be careful here.  We know that %o3 >= %o5, so we can do the -	! first divide step without thinking.  BUT, the others are conditional, -	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high- -	! order bit set in the first step, just falling into the regular -	! division loop will mess up the first time around. -	! So we unroll slightly... -	Ldo_single_div: -		subcc	%g7, 1, %g7 -		bl	Lend_regular_divide -		 nop - -		sub	%o3, %o5, %o3 -		mov	1, %o2 - -		b	Lend_single_divloop -		 nop -	Lsingle_divloop: -		sll	%o2, 1, %o2 -		bl	1f -		 srl	%o5, 1, %o5 -		! %o3 >= 0 -		sub	%o3, %o5, %o3 -		b	2f -		 add	%o2, 1, %o2 -	1:	! %o3 < 0 -		add	%o3, %o5, %o3 -		sub	%o2, 1, %o2 -	2: -	Lend_single_divloop: -		subcc	%g7, 1, %g7 -		bge	Lsingle_divloop -		 tst	%o3 - -		b,a	Lend_regular_divide - -Lnot_really_big: -1: -	sll	%o5, 4, %o5 - -	cmp	%o5, %o3 -	bleu	1b -	 addcc	%o4, 1, %o4 - -	be	Lgot_result -	 sub	%o4, 1, %o4 - -	tst	%o3	! set up for initial iteration -Ldivloop: -	sll	%o2, 4, %o2 -		! depth 1, accumulated bits 0 -	bl	L.1.16 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 2, accumulated bits 1 -	bl	L.2.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 3 -	bl	L.3.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 7 -	bl	L.4.23 -	 srl	%o5,1,%o5 -	! 
remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2+1), %o2 - -L.4.23: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (7*2-1), %o2 - -L.3.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 5 -	bl	L.4.21 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2+1), %o2 - -L.4.21: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (5*2-1), %o2 - -L.2.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits 1 -	bl	L.3.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 3 -	bl	L.4.19 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2+1), %o2 - -L.4.19: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (3*2-1), %o2 - -L.3.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits 1 -	bl	L.4.17 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2+1), %o2 -	 -L.4.17: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (1*2-1), %o2 - -L.1.16: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 2, accumulated bits -1 -	bl	L.2.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -1 -	bl	L.3.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -1 -	bl	L.4.15 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2+1), %o2 - -L.4.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-1*2-1), %o2 - -L.3.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -3 -	bl	L.4.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2+1), %o2 - -L.4.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-3*2-1), %o2 - -L.2.15: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 3, accumulated bits -3 -	bl	L.3.13 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -5 -	bl	L.4.11 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2+1), %o2 -	 -L.4.11: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-5*2-1), %o2 - -L.3.13: -	! remainder is negative -	addcc	%o3,%o5,%o3 -			! depth 4, accumulated bits -7 -	bl	L.4.9 -	 srl	%o5,1,%o5 -	! remainder is positive -	subcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2+1), %o2 - -L.4.9: -	! remainder is negative -	addcc	%o3,%o5,%o3 -	b	9f -	 add	%o2, (-7*2-1), %o2 - -	9: -Lend_regular_divide: -	subcc	%o4, 1, %o4 -	bge	Ldivloop -	 tst	%o3 - -	bl,a	Lgot_result -	! non-restoring fixup here (one instruction only!) -	add	%o3, %o1, %o3 - -Lgot_result: - -	retl -	 mov %o3, %o0 - -	.globl	.urem_patch -.urem_patch: -	wr	%g0, 0x0, %y -	nop -	nop -	nop -	udiv	%o0, %o1, %o2 -	umul	%o2, %o1, %o2 -	retl -	 sub	%o0, %o2, %o0 diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c deleted file mode 100644 index 14b363fec8a..00000000000 --- a/arch/sparc/lib/usercopy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <linux/module.h> -#include <linux/bug.h> - -void copy_from_user_overflow(void) -{ -	WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S index f44f58f4023..2c05641c326 100644 --- a/arch/sparc/lib/xor.S +++ b/arch/sparc/lib/xor.S @@ -8,6 +8,7 @@   * Copyright (C) 2006 David S. 
Miller <davem@davemloft.net>   */ +#include <linux/linkage.h>  #include <asm/visasm.h>  #include <asm/asi.h>  #include <asm/dcu.h> @@ -19,12 +20,9 @@   *	!(len & 127) && len >= 256   */  	.text -	.align	32  	/* VIS versions. */ -	.globl	xor_vis_2 -	.type	xor_vis_2,#function -xor_vis_2: +ENTRY(xor_vis_2)  	rd	%fprs, %o5  	andcc	%o5, FPRS_FEF|FPRS_DU, %g0  	be,pt	%icc, 0f @@ -91,11 +89,9 @@ xor_vis_2:  	wr	%g1, %g0, %asi  	retl  	  wr	%g0, 0, %fprs -	.size	xor_vis_2, .-xor_vis_2 +ENDPROC(xor_vis_2) -	.globl	xor_vis_3 -	.type	xor_vis_3,#function -xor_vis_3: +ENTRY(xor_vis_3)  	rd	%fprs, %o5  	andcc	%o5, FPRS_FEF|FPRS_DU, %g0  	be,pt	%icc, 0f @@ -159,11 +155,9 @@ xor_vis_3:  	wr	%g1, %g0, %asi  	retl  	 wr	%g0, 0, %fprs -	.size	xor_vis_3, .-xor_vis_3 +ENDPROC(xor_vis_3) -	.globl	xor_vis_4 -	.type	xor_vis_4,#function -xor_vis_4: +ENTRY(xor_vis_4)  	rd	%fprs, %o5  	andcc	%o5, FPRS_FEF|FPRS_DU, %g0  	be,pt	%icc, 0f @@ -246,11 +240,9 @@ xor_vis_4:  	wr	%g1, %g0, %asi  	retl  	 wr	%g0, 0, %fprs -	.size	xor_vis_4, .-xor_vis_4 +ENDPROC(xor_vis_4) -	.globl	xor_vis_5 -	.type	xor_vis_5,#function -xor_vis_5: +ENTRY(xor_vis_5)  	save	%sp, -192, %sp  	rd	%fprs, %o5  	andcc	%o5, FPRS_FEF|FPRS_DU, %g0 @@ -354,12 +346,10 @@ xor_vis_5:  	wr	%g0, 0, %fprs  	ret  	 restore -	.size	xor_vis_5, .-xor_vis_5 +ENDPROC(xor_vis_5)  	/* Niagara versions. */ -	.globl		xor_niagara_2 -	.type		xor_niagara_2,#function -xor_niagara_2:		/* %o0=bytes, %o1=dest, %o2=src */ +ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */  	save		%sp, -192, %sp  	prefetch	[%i1], #n_writes  	prefetch	[%i2], #one_read @@ -402,11 +392,9 @@ xor_niagara_2:		/* %o0=bytes, %o1=dest, %o2=src */  	wr		%g7, 0x0, %asi  	ret  	 restore -	.size		xor_niagara_2, .-xor_niagara_2 +ENDPROC(xor_niagara_2) -	.globl		xor_niagara_3 -	.type		xor_niagara_3,#function -xor_niagara_3:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ +ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */  	save		%sp, -192, %sp  	prefetch	[%i1], #n_writes  	prefetch	[%i2], #one_read @@ -465,11 +453,9 @@ xor_niagara_3:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */  	wr		%g7, 0x0, %asi  	ret  	 restore -	.size		xor_niagara_3, .-xor_niagara_3 +ENDPROC(xor_niagara_3) -	.globl		xor_niagara_4 -	.type		xor_niagara_4,#function -xor_niagara_4:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ +ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */  	save		%sp, -192, %sp  	prefetch	[%i1], #n_writes  	prefetch	[%i2], #one_read @@ -549,11 +535,9 @@ xor_niagara_4:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */  	wr		%g7, 0x0, %asi  	ret  	 restore -	.size		xor_niagara_4, .-xor_niagara_4 +ENDPROC(xor_niagara_4) -	.globl		xor_niagara_5 -	.type		xor_niagara_5,#function -xor_niagara_5:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ +ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */  	save		%sp, -192, %sp  	prefetch	[%i1], #n_writes  	prefetch	[%i2], #one_read @@ -649,4 +633,4 @@ xor_niagara_5:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 *  	wr		%g7, 0x0, %asi  	ret  	 restore -	.size		xor_niagara_5, .-xor_niagara_5 +ENDPROC(xor_niagara_5)  | 
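
Note: the new ucmpdi2.c above supplies the libgcc helper gcc emits for unsigned 64-bit comparisons on 32-bit sparc. The return convention is 0 for less-than, 1 for equal and 2 for greater-than, comparing the high words first and then the low words. A minimal standalone sketch of that convention, written against plain unsigned long long rather than the DWunion high/low pair (names below are illustrative, not from the kernel tree):

#include <assert.h>

/* Same 0/1/2 return convention as __ucmpdi2 above; illustrative only. */
static int ucmpdi2_model(unsigned long long a, unsigned long long b)
{
	if (a < b)
		return 0;	/* less than */
	if (a > b)
		return 2;	/* greater than */
	return 1;		/* equal */
}

int main(void)
{
	assert(ucmpdi2_model(1ULL, 2ULL) == 0);
	assert(ucmpdi2_model(5ULL, 5ULL) == 1);
	assert(ucmpdi2_model(1ULL << 40, 1ULL) == 2);
	return 0;
}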
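
Note: the deleted udiv.S and urem.S (both generated from divrem.m4) implement non-restoring division: the quotient is developed from +1/-1 digits, so the partial remainder may go negative, and the "non-restoring fixup here (one instruction only!)" at the end subtracts 1 from the quotient (udiv) or adds the divisor back to the remainder (urem) when the final remainder came out negative. A minimal C model of the same scheme, simplified to one quotient bit per step instead of the 4-bit steps and comparand scaling the assembly performs; illustrative only, not the kernel's code:

#include <assert.h>
#include <stdint.h>

static void nr_udivrem(uint32_t x, uint32_t y, uint32_t *q, uint32_t *r)
{
	int64_t rem = x;	/* partial remainder, may go negative */
	int64_t quo = 0;	/* quotient built from +1/-1 digits */
	int i;

	assert(y != 0);		/* the real code traps via ta ST_DIV0 */
	for (i = 31; i >= 0; i--) {
		int64_t v = (int64_t)y << i;	/* comparand for this step */

		if (rem >= 0) {
			rem -= v;
			quo = 2 * quo + 1;	/* digit +1 */
		} else {
			rem += v;
			quo = 2 * quo - 1;	/* digit -1 */
		}
	}
	if (rem < 0) {		/* the one-instruction non-restoring fixup */
		quo -= 1;	/* udiv.S:  sub %o2, 1, %o2  */
		rem += y;	/* urem.S:  add %o3, %o1, %o3 */
	}
	*q = (uint32_t)quo;	/* == x / y */
	*r = (uint32_t)rem;	/* == x % y */
}

After the fixup, x == quo * y + rem with 0 <= rem < y, which is what the removed routines returned in %o0 (quotient for .udiv, remainder for .urem).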
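
Note: the deleted umul.S forms the 64-bit product with 32 mulscc steps, which inherently treat one operand as signed; the sra/and/addcc epilogue (the "faster code from tege@sics.se") compensates by adding the other operand into the upper half whenever the signed-treated operand has its top bit set, since an unsigned value with the top bit set is exactly 2^32 larger than its signed interpretation. The identity, as a hedged C sketch (operand naming here is illustrative; in the assembly the multiplier sits in %y):

#include <assert.h>
#include <stdint.h>

static uint64_t umul32_model(uint32_t x, uint32_t y)
{
	/* What a signed shift-and-add loop yields: x times y taken as signed. */
	uint64_t p = (uint64_t)((int64_t)x * (int32_t)y);

	/* (uint32_t)y == (int32_t)y + 2^32 when the top bit is set,
	 * so add x << 32 back in (mod 2^64). */
	if ((int32_t)y < 0)
		p += (uint64_t)x << 32;

	return p;	/* equals (uint64_t)x * (uint64_t)y */
}

int main(void)
{
	assert(umul32_model(2, 0x80000000u) == 0x100000000ull);
	assert(umul32_model(0xffffffffu, 0xffffffffu) == 0xfffffffe00000001ull);
	return 0;
}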
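
Note: in udivdi3.S the call into the removed software .udiv is replaced by the V8 udiv instruction itself, matching the sequence the old .udiv_patch site already used: wr %g0, 0, %y clears the Y register first, because udiv divides the 64-bit value formed by {Y, rs1} by rs2. Roughly, in C (a sketch, not the kernel's code):

#include <stdint.h>

/* Rough model of "wr %g0, 0, %y; udiv %o0, %o1, %o0": with Y cleared this
 * is a plain 32-by-32 unsigned divide (divide-by-zero traps on real
 * hardware rather than being checked here). */
static uint32_t v8_udiv_model(uint32_t o0, uint32_t o1)
{
	uint64_t dividend = ((uint64_t)0 << 32) | o0;	/* {Y = 0, %o0} */

	return (uint32_t)(dividend / o1);
}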
