diff options
Diffstat (limited to 'arch/sparc/lib')
43 files changed, 909 insertions, 2504 deletions
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index a3fc4375a15..3269b023409 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -4,16 +4,15 @@ asflags-y := -ansi -DST_DIV0=0x02 ccflags-y := -Werror -lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o +lib-$(CONFIG_SPARC32) += ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o lib-y += strlen.o lib-y += checksum_$(BITS).o lib-$(CONFIG_SPARC32) += blockops.o lib-y += memscan_$(BITS).o memcmp.o strncmp_$(BITS).o -lib-y += strncpy_from_user_$(BITS).o strlen_user_$(BITS).o lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o lib-$(CONFIG_SPARC32) += copy_user.o locks.o -lib-y += atomic_$(BITS).o +lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o @@ -33,14 +32,16 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o lib-$(CONFIG_SPARC64) += NG2patch.o +lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o +lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o -obj-y += iomap.o -obj-$(CONFIG_SPARC32) += atomic32.o +obj-$(CONFIG_SPARC64) += iomap.o +obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-y += ksyms.o obj-$(CONFIG_SPARC64) += PeeCeeI.o -obj-y += usercopy.o diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 0aed75653b5..30eee6e8a81 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -14,7 +14,7 @@ #define FPRS_FEF 0x04 #ifdef MEMCPY_DEBUG #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ - clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; + clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs #else #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs @@ -90,49 +90,49 @@ faligndata %x7, %x8, %f14; #define FREG_MOVE_1(x0) \ - fmovd %x0, %f0; + fsrc2 %x0, %f0; #define FREG_MOVE_2(x0, x1) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; #define FREG_MOVE_3(x0, x1, x2) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; #define FREG_MOVE_4(x0, x1, x2, x3) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; \ - fmovd %x3, %f6; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; #define FREG_MOVE_5(x0, x1, x2, x3, x4) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; \ - fmovd %x3, %f6; \ - fmovd %x4, %f8; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; #define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; \ - fmovd %x3, %f6; \ - fmovd %x4, %f8; \ - fmovd %x5, %f10; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; #define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; \ - fmovd %x3, %f6; \ - fmovd %x4, %f8; \ - fmovd %x5, %f10; \ - fmovd %x6, %f12; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; \ + fsrc2 %x6, %f12; #define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ - fmovd %x0, %f0; \ - fmovd %x1, %f2; \ - fmovd %x2, %f4; \ - fmovd %x3, %f6; \ - fmovd %x4, %f8; \ - fmovd %x5, %f10; \ - fmovd %x6, %f12; \ - fmovd %x7, %f14; + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; \ + fsrc2 %x6, %f12; \ + fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ EX_LD(LOAD(ldd, base + 0x00, %x0)) #define FREG_LOAD_2(base, x0, x1) \ @@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ cmp %g2, 0 tne %xcc, 5 PREAMBLE - mov %o0, GLOBAL_SPARE + mov %o0, %o3 cmp %o2, 0 be,pn %XCC, 85f - or %o0, %o1, %o3 + or %o0, %o1, GLOBAL_SPARE cmp %o2, 16 blu,a,pn %XCC, 80f - or %o3, %o2, %o3 + or GLOBAL_SPARE, %o2, GLOBAL_SPARE /* 2 blocks (128 bytes) is the minimum we can do the block * copy with. We need to ensure that we'll iterate at least @@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ cmp %o2, (4 * 64) blu,pt %XCC, 75f - andcc %o3, 0x7, %g0 + andcc GLOBAL_SPARE, 0x7, %g0 /* %o0: dst * %o1: src @@ -236,6 +236,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ VISEntryHalf + membar #Sync alignaddr %o1, %g0, %g0 add %o1, (64 - 1), %o4 @@ -404,13 +405,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ * over. If anything is left, we copy it one byte at a time. */ brz,pt %o2, 85f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE ba,a,pt %XCC, 90f .align 64 75: /* 16 < len <= 64 */ bne,pn %XCC, 75f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE 72: andn %o2, 0xf, %o4 @@ -420,9 +421,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o1, 0x08, %o1 EX_LD(LOAD(ldx, %o1, %g1)) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 @@ -430,14 +431,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop sub %o2, 0x8, %o2 EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -454,11 +455,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %g1, 1, %g1 EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) bgu,pt %icc, 1b add %o1, 1, %o1 -2: add %o1, %o3, %o0 +2: add %o1, GLOBAL_SPARE, %o0 andcc %o1, 0x7, %g1 bne,pt %icc, 8f sll %g1, 3, %g1 @@ -468,16 +469,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %xcc, 73b -8: mov 64, %o3 +8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 EX_LD(LOAD(ldx, %o1, %g2)) - sub %o3, %g1, %o3 + sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 EX_LD(LOAD(ldx, %o1, %g3)) subcc %o4, 0x8, %o4 - srlx %g3, %o3, %o5 + srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 EX_ST(STORE(stx, %o5, %o0)) add %o0, 0x8, %o0 @@ -489,32 +490,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pn %icc, 85f add %o1, %g1, %o1 ba,pt %xcc, 90f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE .align 64 80: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 + andcc GLOBAL_SPARE, 0x3, %g0 bne,pn %XCC, 90f - sub %o0, %o1, %o3 + sub %o0, %o1, GLOBAL_SPARE 1: subcc %o2, 4, %o2 EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 1b add %o1, 4, %o1 85: retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 + mov EX_RETVAL(%o3), %o0 .align 32 90: subcc %o2, 1, %o2 EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) bgu,pt %XCC, 90b add %o1, 1, %o1 retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 + mov EX_RETVAL(%o3), %o0 .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S new file mode 100644 index 00000000000..e16c88204a4 --- /dev/null +++ b/arch/sparc/lib/NG4clear_page.S @@ -0,0 +1,29 @@ +/* NG4copy_page.S: Niagara-4 optimized clear page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + + .register %g3, #scratch + + .align 32 + .globl NG4clear_page + .globl NG4clear_user_page +NG4clear_page: /* %o0=dest */ +NG4clear_user_page: /* %o0=dest, %o1=vaddr */ + set PAGE_SIZE, %g7 + mov 0x20, %g3 +1: stxa %g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P + subcc %g7, 0x40, %g7 + stxa %g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + membar #StoreLoad|#StoreStore + retl + nop + .size NG4clear_page,.-NG4clear_page + .size NG4clear_user_page,.-NG4clear_user_page
\ No newline at end of file diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 00000000000..fd9f903ffa3 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -0,0 +1,30 @@ +/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __retl_one_asi;\ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME NG4copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, ___copy_in_user; \ + nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 00000000000..28504e88c53 --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S @@ -0,0 +1,57 @@ +/* NG4copy_page.S: Niagara-4 optimized copy page. + * + * Copyright (C) 2012 (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/page.h> + + .text + .align 32 + + .register %g2, #scratch + .register %g3, #scratch + + .globl NG4copy_user_page +NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x000], #n_reads_strong + prefetch [%o1 + 0x040], #n_reads_strong + prefetch [%o1 + 0x080], #n_reads_strong + prefetch [%o1 + 0x0c0], #n_reads_strong + set PAGE_SIZE, %g7 + prefetch [%o1 + 0x100], #n_reads_strong + prefetch [%o1 + 0x140], #n_reads_strong + prefetch [%o1 + 0x180], #n_reads_strong + prefetch [%o1 + 0x1c0], #n_reads_strong +1: + ldx [%o1 + 0x00], %o2 + subcc %g7, 0x40, %g7 + ldx [%o1 + 0x08], %o3 + ldx [%o1 + 0x10], %o4 + ldx [%o1 + 0x18], %o5 + ldx [%o1 + 0x20], %g1 + stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x28], %g2 + stxa %o3, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x30], %g3 + stxa %o4, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + ldx [%o1 + 0x38], %o2 + add %o1, 0x40, %o1 + stxa %o5, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + stxa %g1, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + stxa %g2, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + stxa %g3, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P + add %o0, 0x08, %o0 + bne,pt %icc, 1b + prefetch [%o1 + 0x200], #n_reads_strong + retl + membar #StoreLoad | #StoreStore + .size NG4copy_user_page,.-NG4copy_user_page diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 00000000000..9744c4540a8 --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -0,0 +1,39 @@ +/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __retl_one_asi;\ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME NG4copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, ___copy_in_user; \ + nop +#endif + +#include "NG4memcpy.S" diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 00000000000..9cf2ee01cee --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S @@ -0,0 +1,360 @@ +/* NG4memcpy.S: Niagara-4 optimized memcpy. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include <asm/visasm.h> +#include <asm/asi.h> +#define GLOBAL_SPARE %g7 +#else +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define FPRS_FEF 0x04 + +/* On T4 it is very expensive to access ASRs like %fprs and + * %asi, avoiding a read or a write can save ~50 cycles. + */ +#define FPU_ENTER \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,a,pn %icc, 999f; \ + wr %g0, FPRS_FEF, %fprs; \ + 999: + +#ifdef MEMCPY_DEBUG +#define VISEntryHalf FPU_ENTER; \ + clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntryHalf FPU_ENTER +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif + +#define GLOBAL_SPARE %g5 +#endif + +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef STORE +#ifndef MEMCPY_DEBUG +#define STORE(type,src,addr) type src, [addr] +#else +#define STORE(type,src,addr) type##a src, [addr] %asi +#endif +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NG4memcpy +#endif +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ +#ifdef MEMCPY_DEBUG + wr %g0, 0x80, %asi +#endif + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %XCC, 5 + PREAMBLE + mov %o0, %o3 + brz,pn %o2, .Lexit + cmp %o2, 3 + ble,pn %icc, .Ltiny + cmp %o2, 19 + ble,pn %icc, .Lsmall + or %o0, %o1, %g2 + cmp %o2, 128 + bl,pn %icc, .Lmedium + nop + +.Llarge:/* len >= 0x80 */ + /* First get dest 8 byte aligned. */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 51f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) + +51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) + LOAD(prefetch, %o1 + 0x080, #n_reads_strong) + LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x100, #n_reads_strong) + LOAD(prefetch, %o1 + 0x140, #n_reads_strong) + LOAD(prefetch, %o1 + 0x180, #n_reads_strong) + LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + /* Check if we can use the straight fully aligned + * loop, or we require the alignaddr/faligndata variant. + */ + andcc %o1, 0x7, %o5 + bne,pn %icc, .Llarge_src_unaligned + sub %g0, %o0, %g1 + + /* Legitimize the use of initializing stores by getting dest + * to be 64-byte aligned. + */ + and %g1, 0x3f, %g1 + brz,pt %g1, .Llarge_aligned + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) + add %o1, 8, %o1 + subcc %g1, 8, %g1 + add %o0, 8, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g2, %o0 - 0x08)) + +.Llarge_aligned: + /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x40, %o1 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) + EX_ST(STORE_INIT(%g1, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) + EX_ST(STORE_INIT(%o5, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + membar #StoreLoad | #StoreStore + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_noprefetch + +.Lexit: retl + mov EX_RETVAL(%o3), %o0 + +.Llarge_src_unaligned: + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + VISEntryHalf + alignaddr %o1, %g0, %g1 + add %o1, %o4, %o1 + EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) +1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) + EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) + EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) + EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) + EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) + EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) + faligndata %f0, %f2, %f16 + EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) + faligndata %f2, %f4, %f18 + add %g1, 0x40, %g1 + faligndata %f4, %f6, %f20 + faligndata %f6, %f8, %f22 + faligndata %f8, %f10, %f24 + faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + EX_ST(STORE(std, %f16, %o0 + 0x00)) + EX_ST(STORE(std, %f18, %o0 + 0x08)) + EX_ST(STORE(std, %f20, %o0 + 0x10)) + EX_ST(STORE(std, %f22, %o0 + 0x18)) + EX_ST(STORE(std, %f24, %o0 + 0x20)) + EX_ST(STORE(std, %f26, %o0 + 0x28)) + EX_ST(STORE(std, %f28, %o0 + 0x30)) + EX_ST(STORE(std, %f30, %o0 + 0x38)) + add %o0, 0x40, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %g1 + 0x200, #n_reads_strong) + VISExitHalf + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_unaligned + +.Lmedium: + LOAD(prefetch, %o1 + 0x40, #n_reads_strong) + andcc %g2, 0x7, %g0 + bne,pn %icc, .Lmedium_unaligned + nop +.Lmedium_noprefetch: + andncc %o2, 0x20 - 1, %o5 + be,pn %icc, 2f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) + add %o1, 0x20, %o1 + subcc %o5, 0x20, %o5 + EX_ST(STORE(stx, %g1, %o0 + 0x00)) + EX_ST(STORE(stx, %g2, %o0 + 0x08)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) + EX_ST(STORE(stx, %o4, %o0 + 0x18)) + bne,pt %icc, 1b + add %o0, 0x20, %o0 +2: andcc %o2, 0x18, %o5 + be,pt %icc, 3f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x08, %o1 + add %o0, 0x08, %o0 + subcc %o5, 0x08, %o5 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g1, %o0 - 0x08)) +3: brz,pt %o2, .Lexit + cmp %o2, 0x04 + bl,pn %icc, .Ltiny + nop + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + add %o0, 0x04, %o0 + subcc %o2, 0x04, %o2 + bne,pn %icc, .Ltiny + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + ba,a,pt %icc, .Lexit +.Lmedium_unaligned: + /* First get dest 8 byte aligned. */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 2f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) +2: + and %o1, 0x7, %g1 + brz,pn %g1, .Lmedium_noprefetch + sll %g1, 3, %g1 + mov 64, %g2 + sub %g2, %g1, %g2 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + sllx %o4, %g1, %o4 + andn %o2, 0x08 - 1, %o5 + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) + add %o1, 0x08, %o1 + subcc %o5, 0x08, %o5 + srlx %g3, %g2, GLOBAL_SPARE + or GLOBAL_SPARE, %o4, GLOBAL_SPARE + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + sllx %g3, %g1, %o4 + srl %g1, 3, %g1 + add %o1, %g1, %o1 + brz,pn %o2, .Lexit + nop + ba,pt %icc, .Lsmall_unaligned + +.Ltiny: + EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x00)) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x01)) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + ba,pt %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x02)) + +.Lsmall: + andcc %g2, 0x3, %g0 + bne,pn %icc, .Lsmall_unaligned + andn %o2, 0x4 - 1, %o5 + sub %o2, %o5, %o2 +1: + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + subcc %o5, 0x04, %o5 + add %o0, 0x04, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + brz,pt %o2, .Lexit + nop + ba,a,pt %icc, .Ltiny + +.Lsmall_unaligned: +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + add %o1, 1, %o1 + add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g1, %o0 - 0x01)) + ba,a,pt %icc, .Lexit + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S new file mode 100644 index 00000000000..41da4bdd95c --- /dev/null +++ b/arch/sparc/lib/NG4memset.S @@ -0,0 +1,105 @@ +/* NG4memset.S: Niagara-4 optimized memset/bzero. + * + * Copyright (C) 2012 David S. Miller (davem@davemloft.net) + */ + +#include <asm/asi.h> + + .register %g2, #scratch + .register %g3, #scratch + + .text + .align 32 + .globl NG4memset +NG4memset: + andcc %o1, 0xff, %o4 + be,pt %icc, 1f + mov %o2, %o1 + sllx %o4, 8, %g1 + or %g1, %o4, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %icc, 1f + or %g1, %o2, %o4 + .size NG4memset,.-NG4memset + + .align 32 + .globl NG4bzero +NG4bzero: + clr %o4 +1: cmp %o1, 16 + ble %icc, .Ltiny + mov %o0, %o3 + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, .Laligned8 + sub %o1, %g1, %o1 +1: stb %o4, [%o0 + 0x00] + subcc %g1, 1, %g1 + bne,pt %icc, 1b + add %o0, 1, %o0 +.Laligned8: + cmp %o1, 64 + (64 - 8) + ble .Lmedium + sub %g0, %o0, %g1 + andcc %g1, (64 - 1), %g1 + brz,pn %g1, .Laligned64 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 8, %g1 + bne,pt %icc, 1b + add %o0, 0x8, %o0 +.Laligned64: + andn %o1, 64 - 1, %g1 + sub %o1, %g1, %o1 + brnz,pn %o4, .Lnon_bzero_loop + mov 0x20, %g2 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x40, %o0 +.Lpostloop: + cmp %o1, 8 + bl,pn %icc, .Ltiny + membar #StoreStore|#StoreLoad +.Lmedium: + andn %o1, 0x7, %g1 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 0x8, %g1 + bne,pt %icc, 1b + add %o0, 0x08, %o0 + andcc %o1, 0x4, %g1 + be,pt %icc, .Ltiny + sub %o1, %g1, %o1 + stw %o4, [%o0 + 0x00] + add %o0, 0x4, %o0 +.Ltiny: + cmp %o1, 0 + be,pn %icc, .Lexit +1: subcc %o1, 1, %o1 + stb %o4, [%o0 + 0x00] + bne,pt %icc, 1b + add %o0, 1, %o0 +.Lexit: + retl + mov %o3, %o0 +.Lnon_bzero_loop: + mov 0x08, %g3 + mov 0x28, %o5 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x10, %o0 + stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x30, %o0 + ba,a,pt %icc, .Lpostloop + .size NG4bzero,.-NG4bzero diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 00000000000..a114cbcf2a4 --- /dev/null +++ b/arch/sparc/lib/NG4patch.S @@ -0,0 +1,54 @@ +/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. + * + * Copyright (C) 2012 David S. Miller <davem@davemloft.net> + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara4_patch_copyops + .type niagara4_patch_copyops,#function +niagara4_patch_copyops: + NG_DO_PATCH(memcpy, NG4memcpy) + NG_DO_PATCH(___copy_from_user, NG4copy_from_user) + NG_DO_PATCH(___copy_to_user, NG4copy_to_user) + retl + nop + .size niagara4_patch_copyops,.-niagara4_patch_copyops + + .globl niagara4_patch_bzero + .type niagara4_patch_bzero,#function +niagara4_patch_bzero: + NG_DO_PATCH(memset, NG4memset) + NG_DO_PATCH(__bzero, NG4bzero) + NG_DO_PATCH(__clear_user, NGclear_user) + NG_DO_PATCH(tsb_init, NGtsb_init) + retl + nop + .size niagara4_patch_bzero,.-niagara4_patch_bzero + + .globl niagara4_patch_pageops + .type niagara4_patch_pageops,#function +niagara4_patch_pageops: + NG_DO_PATCH(copy_user_page, NG4copy_user_page) + NG_DO_PATCH(_clear_page, NG4clear_page) + NG_DO_PATCH(clear_user_page, NG4clear_user_page) + retl + nop + .size niagara4_patch_pageops,.-niagara4_patch_pageops diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index b9e790b9c6b..423d46e2258 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S @@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ restore .align 32 + .globl NGclear_page + .globl NGclear_user_page NGclear_page: /* %o0=dest */ NGclear_user_page: /* %o0=dest, %o1=vaddr */ rd %asi, %g3 diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index bafd2fc07ac..b67142b7768 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -109,7 +109,7 @@ #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ subcc %left, 8, %left; \ bl,pn %xcc, 95f; \ - fsrc1 %f0, %f1; + fsrc2 %f0, %f1; #define UNEVEN_VISCHUNK(dest, f0, f1, left) \ UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ @@ -201,7 +201,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o1, (0x40 - 1), %o1 and %g2, 7, %g2 andncc %g3, 0x7, %g3 - fmovd %f0, %f2 + fsrc2 %f0, %f2 sub %g3, 0x8, %g3 sub %o2, %GLOBAL_SPARE, %o2 diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S index 17912e60871..86f60de07b0 100644 --- a/arch/sparc/lib/ashldi3.S +++ b/arch/sparc/lib/ashldi3.S @@ -5,10 +5,10 @@ * Copyright (C) 1999 David S. Miller (davem@redhat.com) */ +#include <linux/linkage.h> + .text - .align 4 - .globl __ashldi3 -__ashldi3: +ENTRY(__ashldi3) cmp %o2, 0 be 9f mov 0x20, %g2 @@ -32,3 +32,4 @@ __ashldi3: 9: retl nop +ENDPROC(__ashldi3) diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S index 85398fd6dcc..6eb8ba2dd50 100644 --- a/arch/sparc/lib/ashrdi3.S +++ b/arch/sparc/lib/ashrdi3.S @@ -5,10 +5,10 @@ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/linkage.h> + .text - .align 4 - .globl __ashrdi3 -__ashrdi3: +ENTRY(__ashrdi3) tst %o2 be 3f or %g0, 32, %g2 @@ -34,3 +34,4 @@ __ashrdi3: 3: jmpl %o7 + 8, %g0 nop +ENDPROC(__ashrdi3) diff --git a/arch/sparc/lib/atomic_32.S b/arch/sparc/lib/atomic_32.S deleted file mode 100644 index eb6c7359cbd..00000000000 --- a/arch/sparc/lib/atomic_32.S +++ /dev/null @@ -1,44 +0,0 @@ -/* atomic.S: Move this stuff here for better ICACHE hit rates. - * - * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu) - */ - -#include <asm/ptrace.h> -#include <asm/psr.h> - - .text - .align 4 - - .globl __atomic_begin -__atomic_begin: - -#ifndef CONFIG_SMP - .globl ___xchg32_sun4c -___xchg32_sun4c: - rd %psr, %g3 - andcc %g3, PSR_PIL, %g0 - bne 1f - nop - wr %g3, PSR_PIL, %psr - nop; nop; nop -1: - andcc %g3, PSR_PIL, %g0 - ld [%g1], %g7 - bne 1f - st %g2, [%g1] - wr %g3, 0x0, %psr - nop; nop; nop -1: - mov %g7, %g2 - jmpl %o7 + 8, %g0 - mov %g4, %o7 - - .globl ___xchg32_sun4md -___xchg32_sun4md: - swap [%g1], %g2 - jmpl %o7 + 8, %g0 - mov %g4, %o7 -#endif - - .globl __atomic_end -__atomic_end: diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 59186e0fcf3..85c233d0a34 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -1,8 +1,9 @@ /* atomic.S: These things are too big to do inline. * - * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net) */ +#include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> @@ -13,9 +14,7 @@ * memory barriers, and a second which returns * a value and does the barriers. */ - .globl atomic_add - .type atomic_add,#function -atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic_add) /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 add %g1, %o0, %g7 @@ -26,11 +25,9 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ retl nop 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic_add, .-atomic_add +ENDPROC(atomic_add) - .globl atomic_sub - .type atomic_sub,#function -atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic_sub) /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 sub %g1, %o0, %g7 @@ -41,11 +38,9 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ retl nop 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic_sub, .-atomic_sub +ENDPROC(atomic_sub) - .globl atomic_add_ret - .type atomic_add_ret,#function -atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 add %g1, %o0, %g7 @@ -56,11 +51,9 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ retl sra %g1, 0, %o0 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic_add_ret, .-atomic_add_ret +ENDPROC(atomic_add_ret) - .globl atomic_sub_ret - .type atomic_sub_ret,#function -atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 sub %g1, %o0, %g7 @@ -71,11 +64,9 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ retl sra %g1, 0, %o0 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic_sub_ret, .-atomic_sub_ret +ENDPROC(atomic_sub_ret) - .globl atomic64_add - .type atomic64_add,#function -atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic64_add) /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 add %g1, %o0, %g7 @@ -86,11 +77,9 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ retl nop 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic64_add, .-atomic64_add +ENDPROC(atomic64_add) - .globl atomic64_sub - .type atomic64_sub,#function -atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic64_sub) /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 sub %g1, %o0, %g7 @@ -101,11 +90,9 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ retl nop 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic64_sub, .-atomic64_sub +ENDPROC(atomic64_sub) - .globl atomic64_add_ret - .type atomic64_add_ret,#function -atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ +ENTRY(atomic64_add_ret) /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 add %g1, %o0, %g7 @@ -116,11 +103,9 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ retl add %g1, %o0, %o0 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic64_add_ret, .-atomic64_add_ret +ENDPROC(atomic64_add_ret) - .globl atomic64_sub_ret - .type atomic64_sub_ret,#function -atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ +ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 sub %g1, %o0, %g7 @@ -131,4 +116,18 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ retl sub %g1, %o0, %o0 2: BACKOFF_SPIN(%o2, %o3, 1b) - .size atomic64_sub_ret, .-atomic64_sub_ret +ENDPROC(atomic64_sub_ret) + +ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o0], %g1 + brlez,pn %g1, 3f + sub %g1, 1, %g7 + casx [%o0], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, BACKOFF_LABEL(2f, 1b) + nop +3: retl + sub %g1, 1, %o0 +2: BACKOFF_SPIN(%o2, %o3, 1b) +ENDPROC(atomic64_dec_if_positive) diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 48d00e72ce1..8ec4e9c0251 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -119,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len) void bit_map_init(struct bit_map *t, unsigned long *map, int size) { - - if ((size & 07) != 0) - BUG(); - memset(map, 0, size>>3); - + bitmap_zero(map, size); memset(t, 0, sizeof *t); spin_lock_init(&t->lock); t->map = map; diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S index 3dc61d5537c..36f72cc0e67 100644 --- a/arch/sparc/lib/bitops.S +++ b/arch/sparc/lib/bitops.S @@ -3,14 +3,13 @@ * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net) */ +#include <linux/linkage.h> #include <asm/asi.h> #include <asm/backoff.h> .text - .globl test_and_set_bit - .type test_and_set_bit,#function -test_and_set_bit: /* %o0=nr, %o1=addr */ +ENTRY(test_and_set_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -29,11 +28,9 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size test_and_set_bit, .-test_and_set_bit +ENDPROC(test_and_set_bit) - .globl test_and_clear_bit - .type test_and_clear_bit,#function -test_and_clear_bit: /* %o0=nr, %o1=addr */ +ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -52,11 +49,9 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size test_and_clear_bit, .-test_and_clear_bit +ENDPROC(test_and_clear_bit) - .globl test_and_change_bit - .type test_and_change_bit,#function -test_and_change_bit: /* %o0=nr, %o1=addr */ +ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -75,11 +70,9 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size test_and_change_bit, .-test_and_change_bit +ENDPROC(test_and_change_bit) - .globl set_bit - .type set_bit,#function -set_bit: /* %o0=nr, %o1=addr */ +ENTRY(set_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -96,11 +89,9 @@ set_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size set_bit, .-set_bit +ENDPROC(set_bit) - .globl clear_bit - .type clear_bit,#function -clear_bit: /* %o0=nr, %o1=addr */ +ENTRY(clear_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -117,11 +108,9 @@ clear_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size clear_bit, .-clear_bit +ENDPROC(clear_bit) - .globl change_bit - .type change_bit,#function -change_bit: /* %o0=nr, %o1=addr */ +ENTRY(change_bit) /* %o0=nr, %o1=addr */ BACKOFF_SETUP(%o3) srlx %o0, 6, %g1 mov 1, %o2 @@ -138,4 +127,4 @@ change_bit: /* %o0=nr, %o1=addr */ retl nop 2: BACKOFF_SPIN(%o3, %o4, 1b) - .size change_bit, .-change_bit +ENDPROC(change_bit) diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S index 804be87f9a4..3c771011ff4 100644 --- a/arch/sparc/lib/blockops.S +++ b/arch/sparc/lib/blockops.S @@ -4,6 +4,7 @@ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ +#include <linux/linkage.h> #include <asm/page.h> /* Zero out 64 bytes of memory at (buf + offset). @@ -44,10 +45,7 @@ */ .text - .align 4 - .globl bzero_1page, __copy_1page - -bzero_1page: +ENTRY(bzero_1page) /* NOTE: If you change the number of insns of this routine, please check * arch/sparc/mm/hypersparc.S */ /* %o0 = buf */ @@ -65,8 +63,9 @@ bzero_1page: retl nop +ENDPROC(bzero_1page) -__copy_1page: +ENTRY(__copy_1page) /* NOTE: If you change the number of insns of this routine, please check * arch/sparc/mm/hypersparc.S */ /* %o0 = dst, %o1 = src */ @@ -87,3 +86,4 @@ __copy_1page: retl nop +ENDPROC(__copy_1page) diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S index 615f401edf6..8c058114b64 100644 --- a/arch/sparc/lib/bzero.S +++ b/arch/sparc/lib/bzero.S @@ -4,11 +4,11 @@ * Copyright (C) 2005 David S. Miller <davem@davemloft.net> */ +#include <linux/linkage.h> + .text - .globl memset - .type memset, #function -memset: /* %o0=buf, %o1=pat, %o2=len */ +ENTRY(memset) /* %o0=buf, %o1=pat, %o2=len */ and %o1, 0xff, %o3 mov %o2, %o1 sllx %o3, 8, %g1 @@ -19,9 +19,7 @@ memset: /* %o0=buf, %o1=pat, %o2=len */ ba,pt %xcc, 1f or %g1, %o2, %o2 - .globl __bzero - .type __bzero, #function -__bzero: /* %o0=buf, %o1=len */ +ENTRY(__bzero) /* %o0=buf, %o1=len */ clr %o2 1: mov %o0, %o3 brz,pn %o1, __bzero_done @@ -78,8 +76,8 @@ __bzero_tiny: __bzero_done: retl mov %o3, %o0 - .size __bzero, .-__bzero - .size memset, .-memset +ENDPROC(__bzero) +ENDPROC(memset) #define EX_ST(x,y) \ 98: x,y; \ @@ -89,9 +87,7 @@ __bzero_done: .text; \ .align 4; - .globl __clear_user - .type __clear_user, #function -__clear_user: /* %o0=buf, %o1=len */ +ENTRY(__clear_user) /* %o0=buf, %o1=len */ brz,pn %o1, __clear_user_done cmp %o1, 16 bl,pn %icc, __clear_user_tiny @@ -146,4 +142,4 @@ __clear_user_tiny: __clear_user_done: retl clr %o0 - .size __clear_user, .-__clear_user +ENDPROC(__clear_user) diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f6c2a..46272dfc26e 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -37,10 +37,10 @@ _clear_page: /* %o0=dest */ .globl clear_user_page clear_user_page: /* %o0=dest, %o1=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o2 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o4 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index b243d3b606b..dd16c61f326 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -34,10 +34,10 @@ #endif #define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \ - fmovd %reg0, %f48; fmovd %reg1, %f50; \ - fmovd %reg2, %f52; fmovd %reg3, %f54; \ - fmovd %reg4, %f56; fmovd %reg5, %f58; \ - fmovd %reg6, %f60; fmovd %reg7, %f62; + fsrc2 %reg0, %f48; fsrc2 %reg1, %f50; \ + fsrc2 %reg2, %f52; fsrc2 %reg3, %f54; \ + fsrc2 %reg4, %f56; fsrc2 %reg5, %f58; \ + fsrc2 %reg6, %f60; fsrc2 %reg7, %f62; .text @@ -46,10 +46,10 @@ .type copy_user_page,#function copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o4 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o3 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 @@ -104,60 +104,60 @@ cheetah_copy_page_insn: prefetch [%o1 + 0x140], #one_read ldd [%o1 + 0x010], %f4 prefetch [%o1 + 0x180], #one_read - fmovd %f0, %f16 + fsrc2 %f0, %f16 ldd [%o1 + 0x018], %f6 - fmovd %f2, %f18 + fsrc2 %f2, %f18 ldd [%o1 + 0x020], %f8 - fmovd %f4, %f20 + fsrc2 %f4, %f20 ldd [%o1 + 0x028], %f10 - fmovd %f6, %f22 + fsrc2 %f6, %f22 ldd [%o1 + 0x030], %f12 - fmovd %f8, %f24 + fsrc2 %f8, %f24 ldd [%o1 + 0x038], %f14 - fmovd %f10, %f26 + fsrc2 %f10, %f26 ldd [%o1 + 0x040], %f0 1: ldd [%o1 + 0x048], %f2 - fmovd %f12, %f28 + fsrc2 %f12, %f28 ldd [%o1 + 0x050], %f4 - fmovd %f14, %f30 + fsrc2 %f14, %f30 stda %f16, [%o0] ASI_BLK_P ldd [%o1 + 0x058], %f6 - fmovd %f0, %f16 + fsrc2 %f0, %f16 ldd [%o1 + 0x060], %f8 - fmovd %f2, %f18 + fsrc2 %f2, %f18 ldd [%o1 + 0x068], %f10 - fmovd %f4, %f20 + fsrc2 %f4, %f20 ldd [%o1 + 0x070], %f12 - fmovd %f6, %f22 + fsrc2 %f6, %f22 ldd [%o1 + 0x078], %f14 - fmovd %f8, %f24 + fsrc2 %f8, %f24 ldd [%o1 + 0x080], %f0 prefetch [%o1 + 0x180], #one_read - fmovd %f10, %f26 + fsrc2 %f10, %f26 subcc %o2, 1, %o2 add %o0, 0x40, %o0 bne,pt %xcc, 1b add %o1, 0x40, %o1 ldd [%o1 + 0x048], %f2 - fmovd %f12, %f28 + fsrc2 %f12, %f28 ldd [%o1 + 0x050], %f4 - fmovd %f14, %f30 + fsrc2 %f14, %f30 stda %f16, [%o0] ASI_BLK_P ldd [%o1 + 0x058], %f6 - fmovd %f0, %f16 + fsrc2 %f0, %f16 ldd [%o1 + 0x060], %f8 - fmovd %f2, %f18 + fsrc2 %f2, %f18 ldd [%o1 + 0x068], %f10 - fmovd %f4, %f20 + fsrc2 %f4, %f20 ldd [%o1 + 0x070], %f12 - fmovd %f6, %f22 + fsrc2 %f6, %f22 add %o0, 0x40, %o0 ldd [%o1 + 0x078], %f14 - fmovd %f8, %f24 - fmovd %f10, %f26 - fmovd %f12, %f28 - fmovd %f14, %f30 + fsrc2 %f8, %f24 + fsrc2 %f10, %f26 + fsrc2 %f12, %f28 + fsrc2 %f14, %f30 stda %f16, [%o0] ASI_BLK_P membar #Sync VISExitHalf diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S index d74bc0925f2..9614b48b6ef 100644 --- a/arch/sparc/lib/divdi3.S +++ b/arch/sparc/lib/divdi3.S @@ -19,7 +19,6 @@ Boston, MA 02111-1307, USA. */ .text .align 4 - .global .udiv .globl __divdi3 __divdi3: save %sp,-104,%sp @@ -83,8 +82,9 @@ __divdi3: bne .LL85 mov %i0,%o2 mov 1,%o0 - call .udiv,0 mov 0,%o1 + wr %g0, 0, %y + udiv %o0, %o1, %o0 mov %o0,%o4 mov %i0,%o2 .LL85: diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S index 58ca5b9a877..4742d59029e 100644 --- a/arch/sparc/lib/ipcsum.S +++ b/arch/sparc/lib/ipcsum.S @@ -1,8 +1,7 @@ +#include <linux/linkage.h> + .text - .align 32 - .globl ip_fast_csum - .type ip_fast_csum,#function -ip_fast_csum: /* %o0 = iph, %o1 = ihl */ +ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */ sub %o1, 4, %g7 lduw [%o0 + 0x00], %o2 lduw [%o0 + 0x04], %g2 @@ -31,4 +30,4 @@ ip_fast_csum: /* %o0 = iph, %o1 = ihl */ set 0xffff, %o1 retl and %o2, %o1, %o0 - .size ip_fast_csum, .-ip_fast_csum +ENDPROC(ip_fast_csum) diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index f73c2240fe6..323335b9cd2 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -15,8 +15,6 @@ /* string functions */ EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(__strlen_user); -EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(strncmp); /* mem* functions */ @@ -33,9 +31,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__bzero); -/* Moving data to/from/in userspace. */ -EXPORT_SYMBOL(__strncpy_from_user); - /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial); @@ -56,23 +51,11 @@ extern int __divdi3(int, int); extern void (*__copy_1page)(void *, const void *); extern void (*bzero_1page)(void *); -extern int __strncmp(const char *, const char *, __kernel_size_t); - extern void ___rw_read_enter(void); extern void ___rw_read_try(void); extern void ___rw_read_exit(void); extern void ___rw_write_enter(void); -/* Alias functions whose names begin with "." and export the aliases. - * The module references will be fixed up by module_frob_arch_sections. - */ -extern int _Div(int, int); -extern int _Mul(int, int); -extern int _Rem(int, int); -extern unsigned _Udiv(unsigned, unsigned); -extern unsigned _Umul(unsigned, unsigned); -extern unsigned _Urem(unsigned, unsigned); - /* Networking helper routines. */ EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); @@ -81,9 +64,6 @@ EXPORT_SYMBOL(__copy_1page); EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(bzero_1page); -/* string functions */ -EXPORT_SYMBOL(__strncmp); - /* Moving data to/from/in userspace. */ EXPORT_SYMBOL(__copy_user); @@ -100,13 +80,6 @@ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(__muldi3); EXPORT_SYMBOL(__divdi3); - -EXPORT_SYMBOL(_Rem); -EXPORT_SYMBOL(_Urem); -EXPORT_SYMBOL(_Mul); -EXPORT_SYMBOL(_Umul); -EXPORT_SYMBOL(_Div); -EXPORT_SYMBOL(_Udiv); #endif /* @@ -125,15 +98,6 @@ EXPORT_SYMBOL(___copy_from_user); EXPORT_SYMBOL(___copy_in_user); EXPORT_SYMBOL(__clear_user); -/* RW semaphores */ -EXPORT_SYMBOL(__down_read); -EXPORT_SYMBOL(__down_read_trylock); -EXPORT_SYMBOL(__down_write); -EXPORT_SYMBOL(__down_write_trylock); -EXPORT_SYMBOL(__up_read); -EXPORT_SYMBOL(__up_write); -EXPORT_SYMBOL(__downgrade_write); - /* Atomic counter implementation. */ EXPORT_SYMBOL(atomic_add); EXPORT_SYMBOL(atomic_add_ret); @@ -143,6 +107,7 @@ EXPORT_SYMBOL(atomic64_add); EXPORT_SYMBOL(atomic64_add_ret); EXPORT_SYMBOL(atomic64_sub); EXPORT_SYMBOL(atomic64_sub_ret); +EXPORT_SYMBOL(atomic64_dec_if_positive); /* Atomic bit operations. */ EXPORT_SYMBOL(test_and_set_bit); @@ -161,6 +126,10 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); +/* CRYPTO code needs this */ +void VISenterhalf(void); +EXPORT_SYMBOL(VISenterhalf); + extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S index 47a1354c160..60ebc7cdbee 100644 --- a/arch/sparc/lib/lshrdi3.S +++ b/arch/sparc/lib/lshrdi3.S @@ -1,6 +1,6 @@ +#include <linux/linkage.h> - .globl __lshrdi3 -__lshrdi3: +ENTRY(__lshrdi3) cmp %o2, 0 be 3f mov 0x20, %g2 @@ -24,3 +24,4 @@ __lshrdi3: 3: retl nop +ENDPROC(__lshrdi3) diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index 97395802c23..b7f6334e159 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -4,11 +4,10 @@ * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/linkage.h> + .text - .align 32 - .globl memmove - .type memmove,#function -memmove: /* o0=dst o1=src o2=len */ +ENTRY(memmove) /* o0=dst o1=src o2=len */ mov %o0, %g1 cmp %o0, %o1 bleu,pt %xcc, memcpy @@ -28,4 +27,4 @@ memmove: /* o0=dst o1=src o2=len */ retl mov %g1, %o0 - .size memmove, .-memmove +ENDPROC(memmove) diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S deleted file mode 100644 index c45470d0b0c..00000000000 --- a/arch/sparc/lib/mul.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * mul.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - -/* - * Signed multiply, from Appendix E of the Sparc Version 8 - * Architecture Manual. - */ - -/* - * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of - * the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. - */ - - .globl .mul - .globl _Mul -.mul: -_Mul: /* needed for export */ - mov %o0, %y ! multiplier -> Y - andncc %o0, 0xfff, %g0 ! test bits 12..31 - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - ! If %o0 was negative, the result is - ! (%o0 * %o1) + (%o1 << 32)) - ! We fix that here. - -#if 0 - tst %o0 - bge 1f - rd %y, %o0 - - ! %o0 was indeed negative; fix upper 32 bits of result by subtracting - ! %o1 (i.e., return %o4 - %o1 in %o1). - retl - sub %o4, %o1, %o1 - -1: - retl - mov %o4, %o1 -#else - /* Faster code adapted from tege@sics.se's code for umul.S. */ - sra %o0, 31, %o2 ! make mask from sign bit - and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0 - rd %y, %o0 ! get lower half of product - retl - sub %o4, %o2, %o1 ! subtract compensation - ! and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the low part of the - * result; %y has the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * --hi-- ----low-part---- - * - * The upper 12 bits of %o4 should be sign-extended to form the - * high part of the product (i.e., highpart = %o4 >> 20). - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left - or %o5, %o0, %o0 ! construct low part of result - retl - sra %o4, 20, %o1 ! ... and extract high part of result - - .globl .mul_patch -.mul_patch: - smul %o0, %o1, %o0 - retl - rd %y, %o1 - nop diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S index 7f17872d060..9794939d1c1 100644 --- a/arch/sparc/lib/muldi3.S +++ b/arch/sparc/lib/muldi3.S @@ -63,12 +63,12 @@ __muldi3: rd %y, %o1 mov %o1, %l3 mov %i1, %o0 - call .umul mov %i2, %o1 + umul %o0, %o1, %o0 mov %o0, %l0 mov %i0, %o0 - call .umul mov %i3, %o1 + umul %o0, %o1, %o0 add %l0, %o0, %l0 mov %l2, %i0 add %l2, %l0, %i0 diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S deleted file mode 100644 index 42fb8625281..00000000000 --- a/arch/sparc/lib/rem.S +++ /dev/null @@ -1,384 +0,0 @@ -/* - * rem.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .rem name of function to generate - * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .rem - .globl _Rem -.rem: -_Rem: /* needed for export */ - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g2 ! compute sign in any case - - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -Lgot_result: - ! check to see if answer should be < 0 - tst %g2 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - - .globl .rem_patch -.rem_patch: - sra %o0, 0x1f, %o4 - wr %o4, 0x0, %y - nop - nop - nop - sdivcc %o0, %o1, %o2 - bvs,a 1f - xnor %o2, %g0, %o2 -1: smul %o2, %o1, %o2 - retl - sub %o0, %o2, %o0 - nop diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S deleted file mode 100644 index f0a0d4e4db7..00000000000 --- a/arch/sparc/lib/sdiv.S +++ /dev/null @@ -1,381 +0,0 @@ -/* - * sdiv.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .div - .globl _Div -.div: -_Div: /* needed for export */ - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - xor %o1, %o0, %g2 ! compute sign in any case - - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - -Lgot_result: - ! check to see if answer should be < 0 - tst %g2 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 - - .globl .div_patch -.div_patch: - sra %o0, 0x1f, %o2 - wr %o2, 0x0, %y - nop - nop - nop - sdivcc %o0, %o1, %o0 - bvs,a 1f - xnor %o0, %g0, %o0 -1: retl - nop diff --git a/arch/sparc/lib/strlen_user_32.S b/arch/sparc/lib/strlen_user_32.S deleted file mode 100644 index 8c8a371df3c..00000000000 --- a/arch/sparc/lib/strlen_user_32.S +++ /dev/null @@ -1,109 +0,0 @@ -/* strlen_user.S: Sparc optimized strlen_user code - * - * Return length of string in userspace including terminating 0 - * or 0 for error - * - * Copyright (C) 1991,1996 Free Software Foundation - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#define LO_MAGIC 0x01010101 -#define HI_MAGIC 0x80808080 - -10: - ldub [%o0], %o5 - cmp %o5, 0 - be 1f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 4f - or %o4, %lo(HI_MAGIC), %o3 -11: - ldub [%o0], %o5 - cmp %o5, 0 - be 2f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 5f - sethi %hi(LO_MAGIC), %o4 -12: - ldub [%o0], %o5 - cmp %o5, 0 - be 3f - add %o0, 1, %o0 - b 13f - or %o4, %lo(LO_MAGIC), %o2 -1: - retl - mov 1, %o0 -2: - retl - mov 2, %o0 -3: - retl - mov 3, %o0 - - .align 4 - .global __strlen_user, __strnlen_user -__strlen_user: - sethi %hi(32768), %o1 -__strnlen_user: - mov %o1, %g1 - mov %o0, %o1 - andcc %o0, 3, %g0 - bne 10b - sethi %hi(HI_MAGIC), %o4 - or %o4, %lo(HI_MAGIC), %o3 -4: - sethi %hi(LO_MAGIC), %o4 -5: - or %o4, %lo(LO_MAGIC), %o2 -13: - ld [%o0], %o5 -2: - sub %o5, %o2, %o4 - andcc %o4, %o3, %g0 - bne 82f - add %o0, 4, %o0 - sub %o0, %o1, %g2 -81: cmp %g2, %g1 - blu 13b - mov %o0, %o4 - ba,a 1f - - /* Check every byte. */ -82: srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be 1f - add %o0, -3, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be 1f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be 1f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne 81b - sub %o0, %o1, %g2 - - add %o4, 1, %o4 -1: - retl - sub %o4, %o1, %o0 - - .section .fixup,#alloc,#execinstr - .align 4 -9: - retl - clr %o0 - - .section __ex_table,#alloc - .align 4 - - .word 10b, 9b - .word 11b, 9b - .word 12b, 9b - .word 13b, 9b diff --git a/arch/sparc/lib/strlen_user_64.S b/arch/sparc/lib/strlen_user_64.S deleted file mode 100644 index 114ed111e25..00000000000 --- a/arch/sparc/lib/strlen_user_64.S +++ /dev/null @@ -1,95 +0,0 @@ -/* strlen_user.S: Sparc64 optimized strlen_user code - * - * Return length of string in userspace including terminating 0 - * or 0 for error - * - * Copyright (C) 1991,1996 Free Software Foundation - * Copyright (C) 1996,1999 David S. Miller (davem@redhat.com) - * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <asm/asi.h> - -#define LO_MAGIC 0x01010101 -#define HI_MAGIC 0x80808080 - - .align 4 - .global __strlen_user, __strnlen_user -__strlen_user: - sethi %hi(32768), %o1 -__strnlen_user: - mov %o1, %g1 - mov %o0, %o1 - andcc %o0, 3, %g0 - be,pt %icc, 9f - sethi %hi(HI_MAGIC), %o4 -10: lduba [%o0] %asi, %o5 - brz,pn %o5, 21f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be,pn %icc, 4f - or %o4, %lo(HI_MAGIC), %o3 -11: lduba [%o0] %asi, %o5 - brz,pn %o5, 22f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be,pt %icc, 13f - srl %o3, 7, %o2 -12: lduba [%o0] %asi, %o5 - brz,pn %o5, 23f - add %o0, 1, %o0 - ba,pt %icc, 2f -15: lda [%o0] %asi, %o5 -9: or %o4, %lo(HI_MAGIC), %o3 -4: srl %o3, 7, %o2 -13: lda [%o0] %asi, %o5 -2: sub %o5, %o2, %o4 - andcc %o4, %o3, %g0 - bne,pn %icc, 82f - add %o0, 4, %o0 - sub %o0, %o1, %g2 -81: cmp %g2, %g1 - blu,pt %icc, 13b - mov %o0, %o4 - ba,a,pt %xcc, 1f - - /* Check every byte. */ -82: srl %o5, 24, %g7 - andcc %g7, 0xff, %g0 - be,pn %icc, 1f - add %o0, -3, %o4 - srl %o5, 16, %g7 - andcc %g7, 0xff, %g0 - be,pn %icc, 1f - add %o4, 1, %o4 - srl %o5, 8, %g7 - andcc %g7, 0xff, %g0 - be,pn %icc, 1f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne,pt %icc, 81b - sub %o0, %o1, %g2 - add %o4, 1, %o4 -1: retl - sub %o4, %o1, %o0 -21: retl - mov 1, %o0 -22: retl - mov 2, %o0 -23: retl - mov 3, %o0 - - .section .fixup,#alloc,#execinstr - .align 4 -30: - retl - clr %o0 - - .section __ex_table,"a" - .align 4 - - .word 10b, 30b - .word 11b, 30b - .word 12b, 30b - .word 15b, 30b - .word 13b, 30b diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S index 494ec664537..c0d1b568c1c 100644 --- a/arch/sparc/lib/strncmp_32.S +++ b/arch/sparc/lib/strncmp_32.S @@ -3,11 +3,10 @@ * generic strncmp routine. */ +#include <linux/linkage.h> + .text - .align 4 - .global __strncmp, strncmp -__strncmp: -strncmp: +ENTRY(strncmp) mov %o0, %g3 mov 0, %o3 @@ -116,3 +115,4 @@ strncmp: and %g2, 0xff, %o0 retl sub %o3, %o0, %o0 +ENDPROC(strncmp) diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S index 980e8375155..0656627166f 100644 --- a/arch/sparc/lib/strncmp_64.S +++ b/arch/sparc/lib/strncmp_64.S @@ -4,13 +4,11 @@ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ +#include <linux/linkage.h> #include <asm/asi.h> .text - .align 32 - .globl strncmp - .type strncmp,#function -strncmp: +ENTRY(strncmp) brlez,pn %o2, 3f lduba [%o0] (ASI_PNF), %o3 1: @@ -29,4 +27,4 @@ strncmp: 3: retl clr %o0 - .size strncmp, .-strncmp +ENDPROC(strncmp) diff --git a/arch/sparc/lib/strncpy_from_user_32.S b/arch/sparc/lib/strncpy_from_user_32.S deleted file mode 100644 index d77198976a6..00000000000 --- a/arch/sparc/lib/strncpy_from_user_32.S +++ /dev/null @@ -1,47 +0,0 @@ -/* strncpy_from_user.S: Sparc strncpy from userspace. - * - * Copyright(C) 1996 David S. Miller - */ - -#include <asm/ptrace.h> -#include <asm/errno.h> - - .text - .align 4 - - /* Must return: - * - * -EFAULT for an exception - * count if we hit the buffer limit - * bytes copied if we hit a null byte - */ - - .globl __strncpy_from_user -__strncpy_from_user: - /* %o0=dest, %o1=src, %o2=count */ - mov %o2, %o3 -1: - subcc %o2, 1, %o2 - bneg 2f - nop -10: - ldub [%o1], %o4 - add %o0, 1, %o0 - cmp %o4, 0 - add %o1, 1, %o1 - bne 1b - stb %o4, [%o0 - 1] -2: - add %o2, 1, %o0 - retl - sub %o3, %o0, %o0 - - .section .fixup,#alloc,#execinstr - .align 4 -4: - retl - mov -EFAULT, %o0 - - .section __ex_table,#alloc - .align 4 - .word 10b, 4b diff --git a/arch/sparc/lib/strncpy_from_user_64.S b/arch/sparc/lib/strncpy_from_user_64.S deleted file mode 100644 index 511c8f136f9..00000000000 --- a/arch/sparc/lib/strncpy_from_user_64.S +++ /dev/null @@ -1,135 +0,0 @@ -/* - * strncpy_from_user.S: Sparc64 strncpy from userspace. - * - * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include <asm/asi.h> -#include <asm/errno.h> - - .data - .align 8 -0: .xword 0x0101010101010101 - - .text - .align 32 - - /* Must return: - * - * -EFAULT for an exception - * count if we hit the buffer limit - * bytes copied if we hit a null byte - * (without the null byte) - * - * This implementation assumes: - * %o1 is 8 aligned => !(%o2 & 7) - * %o0 is 8 aligned (if not, it will be slooooow, but will work) - * - * This is optimized for the common case: - * in my stats, 90% of src are 8 aligned (even on sparc32) - * and average length is 18 or so. - */ - - .globl __strncpy_from_user - .type __strncpy_from_user,#function -__strncpy_from_user: - /* %o0=dest, %o1=src, %o2=count */ - andcc %o1, 7, %g0 ! IEU1 Group - bne,pn %icc, 30f ! CTI - add %o0, %o2, %g3 ! IEU0 -60: ldxa [%o1] %asi, %g1 ! Load Group - brlez,pn %o2, 10f ! CTI - mov %o0, %o3 ! IEU0 -50: sethi %hi(0b), %o4 ! IEU0 Group - ldx [%o4 + %lo(0b)], %o4 ! Load - sllx %o4, 7, %o5 ! IEU1 Group -1: sub %g1, %o4, %g2 ! IEU0 Group - stx %g1, [%o0] ! Store - add %o0, 8, %o0 ! IEU1 - andcc %g2, %o5, %g0 ! IEU1 Group - bne,pn %xcc, 5f ! CTI - add %o1, 8, %o1 ! IEU0 - cmp %o0, %g3 ! IEU1 Group - bl,a,pt %xcc, 1b ! CTI -61: ldxa [%o1] %asi, %g1 ! Load -10: retl ! CTI Group - mov %o2, %o0 ! IEU0 -5: srlx %g2, 32, %g7 ! IEU0 Group - sethi %hi(0xff00), %o4 ! IEU1 - andcc %g7, %o5, %g0 ! IEU1 Group - be,pn %icc, 2f ! CTI - or %o4, %lo(0xff00), %o4 ! IEU0 - srlx %g1, 48, %g7 ! IEU0 Group - andcc %g7, %o4, %g0 ! IEU1 Group - be,pn %icc, 50f ! CTI - andcc %g7, 0xff, %g0 ! IEU1 Group - be,pn %icc, 51f ! CTI - srlx %g1, 32, %g7 ! IEU0 - andcc %g7, %o4, %g0 ! IEU1 Group - be,pn %icc, 52f ! CTI - andcc %g7, 0xff, %g0 ! IEU1 Group - be,pn %icc, 53f ! CTI -2: andcc %g2, %o5, %g0 ! IEU1 Group - be,pn %icc, 2f ! CTI - srl %g1, 16, %g7 ! IEU0 - andcc %g7, %o4, %g0 ! IEU1 Group - be,pn %icc, 54f ! CTI - andcc %g7, 0xff, %g0 ! IEU1 Group - be,pn %icc, 55f ! CTI - andcc %g1, %o4, %g0 ! IEU1 Group - be,pn %icc, 56f ! CTI - andcc %g1, 0xff, %g0 ! IEU1 Group - be,a,pn %icc, 57f ! CTI - sub %o0, %o3, %o0 ! IEU0 -2: cmp %o0, %g3 ! IEU1 Group - bl,a,pt %xcc, 50b ! CTI -62: ldxa [%o1] %asi, %g1 ! Load - retl ! CTI Group - mov %o2, %o0 ! IEU0 -50: sub %o0, %o3, %o0 - retl - sub %o0, 8, %o0 -51: sub %o0, %o3, %o0 - retl - sub %o0, 7, %o0 -52: sub %o0, %o3, %o0 - retl - sub %o0, 6, %o0 -53: sub %o0, %o3, %o0 - retl - sub %o0, 5, %o0 -54: sub %o0, %o3, %o0 - retl - sub %o0, 4, %o0 -55: sub %o0, %o3, %o0 - retl - sub %o0, 3, %o0 -56: sub %o0, %o3, %o0 - retl - sub %o0, 2, %o0 -57: retl - sub %o0, 1, %o0 -30: brlez,pn %o2, 3f - sub %g0, %o2, %o3 - add %o0, %o2, %o0 -63: lduba [%o1] %asi, %o4 -1: add %o1, 1, %o1 - brz,pn %o4, 2f - stb %o4, [%o0 + %o3] - addcc %o3, 1, %o3 - bne,pt %xcc, 1b -64: lduba [%o1] %asi, %o4 -3: retl - mov %o2, %o0 -2: retl - add %o2, %o3, %o0 - .size __strncpy_from_user, .-__strncpy_from_user - - .section __ex_table,"a" - .align 4 - .word 60b, __retl_efault - .word 61b, __retl_efault - .word 62b, __retl_efault - .word 63b, __retl_efault - .word 64b, __retl_efault - .previous diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c new file mode 100644 index 00000000000..1e06ed50068 --- /dev/null +++ b/arch/sparc/lib/ucmpdi2.c @@ -0,0 +1,19 @@ +#include <linux/module.h> +#include "libgcc.h" + +word_type __ucmpdi2(unsigned long long a, unsigned long long b) +{ + const DWunion au = {.ll = a}; + const DWunion bu = {.ll = b}; + + if ((unsigned int) au.s.high < (unsigned int) bu.s.high) + return 0; + else if ((unsigned int) au.s.high > (unsigned int) bu.s.high) + return 2; + if ((unsigned int) au.s.low < (unsigned int) bu.s.low) + return 0; + else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) + return 2; + return 1; +} +EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S deleted file mode 100644 index 2101405bdfc..00000000000 --- a/arch/sparc/lib/udiv.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * udiv.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .udiv name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - .globl .udiv - .globl _Udiv -.udiv: -_Udiv: /* needed for export */ - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - -Lgot_result: - - retl - mov %o2, %o0 - - .globl .udiv_patch -.udiv_patch: - wr %g0, 0x0, %y - nop - nop - retl - udiv %o0, %o1, %o0 - nop diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S index b430f1f0ef6..24e0a355e2e 100644 --- a/arch/sparc/lib/udivdi3.S +++ b/arch/sparc/lib/udivdi3.S @@ -60,8 +60,9 @@ __udivdi3: bne .LL77 mov %i0,%o2 mov 1,%o0 - call .udiv,0 mov 0,%o1 + wr %g0, 0, %y + udiv %o0, %o1, %o0 mov %o0,%o3 mov %i0,%o2 .LL77: diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S deleted file mode 100644 index 1f36ae68252..00000000000 --- a/arch/sparc/lib/umul.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * umul.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* - * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - * call .umul - * nop - * bnz overflow (or tnz) - */ - - .globl .umul - .globl _Umul -.umul: -_Umul: /* needed for export */ - or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - - /* - * Normally, with the shift-and-add approach, if both numbers are - * positive you get the correct result. With 32-bit two's-complement - * numbers, -x is represented as - * - * x 32 - * ( 2 - ------ ) mod 2 * 2 - * 32 - * 2 - * - * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, - * we can treat this as if the radix point were just to the left - * of the sign bit (multiply by 2^32), and get - * - * -x = (2 - x) mod 2 - * - * Then, ignoring the `mod 2's for convenience: - * - * x * y = xy - * -x * y = 2y - xy - * x * -y = 2x - xy - * -x * -y = 4 - 2x - 2y + xy - * - * For signed multiplies, we subtract (x << 32) from the partial - * product to fix this problem for negative multipliers (see mul.s). - * Because of the way the shift into the partial product is calculated - * (N xor V), this term is automatically removed for the multiplicand, - * so we don't have to adjust. - * - * But for unsigned multiplies, the high order bit wasn't a sign bit, - * and the correction is wrong. So for unsigned multiplies where the - * high order bit is one, we end up with xy - (y << 32). To fix it - * we add y << 32. - */ -#if 0 - tst %o1 - bl,a 1f ! if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) - -1: - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 -#else - /* Faster code from tege@sics.se. */ - sra %o1, 31, %o2 ! make mask from sign bit - and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %o2, %o1 ! add compensation and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above), - * and overflow is impossible (the answer is at most 24 bits long). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the result; %y has - * the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * -----result----- - * - * The 12 bits of %o4 left of the `result' area are all zero; - * in fact, all top 20 bits of %o4 are zero. - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20 - or %o5, %o0, %o0 - retl - addcc %g0, %g0, %o1 ! %o1 = zero, and set Z - - .globl .umul_patch -.umul_patch: - umul %o0, %o1, %o0 - retl - rd %y, %o1 - nop diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S deleted file mode 100644 index 77123eb83c4..00000000000 --- a/arch/sparc/lib/urem.S +++ /dev/null @@ -1,357 +0,0 @@ -/* - * urem.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - -/* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .urem name of function to generate - * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - .globl .urem - .globl _Urem -.urem: -_Urem: /* needed for export */ - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - - sethi %hi(1 << (32 - 4 - 1)), %g1 - - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - - sll %o5, 4, %o5 - - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: - addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: - cmp %o5, %o3 - blu 2b - nop - - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - - sub %o3, %o5, %o3 - mov 1, %o2 - - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -Lgot_result: - - retl - mov %o3, %o0 - - .globl .urem_patch -.urem_patch: - wr %g0, 0x0, %y - nop - nop - nop - udiv %o0, %o1, %o2 - umul %o2, %o1, %o2 - retl - sub %o0, %o2, %o0 diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c deleted file mode 100644 index 14b363fec8a..00000000000 --- a/arch/sparc/lib/usercopy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <linux/module.h> -#include <linux/bug.h> - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S index f44f58f4023..2c05641c326 100644 --- a/arch/sparc/lib/xor.S +++ b/arch/sparc/lib/xor.S @@ -8,6 +8,7 @@ * Copyright (C) 2006 David S. Miller <davem@davemloft.net> */ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #include <asm/dcu.h> @@ -19,12 +20,9 @@ * !(len & 127) && len >= 256 */ .text - .align 32 /* VIS versions. */ - .globl xor_vis_2 - .type xor_vis_2,#function -xor_vis_2: +ENTRY(xor_vis_2) rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 be,pt %icc, 0f @@ -91,11 +89,9 @@ xor_vis_2: wr %g1, %g0, %asi retl wr %g0, 0, %fprs - .size xor_vis_2, .-xor_vis_2 +ENDPROC(xor_vis_2) - .globl xor_vis_3 - .type xor_vis_3,#function -xor_vis_3: +ENTRY(xor_vis_3) rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 be,pt %icc, 0f @@ -159,11 +155,9 @@ xor_vis_3: wr %g1, %g0, %asi retl wr %g0, 0, %fprs - .size xor_vis_3, .-xor_vis_3 +ENDPROC(xor_vis_3) - .globl xor_vis_4 - .type xor_vis_4,#function -xor_vis_4: +ENTRY(xor_vis_4) rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 be,pt %icc, 0f @@ -246,11 +240,9 @@ xor_vis_4: wr %g1, %g0, %asi retl wr %g0, 0, %fprs - .size xor_vis_4, .-xor_vis_4 +ENDPROC(xor_vis_4) - .globl xor_vis_5 - .type xor_vis_5,#function -xor_vis_5: +ENTRY(xor_vis_5) save %sp, -192, %sp rd %fprs, %o5 andcc %o5, FPRS_FEF|FPRS_DU, %g0 @@ -354,12 +346,10 @@ xor_vis_5: wr %g0, 0, %fprs ret restore - .size xor_vis_5, .-xor_vis_5 +ENDPROC(xor_vis_5) /* Niagara versions. */ - .globl xor_niagara_2 - .type xor_niagara_2,#function -xor_niagara_2: /* %o0=bytes, %o1=dest, %o2=src */ +ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */ save %sp, -192, %sp prefetch [%i1], #n_writes prefetch [%i2], #one_read @@ -402,11 +392,9 @@ xor_niagara_2: /* %o0=bytes, %o1=dest, %o2=src */ wr %g7, 0x0, %asi ret restore - .size xor_niagara_2, .-xor_niagara_2 +ENDPROC(xor_niagara_2) - .globl xor_niagara_3 - .type xor_niagara_3,#function -xor_niagara_3: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ +ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ save %sp, -192, %sp prefetch [%i1], #n_writes prefetch [%i2], #one_read @@ -465,11 +453,9 @@ xor_niagara_3: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ wr %g7, 0x0, %asi ret restore - .size xor_niagara_3, .-xor_niagara_3 +ENDPROC(xor_niagara_3) - .globl xor_niagara_4 - .type xor_niagara_4,#function -xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ +ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ save %sp, -192, %sp prefetch [%i1], #n_writes prefetch [%i2], #one_read @@ -549,11 +535,9 @@ xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ wr %g7, 0x0, %asi ret restore - .size xor_niagara_4, .-xor_niagara_4 +ENDPROC(xor_niagara_4) - .globl xor_niagara_5 - .type xor_niagara_5,#function -xor_niagara_5: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ +ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ save %sp, -192, %sp prefetch [%i1], #n_writes prefetch [%i2], #one_read @@ -649,4 +633,4 @@ xor_niagara_5: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 * wr %g7, 0x0, %asi ret restore - .size xor_niagara_5, .-xor_niagara_5 +ENDPROC(xor_niagara_5) |
