diff options
Diffstat (limited to 'arch/sh/lib')
| -rw-r--r-- | arch/sh/lib/Makefile | 28 | ||||
| -rw-r--r-- | arch/sh/lib/__clear_user.S | 108 | ||||
| -rw-r--r-- | arch/sh/lib/ashiftrt.S | 149 | ||||
| -rw-r--r-- | arch/sh/lib/ashldi3.c | 29 | ||||
| -rw-r--r-- | arch/sh/lib/ashlsi3.S | 193 | ||||
| -rw-r--r-- | arch/sh/lib/ashrdi3.c | 31 | ||||
| -rw-r--r-- | arch/sh/lib/ashrsi3.S | 185 | ||||
| -rw-r--r-- | arch/sh/lib/checksum.S | 72 | ||||
| -rw-r--r-- | arch/sh/lib/copy_page.S | 389 | ||||
| -rw-r--r-- | arch/sh/lib/delay.c | 16 | ||||
| -rw-r--r-- | arch/sh/lib/div64-generic.c | 10 | ||||
| -rw-r--r-- | arch/sh/lib/div64.S | 6 | ||||
| -rw-r--r-- | arch/sh/lib/io.c | 82 | ||||
| -rw-r--r-- | arch/sh/lib/libgcc.h | 25 | ||||
| -rw-r--r-- | arch/sh/lib/lshrdi3.c | 29 | ||||
| -rw-r--r-- | arch/sh/lib/lshrsi3.S | 193 | ||||
| -rw-r--r-- | arch/sh/lib/mcount.S | 310 | ||||
| -rw-r--r-- | arch/sh/lib/memcpy-sh4.S | 27 | ||||
| -rw-r--r-- | arch/sh/lib/memset-sh4.S | 107 | ||||
| -rw-r--r-- | arch/sh/lib/memset.S | 1 | ||||
| -rw-r--r-- | arch/sh/lib/movmem.S | 238 | ||||
| -rw-r--r-- | arch/sh/lib/strcasecmp.c | 26 | ||||
| -rw-r--r-- | arch/sh/lib/strlen.S | 2 | ||||
| -rw-r--r-- | arch/sh/lib/udiv_qrnnd.S | 81 | ||||
| -rw-r--r-- | arch/sh/lib/udivdi3.c | 16 | ||||
| -rw-r--r-- | arch/sh/lib/udivsi3.S | 87 | ||||
| -rw-r--r-- | arch/sh/lib/udivsi3_i4i-Os.S | 149 | ||||
| -rw-r--r-- | arch/sh/lib/udivsi3_i4i.S | 666 |
28 files changed, 3164 insertions, 91 deletions
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index b5681e3f968..3baff31e58c 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -2,12 +2,32 @@ # Makefile for SuperH-specific library files.. # -lib-y = delay.o memset.o memmove.o memchr.o \ - checksum.o strcasecmp.o strlen.o div64.o udivdi3.o \ - div64-generic.o +lib-y = delay.o memmove.o memchr.o \ + checksum.o strlen.o div64.o div64-generic.o + +# Extracted from libgcc +obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ + ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \ + udiv_qrnnd.o + +udivsi3-y := udivsi3_i4i-Os.o + +ifneq ($(CONFIG_CC_OPTIMIZE_FOR_SIZE),y) +udivsi3-$(CONFIG_CPU_SH3) := udivsi3_i4i.o +udivsi3-$(CONFIG_CPU_SH4) := udivsi3_i4i.o +endif +udivsi3-y += udivsi3.o + +obj-y += io.o memcpy-y := memcpy.o memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o -lib-y += $(memcpy-y) +memset-y := memset.o +memset-$(CONFIG_CPU_SH4) := memset-sh4.o + +lib-$(CONFIG_MMU) += copy_page.o __clear_user.o +lib-$(CONFIG_MCOUNT) += mcount.o +lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y) +ccflags-y := -Werror diff --git a/arch/sh/lib/__clear_user.S b/arch/sh/lib/__clear_user.S new file mode 100644 index 00000000000..db1dca7aad1 --- /dev/null +++ b/arch/sh/lib/__clear_user.S @@ -0,0 +1,108 @@ +/* + * __clear_user_page, __clear_user, clear_page implementation of SuperH + * + * Copyright (C) 2001 Kaz Kojima + * Copyright (C) 2001, 2002 Niibe Yutaka + * Copyright (C) 2006 Paul Mundt + */ +#include <linux/linkage.h> +#include <asm/page.h> + +ENTRY(__clear_user) + ! + mov #0, r0 + mov #0xffffffe0, r1 + ! + ! r4..(r4+31)&~32 -------- not aligned [ Area 0 ] + ! (r4+31)&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] + ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] + ! + ! Clear area 0 + mov r4, r2 + ! + tst r1, r5 ! length < 32 + bt .Larea2 ! skip to remainder + ! + add #31, r2 + and r1, r2 + cmp/eq r4, r2 + bt .Larea1 + mov r2, r3 + sub r4, r3 + mov r3, r7 + mov r4, r2 + ! +.L0: dt r3 +0: mov.b r0, @r2 + bf/s .L0 + add #1, r2 + ! + sub r7, r5 + mov r2, r4 +.Larea1: + mov r4, r3 + add r5, r3 + and r1, r3 + cmp/hi r2, r3 + bf .Larea2 + ! + ! Clear area 1 +#if defined(CONFIG_CPU_SH4) +1: movca.l r0, @r2 +#else +1: mov.l r0, @r2 +#endif + add #4, r2 +2: mov.l r0, @r2 + add #4, r2 +3: mov.l r0, @r2 + add #4, r2 +4: mov.l r0, @r2 + add #4, r2 +5: mov.l r0, @r2 + add #4, r2 +6: mov.l r0, @r2 + add #4, r2 +7: mov.l r0, @r2 + add #4, r2 +8: mov.l r0, @r2 + add #4, r2 + cmp/hi r2, r3 + bt/s 1b + nop + ! + ! Clear area 2 +.Larea2: + mov r4, r3 + add r5, r3 + cmp/hs r3, r2 + bt/s .Ldone + sub r2, r3 +.L2: dt r3 +9: mov.b r0, @r2 + bf/s .L2 + add #1, r2 + ! +.Ldone: rts + mov #0, r0 ! return 0 as normal return + + ! return the number of bytes remained +.Lbad_clear_user: + mov r4, r0 + add r5, r0 + rts + sub r2, r0 + +.section __ex_table,"a" + .align 2 + .long 0b, .Lbad_clear_user + .long 1b, .Lbad_clear_user + .long 2b, .Lbad_clear_user + .long 3b, .Lbad_clear_user + .long 4b, .Lbad_clear_user + .long 5b, .Lbad_clear_user + .long 6b, .Lbad_clear_user + .long 7b, .Lbad_clear_user + .long 8b, .Lbad_clear_user + .long 9b, .Lbad_clear_user +.previous diff --git a/arch/sh/lib/ashiftrt.S b/arch/sh/lib/ashiftrt.S new file mode 100644 index 00000000000..45ce86558f4 --- /dev/null +++ b/arch/sh/lib/ashiftrt.S @@ -0,0 +1,149 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .global __ashiftrt_r4_0 + .global __ashiftrt_r4_1 + .global __ashiftrt_r4_2 + .global __ashiftrt_r4_3 + .global __ashiftrt_r4_4 + .global __ashiftrt_r4_5 + .global __ashiftrt_r4_6 + .global __ashiftrt_r4_7 + .global __ashiftrt_r4_8 + .global __ashiftrt_r4_9 + .global __ashiftrt_r4_10 + .global __ashiftrt_r4_11 + .global __ashiftrt_r4_12 + .global __ashiftrt_r4_13 + .global __ashiftrt_r4_14 + .global __ashiftrt_r4_15 + .global __ashiftrt_r4_16 + .global __ashiftrt_r4_17 + .global __ashiftrt_r4_18 + .global __ashiftrt_r4_19 + .global __ashiftrt_r4_20 + .global __ashiftrt_r4_21 + .global __ashiftrt_r4_22 + .global __ashiftrt_r4_23 + .global __ashiftrt_r4_24 + .global __ashiftrt_r4_25 + .global __ashiftrt_r4_26 + .global __ashiftrt_r4_27 + .global __ashiftrt_r4_28 + .global __ashiftrt_r4_29 + .global __ashiftrt_r4_30 + .global __ashiftrt_r4_31 + .global __ashiftrt_r4_32 + + .align 1 +__ashiftrt_r4_32: +__ashiftrt_r4_31: + rotcl r4 + rts + subc r4,r4 +__ashiftrt_r4_30: + shar r4 +__ashiftrt_r4_29: + shar r4 +__ashiftrt_r4_28: + shar r4 +__ashiftrt_r4_27: + shar r4 +__ashiftrt_r4_26: + shar r4 +__ashiftrt_r4_25: + shar r4 +__ashiftrt_r4_24: + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 +__ashiftrt_r4_23: + shar r4 +__ashiftrt_r4_22: + shar r4 +__ashiftrt_r4_21: + shar r4 +__ashiftrt_r4_20: + shar r4 +__ashiftrt_r4_19: + shar r4 +__ashiftrt_r4_18: + shar r4 +__ashiftrt_r4_17: + shar r4 +__ashiftrt_r4_16: + shlr16 r4 + rts + exts.w r4,r4 +__ashiftrt_r4_15: + shar r4 +__ashiftrt_r4_14: + shar r4 +__ashiftrt_r4_13: + shar r4 +__ashiftrt_r4_12: + shar r4 +__ashiftrt_r4_11: + shar r4 +__ashiftrt_r4_10: + shar r4 +__ashiftrt_r4_9: + shar r4 +__ashiftrt_r4_8: + shar r4 +__ashiftrt_r4_7: + shar r4 +__ashiftrt_r4_6: + shar r4 +__ashiftrt_r4_5: + shar r4 +__ashiftrt_r4_4: + shar r4 +__ashiftrt_r4_3: + shar r4 +__ashiftrt_r4_2: + shar r4 +__ashiftrt_r4_1: + rts + shar r4 +__ashiftrt_r4_0: + rts + nop diff --git a/arch/sh/lib/ashldi3.c b/arch/sh/lib/ashldi3.c new file mode 100644 index 00000000000..beb80f31609 --- /dev/null +++ b/arch/sh/lib/ashldi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashldi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.low = 0; + w.s.high = (unsigned int) uu.s.low << -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.low >> bm; + + w.s.low = (unsigned int) uu.s.low << b; + w.s.high = ((unsigned int) uu.s.high << b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashldi3); diff --git a/arch/sh/lib/ashlsi3.S b/arch/sh/lib/ashlsi3.S new file mode 100644 index 00000000000..bd47e9b403a --- /dev/null +++ b/arch/sh/lib/ashlsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashlsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + .global __ashlsi3 + + .align 2 +__ashlsi3: + mov #31,r0 + and r0,r5 + mova ashlsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashlsi3_table: + .byte ashlsi3_0-ashlsi3_table + .byte ashlsi3_1-ashlsi3_table + .byte ashlsi3_2-ashlsi3_table + .byte ashlsi3_3-ashlsi3_table + .byte ashlsi3_4-ashlsi3_table + .byte ashlsi3_5-ashlsi3_table + .byte ashlsi3_6-ashlsi3_table + .byte ashlsi3_7-ashlsi3_table + .byte ashlsi3_8-ashlsi3_table + .byte ashlsi3_9-ashlsi3_table + .byte ashlsi3_10-ashlsi3_table + .byte ashlsi3_11-ashlsi3_table + .byte ashlsi3_12-ashlsi3_table + .byte ashlsi3_13-ashlsi3_table + .byte ashlsi3_14-ashlsi3_table + .byte ashlsi3_15-ashlsi3_table + .byte ashlsi3_16-ashlsi3_table + .byte ashlsi3_17-ashlsi3_table + .byte ashlsi3_18-ashlsi3_table + .byte ashlsi3_19-ashlsi3_table + .byte ashlsi3_20-ashlsi3_table + .byte ashlsi3_21-ashlsi3_table + .byte ashlsi3_22-ashlsi3_table + .byte ashlsi3_23-ashlsi3_table + .byte ashlsi3_24-ashlsi3_table + .byte ashlsi3_25-ashlsi3_table + .byte ashlsi3_26-ashlsi3_table + .byte ashlsi3_27-ashlsi3_table + .byte ashlsi3_28-ashlsi3_table + .byte ashlsi3_29-ashlsi3_table + .byte ashlsi3_30-ashlsi3_table + .byte ashlsi3_31-ashlsi3_table + +ashlsi3_6: + shll2 r0 +ashlsi3_4: + shll2 r0 +ashlsi3_2: + rts + shll2 r0 + +ashlsi3_7: + shll2 r0 +ashlsi3_5: + shll2 r0 +ashlsi3_3: + shll2 r0 +ashlsi3_1: + rts + shll r0 + +ashlsi3_14: + shll2 r0 +ashlsi3_12: + shll2 r0 +ashlsi3_10: + shll2 r0 +ashlsi3_8: + rts + shll8 r0 + +ashlsi3_15: + shll2 r0 +ashlsi3_13: + shll2 r0 +ashlsi3_11: + shll2 r0 +ashlsi3_9: + shll8 r0 + rts + shll r0 + +ashlsi3_22: + shll2 r0 +ashlsi3_20: + shll2 r0 +ashlsi3_18: + shll2 r0 +ashlsi3_16: + rts + shll16 r0 + +ashlsi3_23: + shll2 r0 +ashlsi3_21: + shll2 r0 +ashlsi3_19: + shll2 r0 +ashlsi3_17: + shll16 r0 + rts + shll r0 + +ashlsi3_30: + shll2 r0 +ashlsi3_28: + shll2 r0 +ashlsi3_26: + shll2 r0 +ashlsi3_24: + shll16 r0 + rts + shll8 r0 + +ashlsi3_31: + shll2 r0 +ashlsi3_29: + shll2 r0 +ashlsi3_27: + shll2 r0 +ashlsi3_25: + shll16 r0 + shll8 r0 + rts + shll r0 + +ashlsi3_0: + rts + nop diff --git a/arch/sh/lib/ashrdi3.c b/arch/sh/lib/ashrdi3.c new file mode 100644 index 00000000000..c884a912b66 --- /dev/null +++ b/arch/sh/lib/ashrdi3.c @@ -0,0 +1,31 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + /* w.s.high = 1..1 or 0..0 */ + w.s.high = + uu.s.high >> 31; + w.s.low = uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashrdi3); diff --git a/arch/sh/lib/ashrsi3.S b/arch/sh/lib/ashrsi3.S new file mode 100644 index 00000000000..6f3cf46b77c --- /dev/null +++ b/arch/sh/lib/ashrsi3.S @@ -0,0 +1,185 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + + .global __ashrsi3 + + .align 2 +__ashrsi3: + mov #31,r0 + and r0,r5 + mova ashrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashrsi3_table: + .byte ashrsi3_0-ashrsi3_table + .byte ashrsi3_1-ashrsi3_table + .byte ashrsi3_2-ashrsi3_table + .byte ashrsi3_3-ashrsi3_table + .byte ashrsi3_4-ashrsi3_table + .byte ashrsi3_5-ashrsi3_table + .byte ashrsi3_6-ashrsi3_table + .byte ashrsi3_7-ashrsi3_table + .byte ashrsi3_8-ashrsi3_table + .byte ashrsi3_9-ashrsi3_table + .byte ashrsi3_10-ashrsi3_table + .byte ashrsi3_11-ashrsi3_table + .byte ashrsi3_12-ashrsi3_table + .byte ashrsi3_13-ashrsi3_table + .byte ashrsi3_14-ashrsi3_table + .byte ashrsi3_15-ashrsi3_table + .byte ashrsi3_16-ashrsi3_table + .byte ashrsi3_17-ashrsi3_table + .byte ashrsi3_18-ashrsi3_table + .byte ashrsi3_19-ashrsi3_table + .byte ashrsi3_20-ashrsi3_table + .byte ashrsi3_21-ashrsi3_table + .byte ashrsi3_22-ashrsi3_table + .byte ashrsi3_23-ashrsi3_table + .byte ashrsi3_24-ashrsi3_table + .byte ashrsi3_25-ashrsi3_table + .byte ashrsi3_26-ashrsi3_table + .byte ashrsi3_27-ashrsi3_table + .byte ashrsi3_28-ashrsi3_table + .byte ashrsi3_29-ashrsi3_table + .byte ashrsi3_30-ashrsi3_table + .byte ashrsi3_31-ashrsi3_table + +ashrsi3_31: + rotcl r0 + rts + subc r0,r0 + +ashrsi3_30: + shar r0 +ashrsi3_29: + shar r0 +ashrsi3_28: + shar r0 +ashrsi3_27: + shar r0 +ashrsi3_26: + shar r0 +ashrsi3_25: + shar r0 +ashrsi3_24: + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +ashrsi3_23: + shar r0 +ashrsi3_22: + shar r0 +ashrsi3_21: + shar r0 +ashrsi3_20: + shar r0 +ashrsi3_19: + shar r0 +ashrsi3_18: + shar r0 +ashrsi3_17: + shar r0 +ashrsi3_16: + shlr16 r0 + rts + exts.w r0,r0 + +ashrsi3_15: + shar r0 +ashrsi3_14: + shar r0 +ashrsi3_13: + shar r0 +ashrsi3_12: + shar r0 +ashrsi3_11: + shar r0 +ashrsi3_10: + shar r0 +ashrsi3_9: + shar r0 +ashrsi3_8: + shar r0 +ashrsi3_7: + shar r0 +ashrsi3_6: + shar r0 +ashrsi3_5: + shar r0 +ashrsi3_4: + shar r0 +ashrsi3_3: + shar r0 +ashrsi3_2: + shar r0 +ashrsi3_1: + rts + shar r0 + +ashrsi3_0: + rts + nop diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index 7c50dfe68c0..356c8ec9289 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -36,8 +36,7 @@ */ /* - * unsigned int csum_partial(const unsigned char *buf, int len, - * unsigned int sum); + * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); */ .text @@ -49,11 +48,31 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ - mov r5, r1 mov r4, r0 - tst #2, r0 ! Check alignment. - bt 2f ! Jump if alignment is ok. + tst #3, r0 ! Check alignment. + bt/s 2f ! Jump if alignment is ok. + mov r4, r7 ! Keep a copy to check for alignment ! + tst #1, r0 ! Check alignment. + bt 21f ! Jump if alignment is boundary of 2bytes. + + ! buf is odd + tst r5, r5 + add #-1, r5 + bt 9f + mov.b @r4+, r0 + extu.b r0, r0 + addc r0, r6 ! t=0 from previous tst + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 + mov r4, r0 + tst #2, r0 + bt 2f +21: + ! buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -61,16 +80,17 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: - mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: + ! buf is 4 byte aligned (len could be 0) + mov r5, r1 mov #-5, r0 - shld r0, r5 - tst r5, r5 + shld r0, r1 + tst r1, r1 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -92,30 +112,31 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r5 + dt r1 bf/s 3b cmp/eq #1, r0 - ! here, we know r5==0 - addc r5, r6 ! add carry to r6 + ! here, we know r1==0 + addc r1, r6 ! add carry to r6 4: - mov r1, r0 + mov r5, r0 and #0x1c, r0 tst r0, r0 - bt/s 6f - mov r0, r5 - shlr2 r5 + bt 6f + ! 4 bytes or more remaining + mov r0, r1 + shlr2 r1 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r5 + dt r1 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r5, r6 ! r5==0 here, so it means add carry-bit + addc r1, r6 ! r1==0 here, so it means add carry-bit 6: - mov r1, r5 + ! 3 bytes or less remaining mov #3, r0 and r0, r5 tst r5, r5 @@ -139,8 +160,18 @@ ENTRY(csum_partial) 8: addc r0, r6 mov #0, r0 - addc r0, r6 + addc r0, r6 9: + ! Check if the buffer was misaligned, if so realign sum + mov r7, r0 + tst #1, r0 + bt 10f + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 +10: rts mov r6, r0 @@ -202,8 +233,9 @@ ENTRY(csum_partial_copy_generic) cmp/pz r6 ! Jump if we had at least two bytes. bt/s 1f clrt + add #2,r6 ! r6 was < 2. Deal with it. bra 4f - add #2,r6 ! r6 was < 2. Deal with it. + mov r6,r2 3: ! Handle different src and dest alignments. ! This is not common, so simple byte by byte copy will do. diff --git a/arch/sh/lib/copy_page.S b/arch/sh/lib/copy_page.S new file mode 100644 index 00000000000..9d7b8bc5186 --- /dev/null +++ b/arch/sh/lib/copy_page.S @@ -0,0 +1,389 @@ +/* + * copy_page, __copy_user_page, __copy_user implementation of SuperH + * + * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2002 Toshinobu Sugioka + * Copyright (C) 2006 Paul Mundt + */ +#include <linux/linkage.h> +#include <asm/page.h> + +/* + * copy_page + * @to: P1 address + * @from: P1 address + * + * void copy_page(void *to, void *from) + */ + +/* + * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch + * r8 --- from + PAGE_SIZE + * r9 --- not used + * r10 --- to + * r11 --- from + */ +ENTRY(copy_page) + mov.l r8,@-r15 + mov.l r10,@-r15 + mov.l r11,@-r15 + mov r4,r10 + mov r5,r11 + mov r5,r8 + mov #(PAGE_SIZE >> 10), r0 + shll8 r0 + shll2 r0 + add r0,r8 + ! +1: mov.l @r11+,r0 + mov.l @r11+,r1 + mov.l @r11+,r2 + mov.l @r11+,r3 + mov.l @r11+,r4 + mov.l @r11+,r5 + mov.l @r11+,r6 + mov.l @r11+,r7 +#if defined(CONFIG_CPU_SH4) + movca.l r0,@r10 +#else + mov.l r0,@r10 +#endif + add #32,r10 + mov.l r7,@-r10 + mov.l r6,@-r10 + mov.l r5,@-r10 + mov.l r4,@-r10 + mov.l r3,@-r10 + mov.l r2,@-r10 + mov.l r1,@-r10 + cmp/eq r11,r8 + bf/s 1b + add #28,r10 + ! + mov.l @r15+,r11 + mov.l @r15+,r10 + mov.l @r15+,r8 + rts + nop + +/* + * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); + * Return the number of bytes NOT copied + */ +#define EX(...) \ + 9999: __VA_ARGS__ ; \ + .section __ex_table, "a"; \ + .long 9999b, 6000f ; \ + .previous +#define EX_NO_POP(...) \ + 9999: __VA_ARGS__ ; \ + .section __ex_table, "a"; \ + .long 9999b, 6005f ; \ + .previous +ENTRY(__copy_user) + ! Check if small number of bytes + mov #11,r0 + mov r4,r3 + cmp/gt r0,r6 ! r6 (len) > r0 (11) + bf/s .L_cleanup_loop_no_pop + add r6,r3 ! last destination address + + ! Calculate bytes needed to align to src + mov.l r11,@-r15 + neg r5,r0 + mov.l r10,@-r15 + add #4,r0 + mov.l r9,@-r15 + and #3,r0 + mov.l r8,@-r15 + tst r0,r0 + bt 2f + +1: + ! Copy bytes to long word align src +EX( mov.b @r5+,r1 ) + dt r0 + add #-1,r6 +EX( mov.b r1,@r4 ) + bf/s 1b + add #1,r4 + + ! Jump to appropriate routine depending on dest +2: mov #3,r1 + mov r6, r2 + and r4,r1 + shlr2 r2 + shll2 r1 + mova .L_jump_tbl,r0 + mov.l @(r0,r1),r1 + jmp @r1 + nop + + .align 2 +.L_jump_tbl: + .long .L_dest00 + .long .L_dest01 + .long .L_dest10 + .long .L_dest11 + +/* + * Come here if there are less than 12 bytes to copy + * + * Keep the branch target close, so the bf/s callee doesn't overflow + * and result in a more expensive branch being inserted. This is the + * fast-path for small copies, the jump via the jump table will hit the + * default slow-path cleanup. -PFM. + */ +.L_cleanup_loop_no_pop: + tst r6,r6 ! Check explicitly for zero + bt 1f + +2: +EX_NO_POP( mov.b @r5+,r0 ) + dt r6 +EX_NO_POP( mov.b r0,@r4 ) + bf/s 2b + add #1,r4 + +1: mov #0,r0 ! normal return +5000: + +# Exception handler: +.section .fixup, "ax" +6005: + mov.l 8000f,r1 + mov r3,r0 + jmp @r1 + sub r4,r0 + .align 2 +8000: .long 5000b + +.previous + rts + nop + +! Destination = 00 + +.L_dest00: + ! Skip the large copy for small transfers + mov #(32+32-4), r0 + cmp/gt r6, r0 ! r0 (60) > r6 (len) + bt 1f + + ! Align dest to a 32 byte boundary + neg r4,r0 + add #0x20, r0 + and #0x1f, r0 + tst r0, r0 + bt 2f + + sub r0, r6 + shlr2 r0 +3: +EX( mov.l @r5+,r1 ) + dt r0 +EX( mov.l r1,@r4 ) + bf/s 3b + add #4,r4 + +2: +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r2 ) +EX( mov.l @r5+,r7 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.l @r5+,r11 ) +#ifdef CONFIG_CPU_SH4 +EX( movca.l r0,@r4 ) +#else +EX( mov.l r0,@r4 ) +#endif + add #-32, r6 +EX( mov.l r1,@(4,r4) ) + mov #32, r0 +EX( mov.l r2,@(8,r4) ) + cmp/gt r6, r0 ! r0 (32) > r6 (len) +EX( mov.l r7,@(12,r4) ) +EX( mov.l r8,@(16,r4) ) +EX( mov.l r9,@(20,r4) ) +EX( mov.l r10,@(24,r4) ) +EX( mov.l r11,@(28,r4) ) + bf/s 2b + add #32,r4 + +1: mov r6, r0 + shlr2 r0 + tst r0, r0 + bt .L_cleanup +1: +EX( mov.l @r5+,r1 ) + dt r0 +EX( mov.l r1,@r4 ) + bf/s 1b + add #4,r4 + + bra .L_cleanup + nop + +! Destination = 10 + +.L_dest10: + mov r2,r7 + shlr2 r7 + shlr r7 + tst r7,r7 + mov #7,r0 + bt/s 1f + and r0,r2 +2: + dt r7 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.w r0,@r4 ) + add #2,r4 + xtrct r1,r0 + xtrct r8,r1 + xtrct r9,r8 + xtrct r10,r9 + +EX( mov.l r0,@r4 ) +EX( mov.l r1,@(4,r4) ) +EX( mov.l r8,@(8,r4) ) +EX( mov.l r9,@(12,r4) ) + +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r0 ) + xtrct r1,r10 + xtrct r8,r1 + xtrct r0,r8 + shlr16 r0 +EX( mov.l r10,@(16,r4) ) +EX( mov.l r1,@(20,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.w r0,@(28,r4) ) + bf/s 2b + add #30,r4 +#else +EX( mov.l @(28,r5),r0 ) +EX( mov.l @(24,r5),r8 ) +EX( mov.l @(20,r5),r9 ) +EX( mov.l @(16,r5),r10 ) +EX( mov.w r0,@(30,r4) ) + add #-2,r4 + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(28,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.l r9,@(20,r4) ) + +EX( mov.l @(12,r5),r0 ) +EX( mov.l @(8,r5),r8 ) + xtrct r0,r10 +EX( mov.l @(4,r5),r9 ) + mov.l r10,@(16,r4) +EX( mov.l @r5,r10 ) + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(12,r4) ) +EX( mov.l r8,@(8,r4) ) + swap.w r10,r0 +EX( mov.l r9,@(4,r4) ) +EX( mov.w r0,@(2,r4) ) + + add #32,r5 + bf/s 2b + add #34,r4 +#endif + tst r2,r2 + bt .L_cleanup + +1: ! Read longword, write two words per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.w r0,@(2,r4) ) +#else +EX( mov.w r0,@(2,r4) ) + shlr16 r0 +EX( mov.w r0,@r4 ) +#endif + bf/s 1b + add #4,r4 + + bra .L_cleanup + nop + +! Destination = 01 or 11 + +.L_dest01: +.L_dest11: + ! Read longword, write byte, word, byte per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef CONFIG_CPU_LITTLE_ENDIAN +EX( mov.b r0,@r4 ) + shlr8 r0 + add #1,r4 +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.b r0,@(2,r4) ) + bf/s .L_dest01 + add #3,r4 +#else +EX( mov.b r0,@(3,r4) ) + shlr8 r0 + swap.w r0,r7 +EX( mov.b r7,@r4 ) + add #1,r4 +EX( mov.w r0,@r4 ) + bf/s .L_dest01 + add #3,r4 +#endif + +! Cleanup last few bytes +.L_cleanup: + mov r6,r0 + and #3,r0 + tst r0,r0 + bt .L_exit + mov r0,r6 + +.L_cleanup_loop: +EX( mov.b @r5+,r0 ) + dt r6 +EX( mov.b r0,@r4 ) + bf/s .L_cleanup_loop + add #1,r4 + +.L_exit: + mov #0,r0 ! normal return + +5000: + +# Exception handler: +.section .fixup, "ax" +6000: + mov.l 8000f,r1 + mov r3,r0 + jmp @r1 + sub r4,r0 + .align 2 +8000: .long 5000b + +.previous + mov.l @r15+,r8 + mov.l @r15+,r9 + mov.l @r15+,r10 + rts + mov.l @r15+,r11 diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index 351714694d6..0901b2f14e1 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -10,6 +10,16 @@ void __delay(unsigned long loops) { __asm__ __volatile__( + /* + * ST40-300 appears to have an issue with this code, + * normally taking two cycles each loop, as with all + * other SH variants. If however the branch and the + * delay slot straddle an 8 byte boundary, this increases + * to 3 cycles. + * This align directive ensures this doesn't occur. + */ + ".balign 8\n\t" + "tst %0, %0\n\t" "1:\t" "bf/s 1b\n\t" @@ -21,12 +31,14 @@ void __delay(unsigned long loops) inline void __const_udelay(unsigned long xloops) { + xloops *= 4; __asm__("dmulu.l %0, %2\n\t" "sts mach, %0" : "=r" (xloops) - : "0" (xloops), "r" (cpu_data[raw_smp_processor_id()].loops_per_jiffy) + : "0" (xloops), + "r" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)) : "macl", "mach"); - __delay(xloops * HZ); + __delay(++xloops); } void __udelay(unsigned long usecs) diff --git a/arch/sh/lib/div64-generic.c b/arch/sh/lib/div64-generic.c index c02473afd58..60e76aa8b53 100644 --- a/arch/sh/lib/div64-generic.c +++ b/arch/sh/lib/div64-generic.c @@ -3,17 +3,17 @@ */ #include <linux/types.h> +#include <asm/div64.h> -extern u64 __xdiv64_32(u64 n, u32 d); +extern uint64_t __xdiv64_32(u64 n, u32 d); -u64 __div64_32(u64 *xp, u32 y) +uint32_t __div64_32(u64 *xp, u32 y) { - u64 rem; - u64 q = __xdiv64_32(*xp, y); + uint32_t rem; + uint64_t q = __xdiv64_32(*xp, y); rem = *xp - q * y; *xp = q; return rem; } - diff --git a/arch/sh/lib/div64.S b/arch/sh/lib/div64.S index eefc275d64a..5ee7334ea64 100644 --- a/arch/sh/lib/div64.S +++ b/arch/sh/lib/div64.S @@ -1,12 +1,12 @@ /* - * unsigned long long __xdiv64_32(unsigned long long n, unsigned long d); + * unsigned long __xdiv64_32(unsigned long long n, unsigned long d); */ #include <linux/linkage.h> .text ENTRY(__xdiv64_32) -#ifdef __LITTLE_ENDIAN__ +#ifdef CONFIG_CPU_LITTLE_ENDIAN mov r4, r0 mov r5, r1 #else @@ -34,7 +34,7 @@ ENTRY(__xdiv64_32) rotcl r0 div1 r6, r1 .endr -#ifdef __LITTLE_ENDIAN__ +#ifdef CONFIG_CPU_LITTLE_ENDIAN mov r2, r1 rts rotcl r0 diff --git a/arch/sh/lib/io.c b/arch/sh/lib/io.c new file mode 100644 index 00000000000..88dfe6e396b --- /dev/null +++ b/arch/sh/lib/io.c @@ -0,0 +1,82 @@ +/* + * arch/sh/lib/io.c - SH32 optimized I/O routines + * + * Copyright (C) 2000 Stuart Menefy + * Copyright (C) 2005 Paul Mundt + * + * Provide real functions which expand to whatever the header file defined. + * Also definitions of machine independent IO functions. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/module.h> +#include <linux/io.h> + +void __raw_readsl(const void __iomem *addr, void *datap, int len) +{ + u32 *data; + + for (data = datap; (len != 0) && (((u32)data & 0x1f) != 0); len--) + *data++ = __raw_readl(addr); + + if (likely(len >= (0x20 >> 2))) { + int tmp2, tmp3, tmp4, tmp5, tmp6; + + __asm__ __volatile__( + "1: \n\t" + "mov.l @%7, r0 \n\t" + "mov.l @%7, %2 \n\t" +#ifdef CONFIG_CPU_SH4 + "movca.l r0, @%0 \n\t" +#else + "mov.l r0, @%0 \n\t" +#endif + "mov.l @%7, %3 \n\t" + "mov.l @%7, %4 \n\t" + "mov.l @%7, %5 \n\t" + "mov.l @%7, %6 \n\t" + "mov.l @%7, r7 \n\t" + "mov.l @%7, r0 \n\t" + "mov.l %2, @(0x04,%0) \n\t" + "mov #0x20>>2, %2 \n\t" + "mov.l %3, @(0x08,%0) \n\t" + "sub %2, %1 \n\t" + "mov.l %4, @(0x0c,%0) \n\t" + "cmp/hi %1, %2 ! T if 32 > len \n\t" + "mov.l %5, @(0x10,%0) \n\t" + "mov.l %6, @(0x14,%0) \n\t" + "mov.l r7, @(0x18,%0) \n\t" + "mov.l r0, @(0x1c,%0) \n\t" + "bf.s 1b \n\t" + " add #0x20, %0 \n\t" + : "=&r" (data), "=&r" (len), + "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4), + "=&r" (tmp5), "=&r" (tmp6) + : "r"(addr), "0" (data), "1" (len) + : "r0", "r7", "t", "memory"); + } + + for (; len != 0; len--) + *data++ = __raw_readl(addr); +} +EXPORT_SYMBOL(__raw_readsl); + +void __raw_writesl(void __iomem *addr, const void *data, int len) +{ + if (likely(len != 0)) { + int tmp1; + + __asm__ __volatile__ ( + "1: \n\t" + "mov.l @%0+, %1 \n\t" + "dt %3 \n\t" + "bf.s 1b \n\t" + " mov.l %1, @%4 \n\t" + : "=&r" (data), "=&r" (tmp1) + : "0" (data), "r" (len), "r"(addr) + : "t", "memory"); + } +} +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h new file mode 100644 index 00000000000..05909d58e2f --- /dev/null +++ b/arch/sh/lib/libgcc.h @@ -0,0 +1,25 @@ +#ifndef __ASM_LIBGCC_H +#define __ASM_LIBGCC_H + +#include <asm/byteorder.h> + +typedef int word_type __attribute__ ((mode (__word__))); + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/arch/sh/lib/lshrdi3.c b/arch/sh/lib/lshrdi3.c new file mode 100644 index 00000000000..dcf8d6810b7 --- /dev/null +++ b/arch/sh/lib/lshrdi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __lshrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.high = 0; + w.s.low = (unsigned int) uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = (unsigned int) uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__lshrdi3); diff --git a/arch/sh/lib/lshrsi3.S b/arch/sh/lib/lshrsi3.S new file mode 100644 index 00000000000..1e7aaa55713 --- /dev/null +++ b/arch/sh/lib/lshrsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __lshrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + .global __lshrsi3 + + .align 2 +__lshrsi3: + mov #31,r0 + and r0,r5 + mova lshrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +lshrsi3_table: + .byte lshrsi3_0-lshrsi3_table + .byte lshrsi3_1-lshrsi3_table + .byte lshrsi3_2-lshrsi3_table + .byte lshrsi3_3-lshrsi3_table + .byte lshrsi3_4-lshrsi3_table + .byte lshrsi3_5-lshrsi3_table + .byte lshrsi3_6-lshrsi3_table + .byte lshrsi3_7-lshrsi3_table + .byte lshrsi3_8-lshrsi3_table + .byte lshrsi3_9-lshrsi3_table + .byte lshrsi3_10-lshrsi3_table + .byte lshrsi3_11-lshrsi3_table + .byte lshrsi3_12-lshrsi3_table + .byte lshrsi3_13-lshrsi3_table + .byte lshrsi3_14-lshrsi3_table + .byte lshrsi3_15-lshrsi3_table + .byte lshrsi3_16-lshrsi3_table + .byte lshrsi3_17-lshrsi3_table + .byte lshrsi3_18-lshrsi3_table + .byte lshrsi3_19-lshrsi3_table + .byte lshrsi3_20-lshrsi3_table + .byte lshrsi3_21-lshrsi3_table + .byte lshrsi3_22-lshrsi3_table + .byte lshrsi3_23-lshrsi3_table + .byte lshrsi3_24-lshrsi3_table + .byte lshrsi3_25-lshrsi3_table + .byte lshrsi3_26-lshrsi3_table + .byte lshrsi3_27-lshrsi3_table + .byte lshrsi3_28-lshrsi3_table + .byte lshrsi3_29-lshrsi3_table + .byte lshrsi3_30-lshrsi3_table + .byte lshrsi3_31-lshrsi3_table + +lshrsi3_6: + shlr2 r0 +lshrsi3_4: + shlr2 r0 +lshrsi3_2: + rts + shlr2 r0 + +lshrsi3_7: + shlr2 r0 +lshrsi3_5: + shlr2 r0 +lshrsi3_3: + shlr2 r0 +lshrsi3_1: + rts + shlr r0 + +lshrsi3_14: + shlr2 r0 +lshrsi3_12: + shlr2 r0 +lshrsi3_10: + shlr2 r0 +lshrsi3_8: + rts + shlr8 r0 + +lshrsi3_15: + shlr2 r0 +lshrsi3_13: + shlr2 r0 +lshrsi3_11: + shlr2 r0 +lshrsi3_9: + shlr8 r0 + rts + shlr r0 + +lshrsi3_22: + shlr2 r0 +lshrsi3_20: + shlr2 r0 +lshrsi3_18: + shlr2 r0 +lshrsi3_16: + rts + shlr16 r0 + +lshrsi3_23: + shlr2 r0 +lshrsi3_21: + shlr2 r0 +lshrsi3_19: + shlr2 r0 +lshrsi3_17: + shlr16 r0 + rts + shlr r0 + +lshrsi3_30: + shlr2 r0 +lshrsi3_28: + shlr2 r0 +lshrsi3_26: + shlr2 r0 +lshrsi3_24: + shlr16 r0 + rts + shlr8 r0 + +lshrsi3_31: + shlr2 r0 +lshrsi3_29: + shlr2 r0 +lshrsi3_27: + shlr2 r0 +lshrsi3_25: + shlr16 r0 + shlr8 r0 + rts + shlr r0 + +lshrsi3_0: + rts + nop diff --git a/arch/sh/lib/mcount.S b/arch/sh/lib/mcount.S new file mode 100644 index 00000000000..52aa2011d75 --- /dev/null +++ b/arch/sh/lib/mcount.S @@ -0,0 +1,310 @@ +/* + * arch/sh/lib/mcount.S + * + * Copyright (C) 2008, 2009 Paul Mundt + * Copyright (C) 2008, 2009 Matt Fleming + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <asm/ftrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#define MCOUNT_ENTER() \ + mov.l r4, @-r15; \ + mov.l r5, @-r15; \ + mov.l r6, @-r15; \ + mov.l r7, @-r15; \ + sts.l pr, @-r15; \ + \ + mov.l @(20,r15),r4; \ + sts pr, r5 + +#define MCOUNT_LEAVE() \ + lds.l @r15+, pr; \ + mov.l @r15+, r7; \ + mov.l @r15+, r6; \ + mov.l @r15+, r5; \ + rts; \ + mov.l @r15+, r4 + +#ifdef CONFIG_STACK_DEBUG +/* + * Perform diagnostic checks on the state of the kernel stack. + * + * Check for stack overflow. If there is less than 1KB free + * then it has overflowed. + * + * Make sure the stack pointer contains a valid address. Valid + * addresses for kernel stacks are anywhere after the bss + * (after __bss_stop) and anywhere in init_thread_union (init_stack). + */ +#define STACK_CHECK() \ + mov #(THREAD_SIZE >> 10), r0; \ + shll8 r0; \ + shll2 r0; \ + \ + /* r1 = sp & (THREAD_SIZE - 1) */ \ + mov #-1, r1; \ + add r0, r1; \ + and r15, r1; \ + \ + mov #TI_SIZE, r3; \ + mov #(STACK_WARN >> 8), r2; \ + shll8 r2; \ + add r3, r2; \ + \ + /* Is the stack overflowing? */ \ + cmp/hi r2, r1; \ + bf stack_panic; \ + \ + /* If sp > __bss_stop then we're OK. */ \ + mov.l .L_ebss, r1; \ + cmp/hi r1, r15; \ + bt 1f; \ + \ + /* If sp < init_stack, we're not OK. */ \ + mov.l .L_init_thread_union, r1; \ + cmp/hs r1, r15; \ + bf stack_panic; \ + \ + /* If sp > init_stack && sp < __bss_stop, not OK. */ \ + add r0, r1; \ + cmp/hs r1, r15; \ + bt stack_panic; \ +1: +#else +#define STACK_CHECK() +#endif /* CONFIG_STACK_DEBUG */ + + .align 2 + .globl _mcount + .type _mcount,@function + .globl mcount + .type mcount,@function +_mcount: +mcount: + STACK_CHECK() + +#ifndef CONFIG_FUNCTION_TRACER + rts + nop +#else +#ifndef CONFIG_DYNAMIC_FTRACE + mov.l .Lfunction_trace_stop, r0 + mov.l @r0, r0 + tst r0, r0 + bf ftrace_stub +#endif + + MCOUNT_ENTER() + +#ifdef CONFIG_DYNAMIC_FTRACE + .globl mcount_call +mcount_call: + mov.l .Lftrace_stub, r6 +#else + mov.l .Lftrace_trace_function, r6 + mov.l ftrace_stub, r7 + cmp/eq r6, r7 + bt skip_trace + mov.l @r6, r6 +#endif + + jsr @r6 + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + mov.l .Lftrace_graph_return, r6 + mov.l .Lftrace_stub, r7 + cmp/eq r6, r7 + bt 1f + + mov.l .Lftrace_graph_caller, r0 + jmp @r0 + nop + +1: + mov.l .Lftrace_graph_entry, r6 + mov.l .Lftrace_graph_entry_stub, r7 + cmp/eq r6, r7 + bt skip_trace + + mov.l .Lftrace_graph_caller, r0 + jmp @r0 + nop + + .align 2 +.Lftrace_graph_return: + .long ftrace_graph_return +.Lftrace_graph_entry: + .long ftrace_graph_entry +.Lftrace_graph_entry_stub: + .long ftrace_graph_entry_stub +.Lftrace_graph_caller: + .long ftrace_graph_caller +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + .globl skip_trace +skip_trace: + MCOUNT_LEAVE() + + .align 2 +.Lftrace_trace_function: + .long ftrace_trace_function + +#ifdef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * NOTE: Do not move either ftrace_graph_call or ftrace_caller + * as this will affect the calculation of GRAPH_INSN_OFFSET. + */ + .globl ftrace_graph_call +ftrace_graph_call: + mov.l .Lskip_trace, r0 + jmp @r0 + nop + + .align 2 +.Lskip_trace: + .long skip_trace +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + .globl ftrace_caller +ftrace_caller: + mov.l .Lfunction_trace_stop, r0 + mov.l @r0, r0 + tst r0, r0 + bf ftrace_stub + + MCOUNT_ENTER() + + .globl ftrace_call +ftrace_call: + mov.l .Lftrace_stub, r6 + jsr @r6 + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + bra ftrace_graph_call + nop +#else + MCOUNT_LEAVE() +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .align 2 +.Lfunction_trace_stop: + .long function_trace_stop + +/* + * NOTE: From here on the locations of the .Lftrace_stub label and + * ftrace_stub itself are fixed. Adding additional data here will skew + * the displacement for the memory table and break the block replacement. + * Place new labels either after the ftrace_stub body, or before + * ftrace_caller. You have been warned. + */ +.Lftrace_stub: + .long ftrace_stub + + .globl ftrace_stub +ftrace_stub: + rts + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_caller +ftrace_graph_caller: + mov.l 2f, r0 + mov.l @r0, r0 + tst r0, r0 + bt 1f + + mov.l 3f, r1 + jmp @r1 + nop +1: + /* + * MCOUNT_ENTER() pushed 5 registers onto the stack, so + * the stack address containing our return address is + * r15 + 20. + */ + mov #20, r0 + add r15, r0 + mov r0, r4 + + mov.l .Lprepare_ftrace_return, r0 + jsr @r0 + nop + + MCOUNT_LEAVE() + + .align 2 +2: .long function_trace_stop +3: .long skip_trace +.Lprepare_ftrace_return: + .long prepare_ftrace_return + + .globl return_to_handler +return_to_handler: + /* + * Save the return values. + */ + mov.l r0, @-r15 + mov.l r1, @-r15 + + mov #0, r4 + + mov.l .Lftrace_return_to_handler, r0 + jsr @r0 + nop + + /* + * The return value from ftrace_return_handler has the real + * address that we should return to. + */ + lds r0, pr + mov.l @r15+, r1 + rts + mov.l @r15+, r0 + + + .align 2 +.Lftrace_return_to_handler: + .long ftrace_return_to_handler +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_STACK_DEBUG + .globl stack_panic +stack_panic: + mov.l .Ldump_stack, r0 + jsr @r0 + nop + + mov.l .Lpanic, r0 + jsr @r0 + mov.l .Lpanic_s, r4 + + rts + nop + + .align 2 +.L_init_thread_union: + .long init_thread_union +.L_ebss: + .long __bss_stop +.Lpanic: + .long panic +.Lpanic_s: + .long .Lpanic_str +.Ldump_stack: + .long dump_stack + + .section .rodata + .align 2 +.Lpanic_str: + .string "Stack error" +#endif /* CONFIG_STACK_DEBUG */ diff --git a/arch/sh/lib/memcpy-sh4.S b/arch/sh/lib/memcpy-sh4.S index 55f227441f9..459fa92a7c5 100644 --- a/arch/sh/lib/memcpy-sh4.S +++ b/arch/sh/lib/memcpy-sh4.S @@ -8,7 +8,6 @@ * */ #include <linux/linkage.h> -#include <linux/config.h> /* * void *memcpy(void *dst, const void *src, size_t n); @@ -127,10 +126,10 @@ mov.l r3,@-r0 ! 30 LS #else -3: mov r1,r3 ! OPQR +3: mov r7,r3 ! OPQR shlr8 r3 ! xOPQ - mov.l @(r0,r5),r1 ! KLMN - mov r1,r6 + mov.l @(r0,r5),r7 ! KLMN + mov r7,r6 shll16 r6 shll8 r6 ! Nxxx or r6,r3 ! NOPQ @@ -728,30 +727,30 @@ ENTRY(memcpy) mov.l @(0x04,r5), r11 ! 18 LS (latency=2) xtrct r9, r8 ! 48 EX - mov.w @(0x02,r5), r12 ! 18 LS (latency=2) - xtrct r10, r9 ! 48 EX + mov.l @(0x00,r5), r12 ! 18 LS (latency=2) + xtrct r10, r9 ! 48 EX movca.l r0,@r1 ! 40 LS (latency=3-7) add #-0x1c, r1 ! 50 EX - mov.l r3, @(0x1c,r1) ! 33 LS + mov.l r3, @(0x18,r1) ! 33 LS xtrct r11, r10 ! 48 EX - mov.l r6, @(0x18,r1) ! 33 LS + mov.l r6, @(0x14,r1) ! 33 LS xtrct r12, r11 ! 48 EX - mov.l r7, @(0x14,r1) ! 33 LS + mov.l r7, @(0x10,r1) ! 33 LS - mov.l r8, @(0x10,r1) ! 33 LS - add #-0x3e, r5 ! 50 EX + mov.l r8, @(0x0c,r1) ! 33 LS + add #-0x1e, r5 ! 50 EX - mov.l r9, @(0x0c,r1) ! 33 LS + mov.l r9, @(0x08,r1) ! 33 LS cmp/eq r2,r1 ! 54 MT - mov.l r10, @(0x08,r1) ! 33 LS + mov.l r10, @(0x04,r1) ! 33 LS bf/s 2b ! 109 BR - mov.l r11, @(0x04,r1) ! 33 LS + mov.l r11, @(0x00,r1) ! 33 LS #endif mov.l @r15+, r12 diff --git a/arch/sh/lib/memset-sh4.S b/arch/sh/lib/memset-sh4.S new file mode 100644 index 00000000000..1a6e32cc4e4 --- /dev/null +++ b/arch/sh/lib/memset-sh4.S @@ -0,0 +1,107 @@ +/* + * "memset" implementation for SH4 + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (c) 2009 STMicroelectronics Limited + * Author: Stuart Menefy <stuart.menefy:st.com> + */ + +/* + * void *memset(void *s, int c, size_t n); + */ + +#include <linux/linkage.h> + +ENTRY(memset) + mov #12,r0 + add r6,r4 + cmp/gt r6,r0 + bt/s 40f ! if it's too small, set a byte at once + mov r4,r0 + and #3,r0 + cmp/eq #0,r0 + bt/s 2f ! It's aligned + sub r0,r6 +1: + dt r0 + bf/s 1b + mov.b r5,@-r4 +2: ! make VVVV + extu.b r5,r5 + swap.b r5,r0 ! V0 + or r0,r5 ! VV + swap.w r5,r0 ! VV00 + or r0,r5 ! VVVV + + ! Check if enough bytes need to be copied to be worth the big loop + mov #0x40, r0 ! (MT) + cmp/gt r6,r0 ! (MT) 64 > len => slow loop + + bt/s 22f + mov r6,r0 + + ! align the dst to the cache block size if necessary + mov r4, r3 + mov #~(0x1f), r1 + + and r3, r1 + cmp/eq r3, r1 + + bt/s 11f ! dst is already aligned + sub r1, r3 ! r3-r1 -> r3 + shlr2 r3 ! number of loops + +10: mov.l r5,@-r4 + dt r3 + bf/s 10b + add #-4, r6 + +11: ! dst is 32byte aligned + mov r6,r2 + mov #-5,r0 + shld r0,r2 ! number of loops + + add #-32, r4 + mov r5, r0 +12: + movca.l r0,@r4 + mov.l r5,@(4, r4) + mov.l r5,@(8, r4) + mov.l r5,@(12,r4) + mov.l r5,@(16,r4) + mov.l r5,@(20,r4) + add #-0x20, r6 + mov.l r5,@(24,r4) + dt r2 + mov.l r5,@(28,r4) + bf/s 12b + add #-32, r4 + + add #32, r4 + mov #8, r0 + cmp/ge r0, r6 + bf 40f + + mov r6,r0 +22: + shlr2 r0 + shlr r0 ! r0 = r6 >> 3 +3: + dt r0 + mov.l r5,@-r4 ! set 8-byte at once + bf/s 3b + mov.l r5,@-r4 + ! + mov #7,r0 + and r0,r6 + + ! fill bytes (length may be zero) +40: tst r6,r6 + bt 5f +4: + dt r6 + bf/s 4b + mov.b r5,@-r4 +5: + rts + mov r4,r0 diff --git a/arch/sh/lib/memset.S b/arch/sh/lib/memset.S index 95670090680..af91fe2b72a 100644 --- a/arch/sh/lib/memset.S +++ b/arch/sh/lib/memset.S @@ -29,6 +29,7 @@ ENTRY(memset) bf/s 1b mov.b r5,@-r4 2: ! make VVVV + extu.b r5,r5 swap.b r5,r0 ! V0 or r0,r5 ! VV swap.w r5,r0 ! VV00 diff --git a/arch/sh/lib/movmem.S b/arch/sh/lib/movmem.S new file mode 100644 index 00000000000..62075f6bc67 --- /dev/null +++ b/arch/sh/lib/movmem.S @@ -0,0 +1,238 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .text + .balign 4 + .global __movmem + .global __movstr + .set __movstr, __movmem + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +__movmem: + sts.l pr,@-r15 + shll2 r6 + bsr __movmemSI52+2 + mov.l @(48,r5),r0 + .balign 4 +movmem_loop: /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt movmem_done + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt __movmemSI52 +! done all the large groups, do the remainder +! jump to movmem+ + mova __movmemSI4+4,r0 + add r6,r0 + jmp @r0 +movmem_done: ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 + + .global __movmemSI64 + .global __movstrSI64 + .set __movstrSI64, __movmemSI64 +__movmemSI64: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global __movmemSI60 + .global __movstrSI60 + .set __movstrSI60, __movmemSI60 +__movmemSI60: + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global __movmemSI56 + .global __movstrSI56 + .set __movstrSI56, __movmemSI56 +__movmemSI56: + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global __movmemSI52 + .global __movstrSI52 + .set __movstrSI52, __movmemSI52 +__movmemSI52: + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global __movmemSI48 + .global __movstrSI48 + .set __movstrSI48, __movmemSI48 +__movmemSI48: + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global __movmemSI44 + .global __movstrSI44 + .set __movstrSI44, __movmemSI44 +__movmemSI44: + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global __movmemSI40 + .global __movstrSI40 + .set __movstrSI40, __movmemSI40 +__movmemSI40: + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global __movmemSI36 + .global __movstrSI36 + .set __movstrSI36, __movmemSI36 +__movmemSI36: + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global __movmemSI32 + .global __movstrSI32 + .set __movstrSI32, __movmemSI32 +__movmemSI32: + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global __movmemSI28 + .global __movstrSI28 + .set __movstrSI28, __movmemSI28 +__movmemSI28: + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global __movmemSI24 + .global __movstrSI24 + .set __movstrSI24, __movmemSI24 +__movmemSI24: + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global __movmemSI20 + .global __movstrSI20 + .set __movstrSI20, __movmemSI20 +__movmemSI20: + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global __movmemSI16 + .global __movstrSI16 + .set __movstrSI16, __movmemSI16 +__movmemSI16: + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global __movmemSI12 + .global __movstrSI12 + .set __movstrSI12, __movmemSI12 +__movmemSI12: + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global __movmemSI8 + .global __movstrSI8 + .set __movstrSI8, __movmemSI8 +__movmemSI8: + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global __movmemSI4 + .global __movstrSI4 + .set __movstrSI4, __movmemSI4 +__movmemSI4: + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + .global __movmem_i4_even + .global __movstr_i4_even + .set __movstr_i4_even, __movmem_i4_even + + .global __movmem_i4_odd + .global __movstr_i4_odd + .set __movstr_i4_odd, __movmem_i4_odd + + .global __movmemSI12_i4 + .global __movstrSI12_i4 + .set __movstrSI12_i4, __movmemSI12_i4 + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +__movmem_i4_even: + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +__movmem_i4_odd: + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + .p2align 4 +__movmemSI12_i4: + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) diff --git a/arch/sh/lib/strcasecmp.c b/arch/sh/lib/strcasecmp.c deleted file mode 100644 index 4e57a216fea..00000000000 --- a/arch/sh/lib/strcasecmp.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * linux/arch/alpha/lib/strcasecmp.c - */ - -#include <linux/string.h> - - -/* We handle nothing here except the C locale. Since this is used in - only one place, on strings known to contain only 7 bit ASCII, this - is ok. */ - -int strcasecmp(const char *a, const char *b) -{ - int ca, cb; - - do { - ca = *a++ & 0xff; - cb = *b++ & 0xff; - if (ca >= 'A' && ca <= 'Z') - ca += 'a' - 'A'; - if (cb >= 'A' && cb <= 'Z') - cb += 'a' - 'A'; - } while (ca == cb && ca != '\0'); - - return ca - cb; -} diff --git a/arch/sh/lib/strlen.S b/arch/sh/lib/strlen.S index f8ab296047b..1bcc13f0596 100644 --- a/arch/sh/lib/strlen.S +++ b/arch/sh/lib/strlen.S @@ -35,7 +35,7 @@ ENTRY(strlen) mov.b @r4+,r1 tst r1,r1 bt 8f - add #1,r2 + add #1,r2 1: mov #0,r3 diff --git a/arch/sh/lib/udiv_qrnnd.S b/arch/sh/lib/udiv_qrnnd.S new file mode 100644 index 00000000000..32b9a36de94 --- /dev/null +++ b/arch/sh/lib/udiv_qrnnd.S @@ -0,0 +1,81 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. */ + .global __udiv_qrnnd_16 + .balign 8 +__udiv_qrnnd_16: + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + bf/s 1b + shlr16 r1 +0: rts + nop diff --git a/arch/sh/lib/udivdi3.c b/arch/sh/lib/udivdi3.c deleted file mode 100644 index 68f038bf3c5..00000000000 --- a/arch/sh/lib/udivdi3.c +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Simple __udivdi3 function which doesn't use FPU. - */ - -#include <linux/types.h> - -extern u64 __xdiv64_32(u64 n, u32 d); -extern void panic(const char * fmt, ...); - -u64 __udivdi3(u64 n, u64 d) -{ - if (d & ~0xffffffff) - panic("Need true 64-bit/64-bit division"); - return __xdiv64_32(n, (u32)d); -} - diff --git a/arch/sh/lib/udivsi3.S b/arch/sh/lib/udivsi3.S new file mode 100644 index 00000000000..72157ab5c31 --- /dev/null +++ b/arch/sh/lib/udivsi3.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + + .balign 4 + .global __udivsi3 + .type __udivsi3, @function +div8: + div1 r5,r4 +div7: + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +divx4: + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +__udivsi3: + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + bf/s large_divisor + div0u + swap.w r4,r0 + shlr16 r4 + bsr div8 + shll16 r5 + bsr div7 + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr div8 + swap.w r4,r4 + bsr div7 + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +large_divisor: + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 diff --git a/arch/sh/lib/udivsi3_i4i-Os.S b/arch/sh/lib/udivsi3_i4i-Os.S new file mode 100644 index 00000000000..4835553e1ea --- /dev/null +++ b/arch/sh/lib/udivsi3_i4i-Os.S @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* Moderately Space-optimized libgcc routines for the Renesas SH / + STMicroelectronics ST40 CPUs. + Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i + sh4-200 run times: + udiv small divisor: 55 cycles + udiv large divisor: 52 cycles + sdiv small divisor, positive result: 59 cycles + sdiv large divisor, positive result: 56 cycles + sdiv small divisor, negative result: 65 cycles (*) + sdiv large divisor, negative result: 62 cycles (*) + (*): r2 is restored in the rts delay slot and has a lingering latency + of two more cycles. */ + .balign 4 + .global __udivsi3_i4i + .global __udivsi3_i4 + .set __udivsi3_i4, __udivsi3_i4i + .type __udivsi3_i4i, @function + .type __sdivsi3_i4i, @function +__udivsi3_i4i: + sts pr,r1 + mov.l r4,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + swap.w r4,r0 + shlr16 r4 + bf/s large_divisor + div0u + mov.l r5,@-r15 + shll16 r5 +sdiv_small_divisor: + div1 r5,r4 + bsr div6 + div1 r5,r4 + div1 r5,r4 + bsr div6 + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr div7 + swap.w r4,r4 + div1 r5,r4 + bsr div7 + div1 r5,r4 + xtrct r4,r0 + mov.l @r15+,r5 + swap.w r0,r0 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 +div7: + div1 r5,r4 +div6: + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +divx3: + rotcl r0 + div1 r5,r4 + rotcl r0 + div1 r5,r4 + rotcl r0 + rts + div1 r5,r4 + +large_divisor: + mov.l r5,@-r15 +sdiv_large_divisor: + xor r4,r0 + .rept 4 + rotcl r0 + bsr divx3 + div1 r5,r4 + .endr + mov.l @r15+,r5 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 + + .global __sdivsi3_i4i + .global __sdivsi3_i4 + .global __sdivsi3 + .set __sdivsi3_i4, __sdivsi3_i4i + .set __sdivsi3, __sdivsi3_i4i +__sdivsi3_i4i: + mov.l r4,@-r15 + cmp/pz r5 + mov.l r5,@-r15 + bt/s pos_divisor + cmp/pz r4 + neg r5,r5 + extu.w r5,r0 + bt/s neg_result + cmp/eq r5,r0 + neg r4,r4 +pos_result: + swap.w r4,r0 + bra sdiv_check_divisor + sts pr,r1 +pos_divisor: + extu.w r5,r0 + bt/s pos_result + cmp/eq r5,r0 + neg r4,r4 +neg_result: + mova negate_result,r0 + ; + mov r0,r1 + swap.w r4,r0 + lds r2,macl + sts pr,r2 +sdiv_check_divisor: + shlr16 r4 + bf/s sdiv_large_divisor + div0u + bra sdiv_small_divisor + shll16 r5 + .balign 4 +negate_result: + neg r0,r0 + jmp @r2 + sts macl,r2 diff --git a/arch/sh/lib/udivsi3_i4i.S b/arch/sh/lib/udivsi3_i4i.S new file mode 100644 index 00000000000..f1a79d9c501 --- /dev/null +++ b/arch/sh/lib/udivsi3_i4i.S @@ -0,0 +1,666 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global __udivsi3_i4i + .global __udivsi3_i4 + .set __udivsi3_i4, __udivsi3_i4i + .type __udivsi3_i4i, @function +__udivsi3_i4i: + mov.w c128_w, r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf udiv_le128 + cmp/eq r5,r1 + bf udiv_ge64k + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra udiv_25 + div1 r5,r0 + +div_le128: + mova div_table_ix,r0 + bra div_le128_2 + mov.b @(r0,r5),r1 +udiv_le128: + mov.l r4,@-r15 + mova div_table_ix,r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +div_le128_2: + mova div_table_inv,r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova div_table_clz,r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s div_by_1 + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +div_by_1_neg: + neg r4,r0 +div_by_1: + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +div_ge64k: + bt/s div_r8 + div0u + shll8 r5 + bra div_ge64k_2 + div1 r5,r0 +udiv_ge64k: + cmp/hi r0,r5 + mov r5,r1 + bt udiv_r8 + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +div_ge64k_2: + div1 r5,r0 + mov.l zero_l,r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w m256_w,r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra div_ge64k_end + xor r4,r0 + +div_r8: + shll16 r4 + bra div_r8_2 + shll8 r4 +udiv_r8: + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +div_r8_2: + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + .global __sdivsi3_i4i + .global __sdivsi3_i4 + .global __sdivsi3 + .set __sdivsi3_i4, __sdivsi3_i4i + .set __sdivsi3, __sdivsi3_i4i + .type __sdivsi3_i4i, @function + /* This is link-compatible with a __sdivsi3 call, + but we effectively clobber only r1. */ +__sdivsi3_i4i: + mov.l r4,@-r15 + cmp/pz r5 + mov.w c128_w, r1 + bt/s pos_divisor + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s neg_result + cmp/hi r1,r5 + neg r4,r4 +pos_result: + extu.w r5,r0 + bf div_le128 + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s div_ge64k + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +udiv_25: + mov.l zero_l,r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +div_ge64k_end: + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +div_le128_neg: + tst #0xfe,r0 + mova div_table_ix,r0 + mov.b @(r0,r5),r1 + mova div_table_inv,r0 + bt/s div_by_1_neg + mov.l @(r0,r1),r1 + mova div_table_clz,r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +pos_divisor: + mov.l r5,@-r15 + bt/s pos_result + cmp/hi r1,r5 + neg r4,r4 +neg_result: + extu.w r5,r0 + bf div_le128_neg + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s div_ge64k_neg + cmp/hi r0,r5 + div0u + mov.l zero_l,r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +div_ge64k_neg_end: + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +div_r8_neg_end: + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +div_ge64k_neg: + bt/s div_r8_neg + div0u + shll8 r5 + mov.l zero_l,r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w m256_w,r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra div_ge64k_neg_end + xor r4,r0 + +c128_w: + .word 128 + +div_r8_neg: + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra div_r8_neg_end + div1 r4,r0 + +m256_w: + .word 0xff00 +/* This table has been generated by divtab-sh4.c. */ + .balign 4 +div_table_clz: + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. */ +div_table_ix: + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ + .balign 4 +zero_l: + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +div_table_inv: + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ |
