Diffstat (limited to 'arch/sh/lib')
-rw-r--r--  arch/sh/lib/Makefile | 27
-rw-r--r--  arch/sh/lib/__clear_user.S (renamed from arch/sh/lib/clear_page.S) | 48
-rw-r--r--  arch/sh/lib/ashiftrt.S | 149
-rw-r--r--  arch/sh/lib/ashldi3.c | 29
-rw-r--r--  arch/sh/lib/ashlsi3.S | 193
-rw-r--r--  arch/sh/lib/ashrdi3.c | 31
-rw-r--r--  arch/sh/lib/ashrsi3.S | 185
-rw-r--r--  arch/sh/lib/checksum.S | 69
-rw-r--r--  arch/sh/lib/copy_page.S | 28
-rw-r--r--  arch/sh/lib/delay.c | 15
-rw-r--r--  arch/sh/lib/div64-generic.c | 1
-rw-r--r--  arch/sh/lib/io.c | 8
-rw-r--r--  arch/sh/lib/libgcc.h | 25
-rw-r--r--  arch/sh/lib/lshrdi3.c | 29
-rw-r--r--  arch/sh/lib/lshrsi3.S | 193
-rw-r--r--  arch/sh/lib/mcount.S | 310
-rw-r--r--  arch/sh/lib/memcpy-sh4.S | 22
-rw-r--r--  arch/sh/lib/memset-sh4.S | 107
-rw-r--r--  arch/sh/lib/movmem.S | 238
-rw-r--r--  arch/sh/lib/strlen.S | 2
-rw-r--r--  arch/sh/lib/udiv_qrnnd.S | 81
-rw-r--r--  arch/sh/lib/udivsi3.S | 87
-rw-r--r--  arch/sh/lib/udivsi3_i4i-Os.S | 149
-rw-r--r--  arch/sh/lib/udivsi3_i4i.S | 666
24 files changed, 2590 insertions(+), 102 deletions(-)
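Note on the new C shift helpers: the patch below adds plain-C implementations of the 64-bit libgcc shift routines (ashldi3.c, ashrdi3.c, lshrdi3.c) built on the DWunion high/low word pair from the new libgcc.h, where the word order is picked from <asm/byteorder.h>. As a rough standalone illustration of that split-word technique — not the kernel build itself — here is a sketch that mirrors __ashldi3 and checks it against the native 64-bit shift. It assumes a little-endian host with 32-bit int; the function and struct names are hypothetical:

#include <assert.h>
#include <stdio.h>

/* Little-endian word order assumed (the kernel header selects the
 * order via <asm/byteorder.h>); int assumed to be 32 bits. */
struct dwstruct { int low, high; };
typedef union { struct dwstruct s; long long ll; } dwunion;

/* Mirrors the patch's __ashldi3: shift a 64-bit value left by b
 * (0 <= b < 64) using only 32-bit operations. */
static long long shift_left_64(long long u, int b)
{
	dwunion uu, w;
	int bm;

	if (b == 0)
		return u;

	uu.ll = u;
	bm = 32 - b;

	if (bm <= 0) {
		/* Shifting by 32 or more: low word lands in the high word. */
		w.s.low = 0;
		w.s.high = (unsigned int)uu.s.low << -bm;
	} else {
		/* Bits that cross from the low word into the high word. */
		const unsigned int carries = (unsigned int)uu.s.low >> bm;

		w.s.low = (unsigned int)uu.s.low << b;
		w.s.high = ((unsigned int)uu.s.high << b) | carries;
	}
	return w.ll;
}

int main(void)
{
	long long u = 0x0123456789abcdefLL;

	for (int b = 0; b < 64; b++)
		assert(shift_left_64(u, b) ==
		       (long long)((unsigned long long)u << b));
	puts("ok");
	return 0;
}

The union-based type punning is the same device the patch uses; it keeps the helpers free of 64-bit arithmetic, which is the whole point on a 32-bit SH core where gcc would otherwise emit calls back into these very routines.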
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index ebb55d1149f..3baff31e58c 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -2,13 +2,32 @@ # Makefile for SuperH-specific library files.. # -lib-y = delay.o io.o memset.o memmove.o memchr.o \ +lib-y = delay.o memmove.o memchr.o \ checksum.o strlen.o div64.o div64-generic.o +# Extracted from libgcc +obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ + ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \ + udiv_qrnnd.o + +udivsi3-y := udivsi3_i4i-Os.o + +ifneq ($(CONFIG_CC_OPTIMIZE_FOR_SIZE),y) +udivsi3-$(CONFIG_CPU_SH3) := udivsi3_i4i.o +udivsi3-$(CONFIG_CPU_SH4) := udivsi3_i4i.o +endif +udivsi3-y += udivsi3.o + +obj-y += io.o + memcpy-y := memcpy.o memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o -lib-$(CONFIG_MMU) += copy_page.o clear_page.o -lib-y += $(memcpy-y) +memset-y := memset.o +memset-$(CONFIG_CPU_SH4) := memset-sh4.o + +lib-$(CONFIG_MMU) += copy_page.o __clear_user.o +lib-$(CONFIG_MCOUNT) += mcount.o +lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y) -EXTRA_CFLAGS += -Werror +ccflags-y := -Werror diff --git a/arch/sh/lib/clear_page.S b/arch/sh/lib/__clear_user.S index 3539123fe51..db1dca7aad1 100644 --- a/arch/sh/lib/clear_page.S +++ b/arch/sh/lib/__clear_user.S @@ -8,56 +8,10 @@ #include <linux/linkage.h> #include <asm/page.h> -/* - * clear_page - * @to: P1 address - * - * void clear_page(void *to) - */ - -/* - * r0 --- scratch - * r4 --- to - * r5 --- to + PAGE_SIZE - */ -ENTRY(clear_page) - mov r4,r5 - mov.l .Llimit,r0 - add r0,r5 - mov #0,r0 - ! -1: -#if defined(CONFIG_CPU_SH3) - mov.l r0,@r4 -#elif defined(CONFIG_CPU_SH4) - movca.l r0,@r4 - mov r4,r1 -#endif - add #32,r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 -#if defined(CONFIG_CPU_SH4) - ocbwb @r1 -#endif - cmp/eq r5,r4 - bf/s 1b - add #28,r4 - ! - rts - nop - - .balign 4 -.Llimit: .long (PAGE_SIZE-28) - ENTRY(__clear_user) ! mov #0, r0 - mov #0xe0, r1 ! 0xffffffe0 + mov #0xffffffe0, r1 ! ! r4..(r4+31)&~32 -------- not aligned [ Area 0 ] ! (r4+31)&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] diff --git a/arch/sh/lib/ashiftrt.S b/arch/sh/lib/ashiftrt.S new file mode 100644 index 00000000000..45ce86558f4 --- /dev/null +++ b/arch/sh/lib/ashiftrt.S @@ -0,0 +1,149 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. 
If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .global __ashiftrt_r4_0 + .global __ashiftrt_r4_1 + .global __ashiftrt_r4_2 + .global __ashiftrt_r4_3 + .global __ashiftrt_r4_4 + .global __ashiftrt_r4_5 + .global __ashiftrt_r4_6 + .global __ashiftrt_r4_7 + .global __ashiftrt_r4_8 + .global __ashiftrt_r4_9 + .global __ashiftrt_r4_10 + .global __ashiftrt_r4_11 + .global __ashiftrt_r4_12 + .global __ashiftrt_r4_13 + .global __ashiftrt_r4_14 + .global __ashiftrt_r4_15 + .global __ashiftrt_r4_16 + .global __ashiftrt_r4_17 + .global __ashiftrt_r4_18 + .global __ashiftrt_r4_19 + .global __ashiftrt_r4_20 + .global __ashiftrt_r4_21 + .global __ashiftrt_r4_22 + .global __ashiftrt_r4_23 + .global __ashiftrt_r4_24 + .global __ashiftrt_r4_25 + .global __ashiftrt_r4_26 + .global __ashiftrt_r4_27 + .global __ashiftrt_r4_28 + .global __ashiftrt_r4_29 + .global __ashiftrt_r4_30 + .global __ashiftrt_r4_31 + .global __ashiftrt_r4_32 + + .align 1 +__ashiftrt_r4_32: +__ashiftrt_r4_31: + rotcl r4 + rts + subc r4,r4 +__ashiftrt_r4_30: + shar r4 +__ashiftrt_r4_29: + shar r4 +__ashiftrt_r4_28: + shar r4 +__ashiftrt_r4_27: + shar r4 +__ashiftrt_r4_26: + shar r4 +__ashiftrt_r4_25: + shar r4 +__ashiftrt_r4_24: + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 +__ashiftrt_r4_23: + shar r4 +__ashiftrt_r4_22: + shar r4 +__ashiftrt_r4_21: + shar r4 +__ashiftrt_r4_20: + shar r4 +__ashiftrt_r4_19: + shar r4 +__ashiftrt_r4_18: + shar r4 +__ashiftrt_r4_17: + shar r4 +__ashiftrt_r4_16: + shlr16 r4 + rts + exts.w r4,r4 +__ashiftrt_r4_15: + shar r4 +__ashiftrt_r4_14: + shar r4 +__ashiftrt_r4_13: + shar r4 +__ashiftrt_r4_12: + shar r4 +__ashiftrt_r4_11: + shar r4 +__ashiftrt_r4_10: + shar r4 +__ashiftrt_r4_9: + shar r4 +__ashiftrt_r4_8: + shar r4 +__ashiftrt_r4_7: + shar r4 +__ashiftrt_r4_6: + shar r4 +__ashiftrt_r4_5: + shar r4 +__ashiftrt_r4_4: + shar r4 +__ashiftrt_r4_3: + shar r4 +__ashiftrt_r4_2: + shar r4 +__ashiftrt_r4_1: + rts + shar r4 +__ashiftrt_r4_0: + rts + nop diff --git a/arch/sh/lib/ashldi3.c b/arch/sh/lib/ashldi3.c new file mode 100644 index 00000000000..beb80f31609 --- /dev/null +++ b/arch/sh/lib/ashldi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashldi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.low = 0; + w.s.high = (unsigned int) uu.s.low << -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.low >> bm; + + w.s.low = (unsigned int) uu.s.low << b; + w.s.high = ((unsigned int) uu.s.high << b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashldi3); diff --git a/arch/sh/lib/ashlsi3.S b/arch/sh/lib/ashlsi3.S new file mode 100644 index 00000000000..bd47e9b403a --- /dev/null +++ b/arch/sh/lib/ashlsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. 
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashlsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + .global __ashlsi3 + + .align 2 +__ashlsi3: + mov #31,r0 + and r0,r5 + mova ashlsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashlsi3_table: + .byte ashlsi3_0-ashlsi3_table + .byte ashlsi3_1-ashlsi3_table + .byte ashlsi3_2-ashlsi3_table + .byte ashlsi3_3-ashlsi3_table + .byte ashlsi3_4-ashlsi3_table + .byte ashlsi3_5-ashlsi3_table + .byte ashlsi3_6-ashlsi3_table + .byte ashlsi3_7-ashlsi3_table + .byte ashlsi3_8-ashlsi3_table + .byte ashlsi3_9-ashlsi3_table + .byte ashlsi3_10-ashlsi3_table + .byte ashlsi3_11-ashlsi3_table + .byte ashlsi3_12-ashlsi3_table + .byte ashlsi3_13-ashlsi3_table + .byte ashlsi3_14-ashlsi3_table + .byte ashlsi3_15-ashlsi3_table + .byte ashlsi3_16-ashlsi3_table + .byte ashlsi3_17-ashlsi3_table + .byte ashlsi3_18-ashlsi3_table + .byte ashlsi3_19-ashlsi3_table + .byte ashlsi3_20-ashlsi3_table + .byte ashlsi3_21-ashlsi3_table + .byte ashlsi3_22-ashlsi3_table + .byte ashlsi3_23-ashlsi3_table + .byte ashlsi3_24-ashlsi3_table + .byte ashlsi3_25-ashlsi3_table + .byte ashlsi3_26-ashlsi3_table + .byte ashlsi3_27-ashlsi3_table + .byte ashlsi3_28-ashlsi3_table + .byte ashlsi3_29-ashlsi3_table + .byte ashlsi3_30-ashlsi3_table + .byte ashlsi3_31-ashlsi3_table + +ashlsi3_6: + shll2 r0 +ashlsi3_4: + shll2 r0 +ashlsi3_2: + rts + shll2 r0 + +ashlsi3_7: + shll2 r0 +ashlsi3_5: + shll2 r0 +ashlsi3_3: + shll2 r0 +ashlsi3_1: + rts + shll r0 + +ashlsi3_14: + shll2 r0 +ashlsi3_12: + shll2 r0 +ashlsi3_10: + shll2 r0 +ashlsi3_8: + rts + shll8 r0 + +ashlsi3_15: + shll2 r0 +ashlsi3_13: + shll2 r0 +ashlsi3_11: + shll2 r0 +ashlsi3_9: + shll8 r0 + rts + shll r0 + +ashlsi3_22: + shll2 r0 +ashlsi3_20: + shll2 r0 +ashlsi3_18: + shll2 r0 +ashlsi3_16: + rts + shll16 r0 + +ashlsi3_23: + shll2 r0 +ashlsi3_21: + shll2 r0 +ashlsi3_19: + shll2 r0 +ashlsi3_17: + shll16 r0 + 
rts + shll r0 + +ashlsi3_30: + shll2 r0 +ashlsi3_28: + shll2 r0 +ashlsi3_26: + shll2 r0 +ashlsi3_24: + shll16 r0 + rts + shll8 r0 + +ashlsi3_31: + shll2 r0 +ashlsi3_29: + shll2 r0 +ashlsi3_27: + shll2 r0 +ashlsi3_25: + shll16 r0 + shll8 r0 + rts + shll r0 + +ashlsi3_0: + rts + nop diff --git a/arch/sh/lib/ashrdi3.c b/arch/sh/lib/ashrdi3.c new file mode 100644 index 00000000000..c884a912b66 --- /dev/null +++ b/arch/sh/lib/ashrdi3.c @@ -0,0 +1,31 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __ashrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + /* w.s.high = 1..1 or 0..0 */ + w.s.high = + uu.s.high >> 31; + w.s.low = uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__ashrdi3); diff --git a/arch/sh/lib/ashrsi3.S b/arch/sh/lib/ashrsi3.S new file mode 100644 index 00000000000..6f3cf46b77c --- /dev/null +++ b/arch/sh/lib/ashrsi3.S @@ -0,0 +1,185 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __ashrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! 
+ + .global __ashrsi3 + + .align 2 +__ashrsi3: + mov #31,r0 + and r0,r5 + mova ashrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +ashrsi3_table: + .byte ashrsi3_0-ashrsi3_table + .byte ashrsi3_1-ashrsi3_table + .byte ashrsi3_2-ashrsi3_table + .byte ashrsi3_3-ashrsi3_table + .byte ashrsi3_4-ashrsi3_table + .byte ashrsi3_5-ashrsi3_table + .byte ashrsi3_6-ashrsi3_table + .byte ashrsi3_7-ashrsi3_table + .byte ashrsi3_8-ashrsi3_table + .byte ashrsi3_9-ashrsi3_table + .byte ashrsi3_10-ashrsi3_table + .byte ashrsi3_11-ashrsi3_table + .byte ashrsi3_12-ashrsi3_table + .byte ashrsi3_13-ashrsi3_table + .byte ashrsi3_14-ashrsi3_table + .byte ashrsi3_15-ashrsi3_table + .byte ashrsi3_16-ashrsi3_table + .byte ashrsi3_17-ashrsi3_table + .byte ashrsi3_18-ashrsi3_table + .byte ashrsi3_19-ashrsi3_table + .byte ashrsi3_20-ashrsi3_table + .byte ashrsi3_21-ashrsi3_table + .byte ashrsi3_22-ashrsi3_table + .byte ashrsi3_23-ashrsi3_table + .byte ashrsi3_24-ashrsi3_table + .byte ashrsi3_25-ashrsi3_table + .byte ashrsi3_26-ashrsi3_table + .byte ashrsi3_27-ashrsi3_table + .byte ashrsi3_28-ashrsi3_table + .byte ashrsi3_29-ashrsi3_table + .byte ashrsi3_30-ashrsi3_table + .byte ashrsi3_31-ashrsi3_table + +ashrsi3_31: + rotcl r0 + rts + subc r0,r0 + +ashrsi3_30: + shar r0 +ashrsi3_29: + shar r0 +ashrsi3_28: + shar r0 +ashrsi3_27: + shar r0 +ashrsi3_26: + shar r0 +ashrsi3_25: + shar r0 +ashrsi3_24: + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +ashrsi3_23: + shar r0 +ashrsi3_22: + shar r0 +ashrsi3_21: + shar r0 +ashrsi3_20: + shar r0 +ashrsi3_19: + shar r0 +ashrsi3_18: + shar r0 +ashrsi3_17: + shar r0 +ashrsi3_16: + shlr16 r0 + rts + exts.w r0,r0 + +ashrsi3_15: + shar r0 +ashrsi3_14: + shar r0 +ashrsi3_13: + shar r0 +ashrsi3_12: + shar r0 +ashrsi3_11: + shar r0 +ashrsi3_10: + shar r0 +ashrsi3_9: + shar r0 +ashrsi3_8: + shar r0 +ashrsi3_7: + shar r0 +ashrsi3_6: + shar r0 +ashrsi3_5: + shar r0 +ashrsi3_4: + shar r0 +ashrsi3_3: + shar r0 +ashrsi3_2: + shar r0 +ashrsi3_1: + rts + shar r0 + +ashrsi3_0: + rts + nop diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index cbdd0d40e54..356c8ec9289 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -36,8 +36,7 @@ */ /* - * unsigned int csum_partial(const unsigned char *buf, int len, - * unsigned int sum); + * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); */ .text @@ -49,11 +48,31 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ - mov r5, r1 mov r4, r0 - tst #2, r0 ! Check alignment. - bt 2f ! Jump if alignment is ok. + tst #3, r0 ! Check alignment. + bt/s 2f ! Jump if alignment is ok. + mov r4, r7 ! Keep a copy to check for alignment ! + tst #1, r0 ! Check alignment. + bt 21f ! Jump if alignment is boundary of 2bytes. + + ! buf is odd + tst r5, r5 + add #-1, r5 + bt 9f + mov.b @r4+, r0 + extu.b r0, r0 + addc r0, r6 ! t=0 from previous tst + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 + mov r4, r0 + tst #2, r0 + bt 2f +21: + ! buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -61,16 +80,17 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: - mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: + ! 
buf is 4 byte aligned (len could be 0) + mov r5, r1 mov #-5, r0 - shld r0, r5 - tst r5, r5 + shld r0, r1 + tst r1, r1 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -92,30 +112,31 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r5 + dt r1 bf/s 3b cmp/eq #1, r0 - ! here, we know r5==0 - addc r5, r6 ! add carry to r6 + ! here, we know r1==0 + addc r1, r6 ! add carry to r6 4: - mov r1, r0 + mov r5, r0 and #0x1c, r0 tst r0, r0 - bt/s 6f - mov r0, r5 - shlr2 r5 + bt 6f + ! 4 bytes or more remaining + mov r0, r1 + shlr2 r1 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r5 + dt r1 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r5, r6 ! r5==0 here, so it means add carry-bit + addc r1, r6 ! r1==0 here, so it means add carry-bit 6: - mov r1, r5 + ! 3 bytes or less remaining mov #3, r0 and r0, r5 tst r5, r5 @@ -139,8 +160,18 @@ ENTRY(csum_partial) 8: addc r0, r6 mov #0, r0 - addc r0, r6 + addc r0, r6 9: + ! Check if the buffer was misaligned, if so realign sum + mov r7, r0 + tst #1, r0 + bt 10f + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 +10: rts mov r6, r0 diff --git a/arch/sh/lib/copy_page.S b/arch/sh/lib/copy_page.S index e002b91c875..9d7b8bc5186 100644 --- a/arch/sh/lib/copy_page.S +++ b/arch/sh/lib/copy_page.S @@ -30,7 +30,9 @@ ENTRY(copy_page) mov r4,r10 mov r5,r11 mov r5,r8 - mov.l .Lpsz,r0 + mov #(PAGE_SIZE >> 10), r0 + shll8 r0 + shll2 r0 add r0,r8 ! 1: mov.l @r11+,r0 @@ -41,11 +43,10 @@ ENTRY(copy_page) mov.l @r11+,r5 mov.l @r11+,r6 mov.l @r11+,r7 -#if defined(CONFIG_CPU_SH3) - mov.l r0,@r10 -#elif defined(CONFIG_CPU_SH4) +#if defined(CONFIG_CPU_SH4) movca.l r0,@r10 - mov r10,r0 +#else + mov.l r0,@r10 #endif add #32,r10 mov.l r7,@-r10 @@ -55,9 +56,6 @@ ENTRY(copy_page) mov.l r3,@-r10 mov.l r2,@-r10 mov.l r1,@-r10 -#if defined(CONFIG_CPU_SH4) - ocbwb @r0 -#endif cmp/eq r11,r8 bf/s 1b add #28,r10 @@ -68,9 +66,6 @@ ENTRY(copy_page) rts nop - .balign 4 -.Lpsz: .long PAGE_SIZE - /* * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); * Return the number of bytes NOT copied @@ -80,6 +75,11 @@ ENTRY(copy_page) .section __ex_table, "a"; \ .long 9999b, 6000f ; \ .previous +#define EX_NO_POP(...) \ + 9999: __VA_ARGS__ ; \ + .section __ex_table, "a"; \ + .long 9999b, 6005f ; \ + .previous ENTRY(__copy_user) ! Check if small number of bytes mov #11,r0 @@ -139,9 +139,9 @@ EX( mov.b r1,@r4 ) bt 1f 2: -EX( mov.b @r5+,r0 ) +EX_NO_POP( mov.b @r5+,r0 ) dt r6 -EX( mov.b r0,@r4 ) +EX_NO_POP( mov.b r0,@r4 ) bf/s 2b add #1,r4 @@ -150,7 +150,7 @@ EX( mov.b r0,@r4 ) # Exception handler: .section .fixup, "ax" -6000: +6005: mov.l 8000f,r1 mov r3,r0 jmp @r1 diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index f3ddd2133e6..0901b2f14e1 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -10,6 +10,16 @@ void __delay(unsigned long loops) { __asm__ __volatile__( + /* + * ST40-300 appears to have an issue with this code, + * normally taking two cycles each loop, as with all + * other SH variants. If however the branch and the + * delay slot straddle an 8 byte boundary, this increases + * to 3 cycles. + * This align directive ensures this doesn't occur. 
+ */ + ".balign 8\n\t" + "tst %0, %0\n\t" "1:\t" "bf/s 1b\n\t" @@ -21,13 +31,14 @@ void __delay(unsigned long loops) inline void __const_udelay(unsigned long xloops) { + xloops *= 4; __asm__("dmulu.l %0, %2\n\t" "sts mach, %0" : "=r" (xloops) : "0" (xloops), - "r" (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) + "r" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)) : "macl", "mach"); - __delay(xloops); + __delay(++xloops); } void __udelay(unsigned long usecs) diff --git a/arch/sh/lib/div64-generic.c b/arch/sh/lib/div64-generic.c index 4bef3b5d964..60e76aa8b53 100644 --- a/arch/sh/lib/div64-generic.c +++ b/arch/sh/lib/div64-generic.c @@ -3,6 +3,7 @@ */ #include <linux/types.h> +#include <asm/div64.h> extern uint64_t __xdiv64_32(u64 n, u32 d); diff --git a/arch/sh/lib/io.c b/arch/sh/lib/io.c index 4f54ec43516..88dfe6e396b 100644 --- a/arch/sh/lib/io.c +++ b/arch/sh/lib/io.c @@ -14,12 +14,12 @@ #include <linux/module.h> #include <linux/io.h> -void __raw_readsl(unsigned long addr, void *datap, int len) +void __raw_readsl(const void __iomem *addr, void *datap, int len) { u32 *data; for (data = datap; (len != 0) && (((u32)data & 0x1f) != 0); len--) - *data++ = ctrl_inl(addr); + *data++ = __raw_readl(addr); if (likely(len >= (0x20 >> 2))) { int tmp2, tmp3, tmp4, tmp5, tmp6; @@ -59,11 +59,11 @@ void __raw_readsl(unsigned long addr, void *datap, int len) } for (; len != 0; len--) - *data++ = ctrl_inl(addr); + *data++ = __raw_readl(addr); } EXPORT_SYMBOL(__raw_readsl); -void __raw_writesl(unsigned long addr, const void *data, int len) +void __raw_writesl(void __iomem *addr, const void *data, int len) { if (likely(len != 0)) { int tmp1; diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h new file mode 100644 index 00000000000..05909d58e2f --- /dev/null +++ b/arch/sh/lib/libgcc.h @@ -0,0 +1,25 @@ +#ifndef __ASM_LIBGCC_H +#define __ASM_LIBGCC_H + +#include <asm/byteorder.h> + +typedef int word_type __attribute__ ((mode (__word__))); + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/arch/sh/lib/lshrdi3.c b/arch/sh/lib/lshrdi3.c new file mode 100644 index 00000000000..dcf8d6810b7 --- /dev/null +++ b/arch/sh/lib/lshrdi3.c @@ -0,0 +1,29 @@ +#include <linux/module.h> + +#include "libgcc.h" + +long long __lshrdi3(long long u, word_type b) +{ + DWunion uu, w; + word_type bm; + + if (b == 0) + return u; + + uu.ll = u; + bm = 32 - b; + + if (bm <= 0) { + w.s.high = 0; + w.s.low = (unsigned int) uu.s.high >> -bm; + } else { + const unsigned int carries = (unsigned int) uu.s.high << bm; + + w.s.high = (unsigned int) uu.s.high >> b; + w.s.low = ((unsigned int) uu.s.low >> b) | carries; + } + + return w.ll; +} + +EXPORT_SYMBOL(__lshrdi3); diff --git a/arch/sh/lib/lshrsi3.S b/arch/sh/lib/lshrsi3.S new file mode 100644 index 00000000000..1e7aaa55713 --- /dev/null +++ b/arch/sh/lib/lshrsi3.S @@ -0,0 +1,193 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +! +! __lshrsi3 +! +! Entry: +! +! r4: Value to shift +! r5: Shifts +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! (none) +! + .global __lshrsi3 + + .align 2 +__lshrsi3: + mov #31,r0 + and r0,r5 + mova lshrsi3_table,r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +lshrsi3_table: + .byte lshrsi3_0-lshrsi3_table + .byte lshrsi3_1-lshrsi3_table + .byte lshrsi3_2-lshrsi3_table + .byte lshrsi3_3-lshrsi3_table + .byte lshrsi3_4-lshrsi3_table + .byte lshrsi3_5-lshrsi3_table + .byte lshrsi3_6-lshrsi3_table + .byte lshrsi3_7-lshrsi3_table + .byte lshrsi3_8-lshrsi3_table + .byte lshrsi3_9-lshrsi3_table + .byte lshrsi3_10-lshrsi3_table + .byte lshrsi3_11-lshrsi3_table + .byte lshrsi3_12-lshrsi3_table + .byte lshrsi3_13-lshrsi3_table + .byte lshrsi3_14-lshrsi3_table + .byte lshrsi3_15-lshrsi3_table + .byte lshrsi3_16-lshrsi3_table + .byte lshrsi3_17-lshrsi3_table + .byte lshrsi3_18-lshrsi3_table + .byte lshrsi3_19-lshrsi3_table + .byte lshrsi3_20-lshrsi3_table + .byte lshrsi3_21-lshrsi3_table + .byte lshrsi3_22-lshrsi3_table + .byte lshrsi3_23-lshrsi3_table + .byte lshrsi3_24-lshrsi3_table + .byte lshrsi3_25-lshrsi3_table + .byte lshrsi3_26-lshrsi3_table + .byte lshrsi3_27-lshrsi3_table + .byte lshrsi3_28-lshrsi3_table + .byte lshrsi3_29-lshrsi3_table + .byte lshrsi3_30-lshrsi3_table + .byte lshrsi3_31-lshrsi3_table + +lshrsi3_6: + shlr2 r0 +lshrsi3_4: + shlr2 r0 +lshrsi3_2: + rts + shlr2 r0 + +lshrsi3_7: + shlr2 r0 +lshrsi3_5: + shlr2 r0 +lshrsi3_3: + shlr2 r0 +lshrsi3_1: + rts + shlr r0 + +lshrsi3_14: + shlr2 r0 +lshrsi3_12: + shlr2 r0 +lshrsi3_10: + shlr2 r0 +lshrsi3_8: + rts + shlr8 r0 + +lshrsi3_15: + shlr2 r0 +lshrsi3_13: + shlr2 r0 +lshrsi3_11: + shlr2 r0 +lshrsi3_9: + shlr8 r0 + rts + shlr r0 + +lshrsi3_22: + shlr2 r0 +lshrsi3_20: + shlr2 r0 +lshrsi3_18: + shlr2 r0 +lshrsi3_16: + rts + shlr16 r0 + +lshrsi3_23: + shlr2 r0 +lshrsi3_21: + shlr2 r0 +lshrsi3_19: + shlr2 r0 +lshrsi3_17: + shlr16 r0 + rts + shlr r0 + +lshrsi3_30: + shlr2 r0 +lshrsi3_28: + shlr2 r0 +lshrsi3_26: + shlr2 r0 +lshrsi3_24: + shlr16 r0 + rts + shlr8 r0 + +lshrsi3_31: + shlr2 r0 +lshrsi3_29: + shlr2 r0 +lshrsi3_27: + shlr2 r0 +lshrsi3_25: + shlr16 
r0 + shlr8 r0 + rts + shlr r0 + +lshrsi3_0: + rts + nop diff --git a/arch/sh/lib/mcount.S b/arch/sh/lib/mcount.S new file mode 100644 index 00000000000..52aa2011d75 --- /dev/null +++ b/arch/sh/lib/mcount.S @@ -0,0 +1,310 @@ +/* + * arch/sh/lib/mcount.S + * + * Copyright (C) 2008, 2009 Paul Mundt + * Copyright (C) 2008, 2009 Matt Fleming + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <asm/ftrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#define MCOUNT_ENTER() \ + mov.l r4, @-r15; \ + mov.l r5, @-r15; \ + mov.l r6, @-r15; \ + mov.l r7, @-r15; \ + sts.l pr, @-r15; \ + \ + mov.l @(20,r15),r4; \ + sts pr, r5 + +#define MCOUNT_LEAVE() \ + lds.l @r15+, pr; \ + mov.l @r15+, r7; \ + mov.l @r15+, r6; \ + mov.l @r15+, r5; \ + rts; \ + mov.l @r15+, r4 + +#ifdef CONFIG_STACK_DEBUG +/* + * Perform diagnostic checks on the state of the kernel stack. + * + * Check for stack overflow. If there is less than 1KB free + * then it has overflowed. + * + * Make sure the stack pointer contains a valid address. Valid + * addresses for kernel stacks are anywhere after the bss + * (after __bss_stop) and anywhere in init_thread_union (init_stack). + */ +#define STACK_CHECK() \ + mov #(THREAD_SIZE >> 10), r0; \ + shll8 r0; \ + shll2 r0; \ + \ + /* r1 = sp & (THREAD_SIZE - 1) */ \ + mov #-1, r1; \ + add r0, r1; \ + and r15, r1; \ + \ + mov #TI_SIZE, r3; \ + mov #(STACK_WARN >> 8), r2; \ + shll8 r2; \ + add r3, r2; \ + \ + /* Is the stack overflowing? */ \ + cmp/hi r2, r1; \ + bf stack_panic; \ + \ + /* If sp > __bss_stop then we're OK. */ \ + mov.l .L_ebss, r1; \ + cmp/hi r1, r15; \ + bt 1f; \ + \ + /* If sp < init_stack, we're not OK. */ \ + mov.l .L_init_thread_union, r1; \ + cmp/hs r1, r15; \ + bf stack_panic; \ + \ + /* If sp > init_stack && sp < __bss_stop, not OK. */ \ + add r0, r1; \ + cmp/hs r1, r15; \ + bt stack_panic; \ +1: +#else +#define STACK_CHECK() +#endif /* CONFIG_STACK_DEBUG */ + + .align 2 + .globl _mcount + .type _mcount,@function + .globl mcount + .type mcount,@function +_mcount: +mcount: + STACK_CHECK() + +#ifndef CONFIG_FUNCTION_TRACER + rts + nop +#else +#ifndef CONFIG_DYNAMIC_FTRACE + mov.l .Lfunction_trace_stop, r0 + mov.l @r0, r0 + tst r0, r0 + bf ftrace_stub +#endif + + MCOUNT_ENTER() + +#ifdef CONFIG_DYNAMIC_FTRACE + .globl mcount_call +mcount_call: + mov.l .Lftrace_stub, r6 +#else + mov.l .Lftrace_trace_function, r6 + mov.l ftrace_stub, r7 + cmp/eq r6, r7 + bt skip_trace + mov.l @r6, r6 +#endif + + jsr @r6 + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + mov.l .Lftrace_graph_return, r6 + mov.l .Lftrace_stub, r7 + cmp/eq r6, r7 + bt 1f + + mov.l .Lftrace_graph_caller, r0 + jmp @r0 + nop + +1: + mov.l .Lftrace_graph_entry, r6 + mov.l .Lftrace_graph_entry_stub, r7 + cmp/eq r6, r7 + bt skip_trace + + mov.l .Lftrace_graph_caller, r0 + jmp @r0 + nop + + .align 2 +.Lftrace_graph_return: + .long ftrace_graph_return +.Lftrace_graph_entry: + .long ftrace_graph_entry +.Lftrace_graph_entry_stub: + .long ftrace_graph_entry_stub +.Lftrace_graph_caller: + .long ftrace_graph_caller +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + .globl skip_trace +skip_trace: + MCOUNT_LEAVE() + + .align 2 +.Lftrace_trace_function: + .long ftrace_trace_function + +#ifdef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * NOTE: Do not move either ftrace_graph_call or ftrace_caller + * as this will affect the calculation of GRAPH_INSN_OFFSET. 
+ */ + .globl ftrace_graph_call +ftrace_graph_call: + mov.l .Lskip_trace, r0 + jmp @r0 + nop + + .align 2 +.Lskip_trace: + .long skip_trace +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + .globl ftrace_caller +ftrace_caller: + mov.l .Lfunction_trace_stop, r0 + mov.l @r0, r0 + tst r0, r0 + bf ftrace_stub + + MCOUNT_ENTER() + + .globl ftrace_call +ftrace_call: + mov.l .Lftrace_stub, r6 + jsr @r6 + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + bra ftrace_graph_call + nop +#else + MCOUNT_LEAVE() +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .align 2 +.Lfunction_trace_stop: + .long function_trace_stop + +/* + * NOTE: From here on the locations of the .Lftrace_stub label and + * ftrace_stub itself are fixed. Adding additional data here will skew + * the displacement for the memory table and break the block replacement. + * Place new labels either after the ftrace_stub body, or before + * ftrace_caller. You have been warned. + */ +.Lftrace_stub: + .long ftrace_stub + + .globl ftrace_stub +ftrace_stub: + rts + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_caller +ftrace_graph_caller: + mov.l 2f, r0 + mov.l @r0, r0 + tst r0, r0 + bt 1f + + mov.l 3f, r1 + jmp @r1 + nop +1: + /* + * MCOUNT_ENTER() pushed 5 registers onto the stack, so + * the stack address containing our return address is + * r15 + 20. + */ + mov #20, r0 + add r15, r0 + mov r0, r4 + + mov.l .Lprepare_ftrace_return, r0 + jsr @r0 + nop + + MCOUNT_LEAVE() + + .align 2 +2: .long function_trace_stop +3: .long skip_trace +.Lprepare_ftrace_return: + .long prepare_ftrace_return + + .globl return_to_handler +return_to_handler: + /* + * Save the return values. + */ + mov.l r0, @-r15 + mov.l r1, @-r15 + + mov #0, r4 + + mov.l .Lftrace_return_to_handler, r0 + jsr @r0 + nop + + /* + * The return value from ftrace_return_handler has the real + * address that we should return to. + */ + lds r0, pr + mov.l @r15+, r1 + rts + mov.l @r15+, r0 + + + .align 2 +.Lftrace_return_to_handler: + .long ftrace_return_to_handler +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_STACK_DEBUG + .globl stack_panic +stack_panic: + mov.l .Ldump_stack, r0 + jsr @r0 + nop + + mov.l .Lpanic, r0 + jsr @r0 + mov.l .Lpanic_s, r4 + + rts + nop + + .align 2 +.L_init_thread_union: + .long init_thread_union +.L_ebss: + .long __bss_stop +.Lpanic: + .long panic +.Lpanic_s: + .long .Lpanic_str +.Ldump_stack: + .long dump_stack + + .section .rodata + .align 2 +.Lpanic_str: + .string "Stack error" +#endif /* CONFIG_STACK_DEBUG */ diff --git a/arch/sh/lib/memcpy-sh4.S b/arch/sh/lib/memcpy-sh4.S index 560bc17eebd..459fa92a7c5 100644 --- a/arch/sh/lib/memcpy-sh4.S +++ b/arch/sh/lib/memcpy-sh4.S @@ -126,10 +126,10 @@ mov.l r3,@-r0 ! 30 LS #else -3: mov r1,r3 ! OPQR +3: mov r7,r3 ! OPQR shlr8 r3 ! xOPQ - mov.l @(r0,r5),r1 ! KLMN - mov r1,r6 + mov.l @(r0,r5),r7 ! KLMN + mov r7,r6 shll16 r6 shll8 r6 ! Nxxx or r6,r3 ! NOPQ @@ -733,24 +733,24 @@ ENTRY(memcpy) movca.l r0,@r1 ! 40 LS (latency=3-7) add #-0x1c, r1 ! 50 EX - mov.l r3, @(0x1c,r1) ! 33 LS + mov.l r3, @(0x18,r1) ! 33 LS xtrct r11, r10 ! 48 EX - mov.l r6, @(0x18,r1) ! 33 LS + mov.l r6, @(0x14,r1) ! 33 LS xtrct r12, r11 ! 48 EX - mov.l r7, @(0x14,r1) ! 33 LS + mov.l r7, @(0x10,r1) ! 33 LS - mov.l r8, @(0x10,r1) ! 33 LS - add #-0x3e, r5 ! 50 EX + mov.l r8, @(0x0c,r1) ! 33 LS + add #-0x1e, r5 ! 50 EX - mov.l r9, @(0x0c,r1) ! 33 LS + mov.l r9, @(0x08,r1) ! 33 LS cmp/eq r2,r1 ! 54 MT - mov.l r10, @(0x08,r1) ! 33 LS + mov.l r10, @(0x04,r1) ! 
33 LS bf/s 2b ! 109 BR - mov.l r11, @(0x04,r1) ! 33 LS + mov.l r11, @(0x00,r1) ! 33 LS #endif mov.l @r15+, r12 diff --git a/arch/sh/lib/memset-sh4.S b/arch/sh/lib/memset-sh4.S new file mode 100644 index 00000000000..1a6e32cc4e4 --- /dev/null +++ b/arch/sh/lib/memset-sh4.S @@ -0,0 +1,107 @@ +/* + * "memset" implementation for SH4 + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (c) 2009 STMicroelectronics Limited + * Author: Stuart Menefy <stuart.menefy:st.com> + */ + +/* + * void *memset(void *s, int c, size_t n); + */ + +#include <linux/linkage.h> + +ENTRY(memset) + mov #12,r0 + add r6,r4 + cmp/gt r6,r0 + bt/s 40f ! if it's too small, set a byte at once + mov r4,r0 + and #3,r0 + cmp/eq #0,r0 + bt/s 2f ! It's aligned + sub r0,r6 +1: + dt r0 + bf/s 1b + mov.b r5,@-r4 +2: ! make VVVV + extu.b r5,r5 + swap.b r5,r0 ! V0 + or r0,r5 ! VV + swap.w r5,r0 ! VV00 + or r0,r5 ! VVVV + + ! Check if enough bytes need to be copied to be worth the big loop + mov #0x40, r0 ! (MT) + cmp/gt r6,r0 ! (MT) 64 > len => slow loop + + bt/s 22f + mov r6,r0 + + ! align the dst to the cache block size if necessary + mov r4, r3 + mov #~(0x1f), r1 + + and r3, r1 + cmp/eq r3, r1 + + bt/s 11f ! dst is already aligned + sub r1, r3 ! r3-r1 -> r3 + shlr2 r3 ! number of loops + +10: mov.l r5,@-r4 + dt r3 + bf/s 10b + add #-4, r6 + +11: ! dst is 32byte aligned + mov r6,r2 + mov #-5,r0 + shld r0,r2 ! number of loops + + add #-32, r4 + mov r5, r0 +12: + movca.l r0,@r4 + mov.l r5,@(4, r4) + mov.l r5,@(8, r4) + mov.l r5,@(12,r4) + mov.l r5,@(16,r4) + mov.l r5,@(20,r4) + add #-0x20, r6 + mov.l r5,@(24,r4) + dt r2 + mov.l r5,@(28,r4) + bf/s 12b + add #-32, r4 + + add #32, r4 + mov #8, r0 + cmp/ge r0, r6 + bf 40f + + mov r6,r0 +22: + shlr2 r0 + shlr r0 ! r0 = r6 >> 3 +3: + dt r0 + mov.l r5,@-r4 ! set 8-byte at once + bf/s 3b + mov.l r5,@-r4 + ! + mov #7,r0 + and r0,r6 + + ! fill bytes (length may be zero) +40: tst r6,r6 + bt 5f +4: + dt r6 + bf/s 4b + mov.b r5,@-r4 +5: + rts + mov r4,r0 diff --git a/arch/sh/lib/movmem.S b/arch/sh/lib/movmem.S new file mode 100644 index 00000000000..62075f6bc67 --- /dev/null +++ b/arch/sh/lib/movmem.S @@ -0,0 +1,238 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. 
+!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + .text + .balign 4 + .global __movmem + .global __movstr + .set __movstr, __movmem + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +__movmem: + sts.l pr,@-r15 + shll2 r6 + bsr __movmemSI52+2 + mov.l @(48,r5),r0 + .balign 4 +movmem_loop: /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt movmem_done + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt __movmemSI52 +! done all the large groups, do the remainder +! jump to movmem+ + mova __movmemSI4+4,r0 + add r6,r0 + jmp @r0 +movmem_done: ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 + + .global __movmemSI64 + .global __movstrSI64 + .set __movstrSI64, __movmemSI64 +__movmemSI64: + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global __movmemSI60 + .global __movstrSI60 + .set __movstrSI60, __movmemSI60 +__movmemSI60: + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global __movmemSI56 + .global __movstrSI56 + .set __movstrSI56, __movmemSI56 +__movmemSI56: + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global __movmemSI52 + .global __movstrSI52 + .set __movstrSI52, __movmemSI52 +__movmemSI52: + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global __movmemSI48 + .global __movstrSI48 + .set __movstrSI48, __movmemSI48 +__movmemSI48: + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global __movmemSI44 + .global __movstrSI44 + .set __movstrSI44, __movmemSI44 +__movmemSI44: + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global __movmemSI40 + .global __movstrSI40 + .set __movstrSI40, __movmemSI40 +__movmemSI40: + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global __movmemSI36 + .global __movstrSI36 + .set __movstrSI36, __movmemSI36 +__movmemSI36: + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global __movmemSI32 + .global __movstrSI32 + .set __movstrSI32, __movmemSI32 +__movmemSI32: + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global __movmemSI28 + .global __movstrSI28 + .set __movstrSI28, __movmemSI28 +__movmemSI28: + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global __movmemSI24 + .global __movstrSI24 + .set __movstrSI24, __movmemSI24 +__movmemSI24: + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global __movmemSI20 + .global __movstrSI20 + .set __movstrSI20, __movmemSI20 +__movmemSI20: + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global __movmemSI16 + .global __movstrSI16 + .set __movstrSI16, __movmemSI16 +__movmemSI16: + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global __movmemSI12 + .global __movstrSI12 + .set __movstrSI12, __movmemSI12 +__movmemSI12: + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global __movmemSI8 + .global __movstrSI8 + .set __movstrSI8, __movmemSI8 +__movmemSI8: + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global __movmemSI4 + .global __movstrSI4 + .set __movstrSI4, __movmemSI4 +__movmemSI4: + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + .global __movmem_i4_even + .global __movstr_i4_even + .set __movstr_i4_even, __movmem_i4_even + + .global __movmem_i4_odd + .global __movstr_i4_odd + .set __movstr_i4_odd, __movmem_i4_odd + + .global __movmemSI12_i4 + .global __movstrSI12_i4 + .set __movstrSI12_i4, __movmemSI12_i4 + + 
.p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +__movmem_i4_even: + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +__movmem_i4_odd: + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + .p2align 4 +__movmemSI12_i4: + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) diff --git a/arch/sh/lib/strlen.S b/arch/sh/lib/strlen.S index f8ab296047b..1bcc13f0596 100644 --- a/arch/sh/lib/strlen.S +++ b/arch/sh/lib/strlen.S @@ -35,7 +35,7 @@ ENTRY(strlen) mov.b @r4+,r1 tst r1,r1 bt 8f - add #1,r2 + add #1,r2 1: mov #0,r3 diff --git a/arch/sh/lib/udiv_qrnnd.S b/arch/sh/lib/udiv_qrnnd.S new file mode 100644 index 00000000000..32b9a36de94 --- /dev/null +++ b/arch/sh/lib/udiv_qrnnd.S @@ -0,0 +1,81 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. 
*/ + .global __udiv_qrnnd_16 + .balign 8 +__udiv_qrnnd_16: + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + bf/s 1b + shlr16 r1 +0: rts + nop diff --git a/arch/sh/lib/udivsi3.S b/arch/sh/lib/udivsi3.S new file mode 100644 index 00000000000..72157ab5c31 --- /dev/null +++ b/arch/sh/lib/udivsi3.S @@ -0,0 +1,87 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + + .balign 4 + .global __udivsi3 + .type __udivsi3, @function +div8: + div1 r5,r4 +div7: + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +divx4: + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +__udivsi3: + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + bf/s large_divisor + div0u + swap.w r4,r0 + shlr16 r4 + bsr div8 + shll16 r5 + bsr div7 + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr div8 + swap.w r4,r4 + bsr div7 + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +large_divisor: + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + bsr divx4 + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 diff --git a/arch/sh/lib/udivsi3_i4i-Os.S b/arch/sh/lib/udivsi3_i4i-Os.S new file mode 100644 index 00000000000..4835553e1ea --- /dev/null +++ b/arch/sh/lib/udivsi3_i4i-Os.S @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* Moderately Space-optimized libgcc routines for the Renesas SH / + STMicroelectronics ST40 CPUs. + Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i + sh4-200 run times: + udiv small divisor: 55 cycles + udiv large divisor: 52 cycles + sdiv small divisor, positive result: 59 cycles + sdiv large divisor, positive result: 56 cycles + sdiv small divisor, negative result: 65 cycles (*) + sdiv large divisor, negative result: 62 cycles (*) + (*): r2 is restored in the rts delay slot and has a lingering latency + of two more cycles. */ + .balign 4 + .global __udivsi3_i4i + .global __udivsi3_i4 + .set __udivsi3_i4, __udivsi3_i4i + .type __udivsi3_i4i, @function + .type __sdivsi3_i4i, @function +__udivsi3_i4i: + sts pr,r1 + mov.l r4,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + swap.w r4,r0 + shlr16 r4 + bf/s large_divisor + div0u + mov.l r5,@-r15 + shll16 r5 +sdiv_small_divisor: + div1 r5,r4 + bsr div6 + div1 r5,r4 + div1 r5,r4 + bsr div6 + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr div7 + swap.w r4,r4 + div1 r5,r4 + bsr div7 + div1 r5,r4 + xtrct r4,r0 + mov.l @r15+,r5 + swap.w r0,r0 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 +div7: + div1 r5,r4 +div6: + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +divx3: + rotcl r0 + div1 r5,r4 + rotcl r0 + div1 r5,r4 + rotcl r0 + rts + div1 r5,r4 + +large_divisor: + mov.l r5,@-r15 +sdiv_large_divisor: + xor r4,r0 + .rept 4 + rotcl r0 + bsr divx3 + div1 r5,r4 + .endr + mov.l @r15+,r5 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 + + .global __sdivsi3_i4i + .global __sdivsi3_i4 + .global __sdivsi3 + .set __sdivsi3_i4, __sdivsi3_i4i + .set __sdivsi3, __sdivsi3_i4i +__sdivsi3_i4i: + mov.l r4,@-r15 + cmp/pz r5 + mov.l r5,@-r15 + bt/s pos_divisor + cmp/pz r4 + neg r5,r5 + extu.w r5,r0 + bt/s neg_result + cmp/eq r5,r0 + neg r4,r4 +pos_result: + swap.w r4,r0 + bra sdiv_check_divisor + sts pr,r1 +pos_divisor: + extu.w r5,r0 + bt/s pos_result + cmp/eq r5,r0 + neg r4,r4 +neg_result: + mova negate_result,r0 + ; + mov r0,r1 + swap.w r4,r0 + lds r2,macl + sts pr,r2 +sdiv_check_divisor: + shlr16 r4 + bf/s sdiv_large_divisor + div0u + bra sdiv_small_divisor + shll16 r5 + .balign 4 +negate_result: + neg r0,r0 + jmp @r2 + sts macl,r2 diff --git a/arch/sh/lib/udivsi3_i4i.S b/arch/sh/lib/udivsi3_i4i.S new file mode 100644 index 00000000000..f1a79d9c501 --- /dev/null +++ b/arch/sh/lib/udivsi3_i4i.S @@ -0,0 +1,666 @@ +/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006 + Free Software Foundation, Inc. 
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. 
*/ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global __udivsi3_i4i + .global __udivsi3_i4 + .set __udivsi3_i4, __udivsi3_i4i + .type __udivsi3_i4i, @function +__udivsi3_i4i: + mov.w c128_w, r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf udiv_le128 + cmp/eq r5,r1 + bf udiv_ge64k + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra udiv_25 + div1 r5,r0 + +div_le128: + mova div_table_ix,r0 + bra div_le128_2 + mov.b @(r0,r5),r1 +udiv_le128: + mov.l r4,@-r15 + mova div_table_ix,r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +div_le128_2: + mova div_table_inv,r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova div_table_clz,r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s div_by_1 + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +div_by_1_neg: + neg r4,r0 +div_by_1: + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +div_ge64k: + bt/s div_r8 + div0u + shll8 r5 + bra div_ge64k_2 + div1 r5,r0 +udiv_ge64k: + cmp/hi r0,r5 + mov r5,r1 + bt udiv_r8 + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +div_ge64k_2: + div1 r5,r0 + mov.l zero_l,r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w m256_w,r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra div_ge64k_end + xor r4,r0 + +div_r8: + shll16 r4 + bra div_r8_2 + shll8 r4 +udiv_r8: + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +div_r8_2: + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + .global __sdivsi3_i4i + .global __sdivsi3_i4 + .global __sdivsi3 + .set __sdivsi3_i4, __sdivsi3_i4i + .set __sdivsi3, __sdivsi3_i4i + .type __sdivsi3_i4i, @function + /* This is link-compatible with a __sdivsi3 call, + but we effectively clobber only r1. */ +__sdivsi3_i4i: + mov.l r4,@-r15 + cmp/pz r5 + mov.w c128_w, r1 + bt/s pos_divisor + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s neg_result + cmp/hi r1,r5 + neg r4,r4 +pos_result: + extu.w r5,r0 + bf div_le128 + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s div_ge64k + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +udiv_25: + mov.l zero_l,r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +div_ge64k_end: + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
+ extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +div_le128_neg: + tst #0xfe,r0 + mova div_table_ix,r0 + mov.b @(r0,r5),r1 + mova div_table_inv,r0 + bt/s div_by_1_neg + mov.l @(r0,r1),r1 + mova div_table_clz,r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +pos_divisor: + mov.l r5,@-r15 + bt/s pos_result + cmp/hi r1,r5 + neg r4,r4 +neg_result: + extu.w r5,r0 + bf div_le128_neg + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s div_ge64k_neg + cmp/hi r0,r5 + div0u + mov.l zero_l,r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +div_ge64k_neg_end: + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +div_r8_neg_end: + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +div_ge64k_neg: + bt/s div_r8_neg + div0u + shll8 r5 + mov.l zero_l,r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w m256_w,r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra div_ge64k_neg_end + xor r4,r0 + +c128_w: + .word 128 + +div_r8_neg: + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra div_r8_neg_end + div1 r4,r0 + +m256_w: + .word 0xff00 +/* This table has been generated by divtab-sh4.c. */ + .balign 4 +div_table_clz: + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. 
*/ +div_table_ix: + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ + .balign 4 +zero_l: + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +div_table_inv: + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ |
