diff options
Diffstat (limited to 'arch/sh/lib')
| -rw-r--r-- | arch/sh/lib/Makefile | 11 | ||||
| -rw-r--r-- | arch/sh/lib/delay.c | 10 | ||||
| -rw-r--r-- | arch/sh/lib/libgcc.h | 3 | ||||
| -rw-r--r-- | arch/sh/lib/mcount.S | 10 | ||||
| -rw-r--r-- | arch/sh/lib/memset-sh4.S | 107 | ||||
| -rw-r--r-- | arch/sh/lib/strlen.S | 2 |
6 files changed, 131 insertions, 12 deletions
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index a969b47c546..3baff31e58c 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -2,11 +2,11 @@ # Makefile for SuperH-specific library files.. # -lib-y = delay.o memset.o memmove.o memchr.o \ +lib-y = delay.o memmove.o memchr.o \ checksum.o strlen.o div64.o div64-generic.o # Extracted from libgcc -lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ +obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \ udiv_qrnnd.o @@ -23,8 +23,11 @@ obj-y += io.o memcpy-y := memcpy.o memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o +memset-y := memset.o +memset-$(CONFIG_CPU_SH4) := memset-sh4.o + lib-$(CONFIG_MMU) += copy_page.o __clear_user.o lib-$(CONFIG_MCOUNT) += mcount.o -lib-y += $(memcpy-y) $(udivsi3-y) +lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y) -EXTRA_CFLAGS += -Werror +ccflags-y := -Werror diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index faa8f86c0db..0901b2f14e1 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -10,6 +10,16 @@ void __delay(unsigned long loops) { __asm__ __volatile__( + /* + * ST40-300 appears to have an issue with this code, + * normally taking two cycles each loop, as with all + * other SH variants. If however the branch and the + * delay slot straddle an 8 byte boundary, this increases + * to 3 cycles. + * This align directive ensures this doesn't occur. + */ + ".balign 8\n\t" + "tst %0, %0\n\t" "1:\t" "bf/s 1b\n\t" diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h index 3f19d1c5d94..05909d58e2f 100644 --- a/arch/sh/lib/libgcc.h +++ b/arch/sh/lib/libgcc.h @@ -17,8 +17,7 @@ struct DWstruct { #error I feel sick. #endif -typedef union -{ +typedef union { struct DWstruct s; long long ll; } DWunion; diff --git a/arch/sh/lib/mcount.S b/arch/sh/lib/mcount.S index 84a57761f17..52aa2011d75 100644 --- a/arch/sh/lib/mcount.S +++ b/arch/sh/lib/mcount.S @@ -39,7 +39,7 @@ * * Make sure the stack pointer contains a valid address. Valid * addresses for kernel stacks are anywhere after the bss - * (after _ebss) and anywhere in init_thread_union (init_stack). + * (after __bss_stop) and anywhere in init_thread_union (init_stack). */ #define STACK_CHECK() \ mov #(THREAD_SIZE >> 10), r0; \ @@ -60,7 +60,7 @@ cmp/hi r2, r1; \ bf stack_panic; \ \ - /* If sp > _ebss then we're OK. */ \ + /* If sp > __bss_stop then we're OK. */ \ mov.l .L_ebss, r1; \ cmp/hi r1, r15; \ bt 1f; \ @@ -70,7 +70,7 @@ cmp/hs r1, r15; \ bf stack_panic; \ \ - /* If sp > init_stack && sp < _ebss, not OK. */ \ + /* If sp > init_stack && sp < __bss_stop, not OK. */ \ add r0, r1; \ cmp/hs r1, r15; \ bt stack_panic; \ @@ -292,10 +292,10 @@ stack_panic: nop .align 2 -.L_ebss: - .long _ebss .L_init_thread_union: .long init_thread_union +.L_ebss: + .long __bss_stop .Lpanic: .long panic .Lpanic_s: diff --git a/arch/sh/lib/memset-sh4.S b/arch/sh/lib/memset-sh4.S new file mode 100644 index 00000000000..1a6e32cc4e4 --- /dev/null +++ b/arch/sh/lib/memset-sh4.S @@ -0,0 +1,107 @@ +/* + * "memset" implementation for SH4 + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (c) 2009 STMicroelectronics Limited + * Author: Stuart Menefy <stuart.menefy:st.com> + */ + +/* + * void *memset(void *s, int c, size_t n); + */ + +#include <linux/linkage.h> + +ENTRY(memset) + mov #12,r0 + add r6,r4 + cmp/gt r6,r0 + bt/s 40f ! if it's too small, set a byte at once + mov r4,r0 + and #3,r0 + cmp/eq #0,r0 + bt/s 2f ! It's aligned + sub r0,r6 +1: + dt r0 + bf/s 1b + mov.b r5,@-r4 +2: ! make VVVV + extu.b r5,r5 + swap.b r5,r0 ! V0 + or r0,r5 ! VV + swap.w r5,r0 ! VV00 + or r0,r5 ! VVVV + + ! Check if enough bytes need to be copied to be worth the big loop + mov #0x40, r0 ! (MT) + cmp/gt r6,r0 ! (MT) 64 > len => slow loop + + bt/s 22f + mov r6,r0 + + ! align the dst to the cache block size if necessary + mov r4, r3 + mov #~(0x1f), r1 + + and r3, r1 + cmp/eq r3, r1 + + bt/s 11f ! dst is already aligned + sub r1, r3 ! r3-r1 -> r3 + shlr2 r3 ! number of loops + +10: mov.l r5,@-r4 + dt r3 + bf/s 10b + add #-4, r6 + +11: ! dst is 32byte aligned + mov r6,r2 + mov #-5,r0 + shld r0,r2 ! number of loops + + add #-32, r4 + mov r5, r0 +12: + movca.l r0,@r4 + mov.l r5,@(4, r4) + mov.l r5,@(8, r4) + mov.l r5,@(12,r4) + mov.l r5,@(16,r4) + mov.l r5,@(20,r4) + add #-0x20, r6 + mov.l r5,@(24,r4) + dt r2 + mov.l r5,@(28,r4) + bf/s 12b + add #-32, r4 + + add #32, r4 + mov #8, r0 + cmp/ge r0, r6 + bf 40f + + mov r6,r0 +22: + shlr2 r0 + shlr r0 ! r0 = r6 >> 3 +3: + dt r0 + mov.l r5,@-r4 ! set 8-byte at once + bf/s 3b + mov.l r5,@-r4 + ! + mov #7,r0 + and r0,r6 + + ! fill bytes (length may be zero) +40: tst r6,r6 + bt 5f +4: + dt r6 + bf/s 4b + mov.b r5,@-r4 +5: + rts + mov r4,r0 diff --git a/arch/sh/lib/strlen.S b/arch/sh/lib/strlen.S index f8ab296047b..1bcc13f0596 100644 --- a/arch/sh/lib/strlen.S +++ b/arch/sh/lib/strlen.S @@ -35,7 +35,7 @@ ENTRY(strlen) mov.b @r4+,r1 tst r1,r1 bt 8f - add #1,r2 + add #1,r2 1: mov #0,r3 |
