aboutsummaryrefslogtreecommitdiff
path: root/arch/sh/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sh/lib')
-rw-r--r--arch/sh/lib/Makefile11
-rw-r--r--arch/sh/lib/delay.c10
-rw-r--r--arch/sh/lib/libgcc.h3
-rw-r--r--arch/sh/lib/mcount.S10
-rw-r--r--arch/sh/lib/memset-sh4.S107
-rw-r--r--arch/sh/lib/strlen.S2
6 files changed, 131 insertions, 12 deletions
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index a969b47c546..3baff31e58c 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -2,11 +2,11 @@
# Makefile for SuperH-specific library files..
#
-lib-y = delay.o memset.o memmove.o memchr.o \
+lib-y = delay.o memmove.o memchr.o \
checksum.o strlen.o div64.o div64-generic.o
# Extracted from libgcc
-lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
+obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \
udiv_qrnnd.o
@@ -23,8 +23,11 @@ obj-y += io.o
memcpy-y := memcpy.o
memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o
+memset-y := memset.o
+memset-$(CONFIG_CPU_SH4) := memset-sh4.o
+
lib-$(CONFIG_MMU) += copy_page.o __clear_user.o
lib-$(CONFIG_MCOUNT) += mcount.o
-lib-y += $(memcpy-y) $(udivsi3-y)
+lib-y += $(memcpy-y) $(memset-y) $(udivsi3-y)
-EXTRA_CFLAGS += -Werror
+ccflags-y := -Werror
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index faa8f86c0db..0901b2f14e1 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -10,6 +10,16 @@
void __delay(unsigned long loops)
{
__asm__ __volatile__(
+ /*
+ * ST40-300 appears to have an issue with this code. The
+ * loop normally takes two cycles per iteration, as on all
+ * other SH variants. If, however, the branch and its delay
+ * slot straddle an 8-byte boundary, this increases to three
+ * cycles. This align directive ensures that cannot occur.
+ */
+ ".balign 8\n\t"
+ ".balign 8\n\t"
+
"tst %0, %0\n\t"
"1:\t"
"bf/s 1b\n\t"
diff --git a/arch/sh/lib/libgcc.h b/arch/sh/lib/libgcc.h
index 3f19d1c5d94..05909d58e2f 100644
--- a/arch/sh/lib/libgcc.h
+++ b/arch/sh/lib/libgcc.h
@@ -17,8 +17,7 @@ struct DWstruct {
#error I feel sick.
#endif
-typedef union
-{
+typedef union {
struct DWstruct s;
long long ll;
} DWunion;
diff --git a/arch/sh/lib/mcount.S b/arch/sh/lib/mcount.S
index 84a57761f17..52aa2011d75 100644
--- a/arch/sh/lib/mcount.S
+++ b/arch/sh/lib/mcount.S
@@ -39,7 +39,7 @@
*
* Make sure the stack pointer contains a valid address. Valid
* addresses for kernel stacks are anywhere after the bss
- * (after _ebss) and anywhere in init_thread_union (init_stack).
+ * (after __bss_stop) and anywhere in init_thread_union (init_stack).
*/
#define STACK_CHECK() \
mov #(THREAD_SIZE >> 10), r0; \
@@ -60,7 +60,7 @@
cmp/hi r2, r1; \
bf stack_panic; \
\
- /* If sp > _ebss then we're OK. */ \
+ /* If sp > __bss_stop then we're OK. */ \
mov.l .L_ebss, r1; \
cmp/hi r1, r15; \
bt 1f; \
@@ -70,7 +70,7 @@
cmp/hs r1, r15; \
bf stack_panic; \
\
- /* If sp > init_stack && sp < _ebss, not OK. */ \
+ /* If sp > init_stack && sp < __bss_stop, not OK. */ \
add r0, r1; \
cmp/hs r1, r15; \
bt stack_panic; \
@@ -292,10 +292,10 @@ stack_panic:
nop
.align 2
-.L_ebss:
- .long _ebss
.L_init_thread_union:
.long init_thread_union
+.L_ebss:
+ .long __bss_stop
.Lpanic:
.long panic
.Lpanic_s:
diff --git a/arch/sh/lib/memset-sh4.S b/arch/sh/lib/memset-sh4.S
new file mode 100644
index 00000000000..1a6e32cc4e4
--- /dev/null
+++ b/arch/sh/lib/memset-sh4.S
@@ -0,0 +1,107 @@
+/*
+ * "memset" implementation for SH4
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ * Copyright (c) 2009 STMicroelectronics Limited
+ * Author: Stuart Menefy <stuart.menefy:st.com>
+ */
+
+/*
+ * void *memset(void *s, int c, size_t n);
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(memset) ! in: r4 = s, r5 = c, r6 = n; out: r0 = s
+ mov #12,r0 ! r0 = 12: threshold for the byte-only path
+ add r6,r4 ! r4 = s + n; every store below pre-decrements r4
+ cmp/gt r6,r0 ! T = (12 > n), signed compare
+ bt/s 40f ! if it's too small, set a byte at a time
+ mov r4,r0 ! (delay slot) r0 = end address
+ and #3,r0 ! r0 = end address & 3
+ cmp/eq #0,r0
+ bt/s 2f ! It's aligned
+ sub r0,r6 ! (delay slot) drop the unaligned tail bytes from the count
+1: ! store the r0 tail bytes so r4 becomes 4-byte aligned
+ dt r0
+ bf/s 1b
+ mov.b r5,@-r4 ! (delay slot) runs on every pass
+2: ! replicate the fill byte: make VVVV
+ extu.b r5,r5 ! keep only the low byte of c
+ swap.b r5,r0 ! V0
+ or r0,r5 ! VV
+ swap.w r5,r0 ! VV00
+ or r0,r5 ! VVVV
+
+ ! Check if enough bytes need to be set to be worth the big loop
+ mov #0x40, r0 ! (MT)
+ cmp/gt r6,r0 ! (MT) 64 > len => slow loop
+
+ bt/s 22f
+ mov r6,r0 ! (delay slot) r0 = remaining length, used at label 22
+
+ ! align the dst to the cache block size if necessary
+ mov r4, r3
+ mov #~(0x1f), r1
+
+ and r3, r1 ! r1 = r4 rounded down to a 32-byte boundary
+ cmp/eq r3, r1
+
+ bt/s 11f ! dst is already aligned
+ sub r1, r3 ! (delay slot) r3-r1 -> r3: bytes above the boundary
+ shlr2 r3 ! number of loops (4 bytes each)
+
+10: mov.l r5,@-r4
+ dt r3
+ bf/s 10b
+ add #-4, r6 ! (delay slot) 4 fewer bytes left per store
+
+11: ! dst is 32-byte aligned
+ mov r6,r2
+ mov #-5,r0
+ shld r0,r2 ! number of loops: r2 = r6 >> 5
+
+ add #-32, r4 ! point r4 at the start of the block to fill
+ mov r5, r0 ! movca.l takes its source from r0
+12:
+ movca.l r0,@r4 ! first longword of the block, via the cache-allocating store
+ mov.l r5,@(4, r4)
+ mov.l r5,@(8, r4)
+ mov.l r5,@(12,r4)
+ mov.l r5,@(16,r4)
+ mov.l r5,@(20,r4)
+ add #-0x20, r6 ! 32 fewer bytes left
+ mov.l r5,@(24,r4)
+ dt r2
+ mov.l r5,@(28,r4)
+ bf/s 12b
+ add #-32, r4 ! (delay slot) step down to the next block
+
+ add #32, r4 ! undo the extra step taken by the final delay slot
+ mov #8, r0
+ cmp/ge r0, r6 ! T = (remaining >= 8)
+ bf 40f ! fewer than 8 left: finish bytewise
+
+ mov r6,r0
+22:
+ shlr2 r0
+ shlr r0 ! r0 = r6 >> 3
+3:
+ dt r0
+ mov.l r5,@-r4 ! set 8 bytes at once
+ bf/s 3b
+ mov.l r5,@-r4 ! (delay slot) second half of the 8 bytes
+ !
+ mov #7,r0
+ and r0,r6 ! r6 = leftover byte count (0..7)
+
+ ! fill bytes (length may be zero)
+40: tst r6,r6
+ bt 5f
+4:
+ dt r6
+ bf/s 4b
+ mov.b r5,@-r4 ! (delay slot) runs on every pass
+5:
+ rts
+ mov r4,r0 ! (delay slot) return r4, which has walked back down to s
diff --git a/arch/sh/lib/strlen.S b/arch/sh/lib/strlen.S
index f8ab296047b..1bcc13f0596 100644
--- a/arch/sh/lib/strlen.S
+++ b/arch/sh/lib/strlen.S
@@ -35,7 +35,7 @@ ENTRY(strlen)
mov.b @r4+,r1
tst r1,r1
bt 8f
- add #1,r2
+ add #1,r2
1:
mov #0,r3