aboutsummaryrefslogtreecommitdiff
path: root/arch/microblaze/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/microblaze/lib')
-rw-r--r--arch/microblaze/lib/Makefile8
-rw-r--r--arch/microblaze/lib/ashldi3.c3
-rw-r--r--arch/microblaze/lib/ashrdi3.c3
-rw-r--r--arch/microblaze/lib/cmpdi2.c26
-rw-r--r--arch/microblaze/lib/fastcopy.S4
-rw-r--r--arch/microblaze/lib/libgcc.h7
-rw-r--r--arch/microblaze/lib/lshrdi3.c3
-rw-r--r--arch/microblaze/lib/memcpy.c21
-rw-r--r--arch/microblaze/lib/memmove.c19
-rw-r--r--arch/microblaze/lib/memset.c4
-rw-r--r--arch/microblaze/lib/muldi3.S121
-rw-r--r--arch/microblaze/lib/muldi3.c57
-rw-r--r--arch/microblaze/lib/uaccess_old.S158
-rw-r--r--arch/microblaze/lib/ucmpdi2.c20
14 files changed, 284 insertions, 170 deletions
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index f1fcbff3da2..844960e8ae1 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -2,6 +2,12 @@
# Makefile
#
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ashldi3.o = -pg
+CFLAGS_REMOVE_ashrdi3.o = -pg
+CFLAGS_REMOVE_lshrdi3.o = -pg
+endif
+
lib-y := memset.o
ifeq ($(CONFIG_OPT_LIB_ASM),y)
@@ -14,10 +20,12 @@ lib-y += uaccess_old.o
lib-y += ashldi3.o
lib-y += ashrdi3.o
+lib-y += cmpdi2.o
lib-y += divsi3.o
lib-y += lshrdi3.o
lib-y += modsi3.o
lib-y += muldi3.o
lib-y += mulsi3.o
+lib-y += ucmpdi2.o
lib-y += udivsi3.o
lib-y += umodsi3.o
diff --git a/arch/microblaze/lib/ashldi3.c b/arch/microblaze/lib/ashldi3.c
index beb80f31609..1af904cd972 100644
--- a/arch/microblaze/lib/ashldi3.c
+++ b/arch/microblaze/lib/ashldi3.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include "libgcc.h"
@@ -25,5 +25,4 @@ long long __ashldi3(long long u, word_type b)
return w.ll;
}
-
EXPORT_SYMBOL(__ashldi3);
diff --git a/arch/microblaze/lib/ashrdi3.c b/arch/microblaze/lib/ashrdi3.c
index c884a912b66..32c334c05d0 100644
--- a/arch/microblaze/lib/ashrdi3.c
+++ b/arch/microblaze/lib/ashrdi3.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include "libgcc.h"
@@ -27,5 +27,4 @@ long long __ashrdi3(long long u, word_type b)
return w.ll;
}
-
EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/microblaze/lib/cmpdi2.c b/arch/microblaze/lib/cmpdi2.c
new file mode 100644
index 00000000000..67abc9ac1bd
--- /dev/null
+++ b/arch/microblaze/lib/cmpdi2.c
@@ -0,0 +1,26 @@
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+word_type __cmpdi2(long long a, long long b) /* 0: a < b, 1: a == b, 2: a > b */
+{
+ const DWunion au = {
+ .ll = a
+ }; /* view a as a high/low word pair */
+ const DWunion bu = {
+ .ll = b
+ }; /* view b as a high/low word pair */
+
+ if (au.s.high < bu.s.high) /* high words decide first; no cast, so DWunion's field type applies */
+ return 0;
+ else if (au.s.high > bu.s.high)
+ return 2;
+
+ if ((unsigned int) au.s.low < (unsigned int) bu.s.low) /* high words equal: low words compared as unsigned */
+ return 0;
+ else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+ return 2;
+
+ return 1; /* both halves equal */
+}
+EXPORT_SYMBOL(__cmpdi2);
diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S
index fdc48bb065d..62021d7e249 100644
--- a/arch/microblaze/lib/fastcopy.S
+++ b/arch/microblaze/lib/fastcopy.S
@@ -29,6 +29,10 @@
* between mem locations with size of xfer spec'd in bytes
*/
+#ifdef __MICROBLAZEEL__
+#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM.
+#endif
+
#include <linux/linkage.h>
.text
.globl memcpy
diff --git a/arch/microblaze/lib/libgcc.h b/arch/microblaze/lib/libgcc.h
index 05909d58e2f..ab077ef7e14 100644
--- a/arch/microblaze/lib/libgcc.h
+++ b/arch/microblaze/lib/libgcc.h
@@ -22,4 +22,11 @@ typedef union {
long long ll;
} DWunion;
+extern long long __ashldi3(long long u, word_type b);
+extern long long __ashrdi3(long long u, word_type b);
+extern word_type __cmpdi2(long long a, long long b);
+extern long long __lshrdi3(long long u, word_type b);
+extern long long __muldi3(long long u, long long v);
+extern word_type __ucmpdi2(unsigned long long a, unsigned long long b);
+
#endif /* __ASM_LIBGCC_H */
diff --git a/arch/microblaze/lib/lshrdi3.c b/arch/microblaze/lib/lshrdi3.c
index dcf8d6810b7..adcb253f11c 100644
--- a/arch/microblaze/lib/lshrdi3.c
+++ b/arch/microblaze/lib/lshrdi3.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include "libgcc.h"
@@ -25,5 +25,4 @@ long long __lshrdi3(long long u, word_type b)
return w.ll;
}
-
EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index cc495d7d99c..f536e81b816 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -24,13 +24,12 @@
* not any responsibility to update it.
*/
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
-#include <linux/module.h>
#include <linux/string.h>
-#include <asm/system.h>
#ifdef __HAVE_ARCH_MEMCPY
#ifndef CONFIG_OPT_LIB_FUNCTION
@@ -63,8 +62,8 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
if (likely(c >= 4)) {
unsigned value, buf_hold;
- /* Align the dstination to a word boundry. */
- /* This is done in an endian independant manner. */
+ /* Align the destination to a word boundary. */
+ /* This is done in an endian independent manner. */
switch ((unsigned long)dst & 3) {
case 1:
*dst++ = *src++;
@@ -80,7 +79,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
i_dst = (void *)dst;
/* Choose a copy scheme based on the source */
- /* alignment relative to dstination. */
+ /* alignment relative to destination. */
switch ((unsigned long)src & 3) {
case 0x0: /* Both byte offsets are aligned */
i_src = (const void *)src;
@@ -104,12 +103,12 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
}
#else
/* Load the holding buffer */
- buf_hold = (*i_src++ & 0xFFFFFF00) >>8;
+ buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;
for (; c >= 4; c -= 4) {
value = *i_src++;
*i_dst++ = buf_hold | ((value & 0xFF) << 24);
- buf_hold = (value & 0xFFFFFF00) >>8;
+ buf_hold = (value & 0xFFFFFF00) >> 8;
}
#endif
/* Realign the source */
@@ -130,12 +129,12 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
}
#else
/* Load the holding buffer */
- buf_hold = (*i_src++ & 0xFFFF0000 )>>16;
+ buf_hold = (*i_src++ & 0xFFFF0000) >> 16;
for (; c >= 4; c -= 4) {
value = *i_src++;
- *i_dst++ = buf_hold | ((value & 0xFFFF)<<16);
- buf_hold = (value & 0xFFFF0000) >>16;
+ *i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
+ buf_hold = (value & 0xFFFF0000) >> 16;
}
#endif
/* Realign the source */
@@ -173,7 +172,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
}
/* Finish off any remaining bytes */
- /* simple fast copy, ... unless a cache boundry is crossed */
+ /* simple fast copy, ... unless a cache boundary is crossed */
switch (c) {
case 3:
*dst++ = *src++;
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index 123e3616f2d..3611ce70415 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -24,10 +24,10 @@
* not any responsibility to update it.
*/
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
-#include <linux/module.h>
#include <linux/string.h>
#ifdef __HAVE_ARCH_MEMMOVE
@@ -83,8 +83,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
if (c >= 4) {
unsigned value, buf_hold;
- /* Align the destination to a word boundry. */
- /* This is done in an endian independant manner. */
+ /* Align the destination to a word boundary. */
+ /* This is done in an endian independent manner. */
switch ((unsigned long)dst & 3) {
case 3:
@@ -129,7 +129,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
for (; c >= 4; c -= 4) {
value = *--i_src;
- *--i_dst = buf_hold | ((value & 0xFFFFFF00)>>8);
+ *--i_dst = buf_hold |
+ ((value & 0xFFFFFF00) >> 8);
buf_hold = (value & 0xFF) << 24;
}
#endif
@@ -155,7 +156,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
for (; c >= 4; c -= 4) {
value = *--i_src;
- *--i_dst = buf_hold | ((value & 0xFFFF0000)>>16);
+ *--i_dst = buf_hold |
+ ((value & 0xFFFF0000) >> 16);
buf_hold = (value & 0xFFFF) << 16;
}
#endif
@@ -181,8 +183,9 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
for (; c >= 4; c -= 4) {
value = *--i_src;
- *--i_dst = buf_hold | ((value & 0xFF000000)>> 24);
- buf_hold = (value & 0xFFFFFF) << 8;;
+ *--i_dst = buf_hold |
+ ((value & 0xFF000000) >> 24);
+ buf_hold = (value & 0xFFFFFF) << 8;
}
#endif
/* Realign the source */
@@ -193,7 +196,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
dst = (void *)i_dst;
}
- /* simple fast copy, ... unless a cache boundry is crossed */
+ /* simple fast copy, ... unless a cache boundary is crossed */
/* Finish off any remaining bytes */
switch (c) {
case 4:
diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c
index 834565d1607..04ea72c8a81 100644
--- a/arch/microblaze/lib/memset.c
+++ b/arch/microblaze/lib/memset.c
@@ -24,10 +24,10 @@
* not any responsibility to update it.
*/
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
-#include <linux/module.h>
#include <linux/string.h>
#ifdef __HAVE_ARCH_MEMSET
@@ -64,7 +64,7 @@ void *memset(void *v_src, int c, __kernel_size_t n)
if (likely(n >= 4)) {
/* Align the destination to a word boundary */
- /* This is done in an endian independant manner */
+ /* This is done in an endian independent manner */
switch ((unsigned) src & 3) {
case 1:
*src++ = c;
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
deleted file mode 100644
index ceeaa8c407f..00000000000
--- a/arch/microblaze/lib/muldi3.S
+++ /dev/null
@@ -1,121 +0,0 @@
-#include <linux/linkage.h>
-
-/*
- * Multiply operation for 64 bit integers, for devices with hard multiply
- * Input : Operand1[H] in Reg r5
- * Operand1[L] in Reg r6
- * Operand2[H] in Reg r7
- * Operand2[L] in Reg r8
- * Output: Result[H] in Reg r3
- * Result[L] in Reg r4
- *
- * Explaination:
- *
- * Both the input numbers are divided into 16 bit number as follows
- * op1 = A B C D
- * op2 = E F G H
- * result = D * H
- * + (C * H + D * G) << 16
- * + (B * H + C * G + D * F) << 32
- * + (A * H + B * G + C * F + D * E) << 48
- *
- * Only 64 bits of the output are considered
- */
-
- .text
- .globl __muldi3
- .type __muldi3, @function
- .ent __muldi3
-
-__muldi3:
- addi r1, r1, -40
-
-/* Save the input operands on the caller's stack */
- swi r5, r1, 44
- swi r6, r1, 48
- swi r7, r1, 52
- swi r8, r1, 56
-
-/* Store all the callee saved registers */
- sw r20, r1, r0
- swi r21, r1, 4
- swi r22, r1, 8
- swi r23, r1, 12
- swi r24, r1, 16
- swi r25, r1, 20
- swi r26, r1, 24
- swi r27, r1, 28
-
-/* Load all the 16 bit values for A thru H */
- lhui r20, r1, 44 /* A */
- lhui r21, r1, 46 /* B */
- lhui r22, r1, 48 /* C */
- lhui r23, r1, 50 /* D */
- lhui r24, r1, 52 /* E */
- lhui r25, r1, 54 /* F */
- lhui r26, r1, 56 /* G */
- lhui r27, r1, 58 /* H */
-
-/* D * H ==> LSB of the result on stack ==> Store1 */
- mul r9, r23, r27
- swi r9, r1, 36 /* Pos2 and Pos3 */
-
-/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
-/* Store the carry generated in position 2 for Pos 3 */
- lhui r11, r1, 36 /* Pos2 */
- mul r9, r22, r27 /* C * H */
- mul r10, r23, r26 /* D * G */
- add r9, r9, r10
- addc r12, r0, r0
- add r9, r9, r11
- addc r12, r12, r0 /* Store the Carry */
- shi r9, r1, 36 /* Store Pos2 */
- swi r9, r1, 32
- lhui r11, r1, 32
- shi r11, r1, 34 /* Store Pos1 */
-
-/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
- mul r9, r21, r27 /* B * H */
- mul r10, r22, r26 /* C * G */
- mul r7, r23, r25 /* D * F */
- add r9, r9, r11
- add r9, r9, r10
- add r9, r9, r7
- swi r9, r1, 32 /* Pos0 and Pos1 */
-
-/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
- lhui r11, r1, 32 /* Pos0 */
- mul r9, r20, r27 /* A * H */
- mul r10, r21, r26 /* B * G */
- mul r7, r22, r25 /* C * F */
- mul r8, r23, r24 /* D * E */
- add r9, r9, r11
- add r9, r9, r10
- add r9, r9, r7
- add r9, r9, r8
- sext16 r9, r9 /* Sign extend the MSB */
- shi r9, r1, 32
-
-/* Move results to r3 and r4 */
- lhui r3, r1, 32
- add r3, r3, r12
- shi r3, r1, 32
- lwi r3, r1, 32 /* Hi Part */
- lwi r4, r1, 36 /* Lo Part */
-
-/* Restore Callee saved registers */
- lw r20, r1, r0
- lwi r21, r1, 4
- lwi r22, r1, 8
- lwi r23, r1, 12
- lwi r24, r1, 16
- lwi r25, r1, 20
- lwi r26, r1, 24
- lwi r27, r1, 28
-
-/* Restore Frame and return */
- rtsd r15, 8
- addi r1, r1, 40
-
-.size __muldi3, . - __muldi3
-.end __muldi3
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c
new file mode 100644
index 00000000000..a3f9a03acdc
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.c
@@ -0,0 +1,57 @@
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+#define W_TYPE_SIZE 32 /* word width in bits; halved below to split a word in two */
+
+#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2)) /* 2^(W_TYPE_SIZE / 2), the half-word base */
+#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1)) /* bits of t below the half-word boundary */
+#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2)) /* t shifted down by half a word */
+
+/* If we still don't have umul_ppmm, define it using plain C: (w1:w0) = u * v built from four half-word partial products. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ unsigned long __x0, __x1, __x2, __x3; \
+ unsigned short __ul, __vl, __uh, __vh; \
+ \
+ __ul = __ll_lowpart(u); \
+ __uh = __ll_highpart(u); \
+ __vl = __ll_lowpart(v); \
+ __vh = __ll_highpart(v); \
+ \
+ __x0 = (unsigned long) __ul * __vl; \
+ __x1 = (unsigned long) __ul * __vh; \
+ __x2 = (unsigned long) __uh * __vl; \
+ __x3 = (unsigned long) __uh * __vh; \
+ \
+ __x1 += __ll_highpart(__x0); /* this can't give carry */\
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos */ \
+ \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+ } while (0)
+#endif
+
+#if !defined(__umulsidi3) /* fallback: fill a DWunion's high/low words with u * v via umul_ppmm and yield its .ll */
+#define __umulsidi3(u, v) ({ \
+ DWunion __w; \
+ umul_ppmm(__w.s.high, __w.s.low, u, v); \
+ __w.ll; \
+ })
+#endif
+
+long long __muldi3(long long u, long long v) /* returns u * v, assembled from word-sized products */
+{
+ const DWunion uu = {.ll = u}; /* split u into high/low words */
+ const DWunion vv = {.ll = v}; /* split v into high/low words */
+ DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; /* start from the double-word product low * low */
+
+ w.s.high += ((unsigned long) uu.s.low * (unsigned long) vv.s.high
+ + (unsigned long) uu.s.high * (unsigned long) vv.s.low); /* cross terms go into the high word only; high * high is never computed */
+
+ return w.ll;
+}
+EXPORT_SYMBOL(__muldi3);
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index 5810cec54a7..0e8cc2710c2 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/linkage.h>
+#include <asm/page.h>
/*
* int __strncpy_user(char *to, char *from, int len);
@@ -33,19 +34,18 @@ __strncpy_user:
* r3 - temp count
* r4 - temp val
*/
+ beqid r7,3f
addik r3,r7,0 /* temp_count = len */
- beqi r3,3f
1:
lbu r4,r6,r0
+ beqid r4,2f
sb r4,r5,r0
- addik r3,r3,-1
- beqi r3,2f /* break on len */
-
addik r5,r5,1
- bneid r4,1b
addik r6,r6,1 /* delay slot */
- addik r3,r3,1 /* undo "temp_count--" */
+
+ addik r3,r3,-1
+ bnei r3,1b /* break on len */
2:
rsubk r3,r3,r7 /* temp_count = len - temp_count */
3:
@@ -76,8 +76,8 @@ __strncpy_user:
.type __strnlen_user, @function
.align 4;
__strnlen_user:
+ beqid r6,3f
addik r3,r6,0
- beqi r3,3f
1:
lbu r4,r5,r0
beqid r4,2f /* break on NUL */
@@ -102,6 +102,49 @@ __strnlen_user:
.section __ex_table,"a"
.word 1b,4b
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset) \
+1: lwi r4 , r6, 0x0000 + offset; \
+2: lwi r19, r6, 0x0004 + offset; \
+3: lwi r20, r6, 0x0008 + offset; \
+4: lwi r21, r6, 0x000C + offset; \
+5: lwi r22, r6, 0x0010 + offset; \
+6: lwi r23, r6, 0x0014 + offset; \
+7: lwi r24, r6, 0x0018 + offset; \
+8: lwi r25, r6, 0x001C + offset; \
+9: swi r4 , r5, 0x0000 + offset; \
+10: swi r19, r5, 0x0004 + offset; \
+11: swi r20, r5, 0x0008 + offset; \
+12: swi r21, r5, 0x000C + offset; \
+13: swi r22, r5, 0x0010 + offset; \
+14: swi r23, r5, 0x0014 + offset; \
+15: swi r24, r5, 0x0018 + offset; \
+16: swi r25, r5, 0x001C + offset; \
+ .section __ex_table,"a"; \
+ .word 1b, 33f; \
+ .word 2b, 33f; \
+ .word 3b, 33f; \
+ .word 4b, 33f; \
+ .word 5b, 33f; \
+ .word 6b, 33f; \
+ .word 7b, 33f; \
+ .word 8b, 33f; \
+ .word 9b, 33f; \
+ .word 10b, 33f; \
+ .word 11b, 33f; \
+ .word 12b, 33f; \
+ .word 13b, 33f; \
+ .word 14b, 33f; \
+ .word 15b, 33f; \
+ .word 16b, 33f; \
+ .text
+
+#define COPY_80(offset) \
+ COPY(0x00 + offset);\
+ COPY(0x20 + offset);\
+ COPY(0x40 + offset);\
+ COPY(0x60 + offset);
+
/*
* int __copy_tofrom_user(char *to, char *from, int len)
* Return:
@@ -119,34 +162,105 @@ __copy_tofrom_user:
* r7, r3 - count
* r4 - tempval
*/
- beqid r7, 3f /* zero size is not likely */
- andi r3, r7, 0x3 /* filter add count */
- bneid r3, 4f /* if is odd value then byte copying */
+ beqid r7, 0f /* zero size is not likely */
or r3, r5, r6 /* find if is any to/from unaligned */
- andi r3, r3, 0x3 /* mask unaligned */
- bneid r3, 1f /* it is unaligned -> then jump */
+ or r3, r3, r7 /* find if count is unaligned */
+ andi r3, r3, 0x3 /* mask last 3 bits */
+ bneid r3, bu1 /* if r3 is not zero then byte copying */
+ or r3, r0, r0
+
+ rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+ beqid r3, page;
or r3, r0, r0
-/* at least one 4 byte copy */
-5: lw r4, r6, r3
-6: sw r4, r5, r3
+w1: lw r4, r6, r3 /* at least one 4 byte copy */
+w2: sw r4, r5, r3
addik r7, r7, -4
- bneid r7, 5b
+ bneid r7, w1
addik r3, r3, 4
addik r3, r7, 0
rtsd r15, 8
nop
-4: or r3, r0, r0
-1: lbu r4,r6,r3
-2: sb r4,r5,r3
+
+ .section __ex_table,"a"
+ .word w1, 0f;
+ .word w2, 0f;
+ .text
+
+.align 4 /* Alignment is important to keep icache happy */
+page: /* Create room on stack and save registers for storing values */
+ addik r1, r1, -40
+ swi r5, r1, 0
+ swi r6, r1, 4
+ swi r7, r1, 8
+ swi r19, r1, 12
+ swi r20, r1, 16
+ swi r21, r1, 20
+ swi r22, r1, 24
+ swi r23, r1, 28
+ swi r24, r1, 32
+ swi r25, r1, 36
+loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+ /* Loop unrolling to get performance boost */
+ COPY_80(0x000);
+ COPY_80(0x080);
+ COPY_80(0x100);
+ COPY_80(0x180);
+ /* copy loop */
+ addik r6, r6, 0x200
+ addik r7, r7, -0x200
+ bneid r7, loop
+ addik r5, r5, 0x200
+
+ /* Restore register content */
+ lwi r5, r1, 0
+ lwi r6, r1, 4
+ lwi r7, r1, 8
+ lwi r19, r1, 12
+ lwi r20, r1, 16
+ lwi r21, r1, 20
+ lwi r22, r1, 24
+ lwi r23, r1, 28
+ lwi r24, r1, 32
+ lwi r25, r1, 36
+ addik r1, r1, 40
+ /* return back */
+ addik r3, r0, 0
+ rtsd r15, 8
+ nop
+
+/* Fault case - return temp count */
+33:
+ addik r3, r7, 0
+ /* Restore register content */
+ lwi r5, r1, 0
+ lwi r6, r1, 4
+ lwi r7, r1, 8
+ lwi r19, r1, 12
+ lwi r20, r1, 16
+ lwi r21, r1, 20
+ lwi r22, r1, 24
+ lwi r23, r1, 28
+ lwi r24, r1, 32
+ lwi r25, r1, 36
+ addik r1, r1, 40
+ /* return back */
+ rtsd r15, 8
+ nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1: lbu r4,r6,r3
+bu2: sb r4,r5,r3
addik r7,r7,-1
- bneid r7,1b
+ bneid r7,bu1
addik r3,r3,1 /* delay slot */
-3:
+0:
addik r3,r7,0
rtsd r15,8
nop
.size __copy_tofrom_user, . - __copy_tofrom_user
.section __ex_table,"a"
- .word 1b,3b,2b,3b,5b,3b,6b,3b
+ .word bu1, 0b;
+ .word bu2, 0b;
+ .text
diff --git a/arch/microblaze/lib/ucmpdi2.c b/arch/microblaze/lib/ucmpdi2.c
new file mode 100644
index 00000000000..d05f1585121
--- /dev/null
+++ b/arch/microblaze/lib/ucmpdi2.c
@@ -0,0 +1,20 @@
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b) /* 0: a < b, 1: a == b, 2: a > b */
+{
+ const DWunion au = {.ll = a}; /* view a as a high/low word pair */
+ const DWunion bu = {.ll = b}; /* view b as a high/low word pair */
+
+ if ((unsigned int) au.s.high < (unsigned int) bu.s.high) /* high words decide first, compared as unsigned */
+ return 0;
+ else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+ return 2;
+ if ((unsigned int) au.s.low < (unsigned int) bu.s.low) /* high words equal: fall back to the low words, also unsigned */
+ return 0;
+ else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+ return 2;
+ return 1; /* both halves equal */
+}
+EXPORT_SYMBOL(__ucmpdi2);