diff options
Diffstat (limited to 'arch/microblaze/lib')
| -rw-r--r-- | arch/microblaze/lib/Makefile | 8 | ||||
| -rw-r--r-- | arch/microblaze/lib/ashldi3.c | 3 | ||||
| -rw-r--r-- | arch/microblaze/lib/ashrdi3.c | 3 | ||||
| -rw-r--r-- | arch/microblaze/lib/cmpdi2.c | 26 | ||||
| -rw-r--r-- | arch/microblaze/lib/libgcc.h | 7 | ||||
| -rw-r--r-- | arch/microblaze/lib/lshrdi3.c | 3 | ||||
| -rw-r--r-- | arch/microblaze/lib/memcpy.c | 21 | ||||
| -rw-r--r-- | arch/microblaze/lib/memmove.c | 17 | ||||
| -rw-r--r-- | arch/microblaze/lib/memset.c | 4 | ||||
| -rw-r--r-- | arch/microblaze/lib/muldi3.c | 31 | ||||
| -rw-r--r-- | arch/microblaze/lib/uaccess_old.S | 158 | ||||
| -rw-r--r-- | arch/microblaze/lib/ucmpdi2.c | 20 |
12 files changed, 236 insertions, 65 deletions
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile index f1fcbff3da2..844960e8ae1 100644 --- a/arch/microblaze/lib/Makefile +++ b/arch/microblaze/lib/Makefile @@ -2,6 +2,12 @@ # Makefile # +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ashldi3.o = -pg +CFLAGS_REMOVE_ashrdi3.o = -pg +CFLAGS_REMOVE_lshrdi3.o = -pg +endif + lib-y := memset.o ifeq ($(CONFIG_OPT_LIB_ASM),y) @@ -14,10 +20,12 @@ lib-y += uaccess_old.o lib-y += ashldi3.o lib-y += ashrdi3.o +lib-y += cmpdi2.o lib-y += divsi3.o lib-y += lshrdi3.o lib-y += modsi3.o lib-y += muldi3.o lib-y += mulsi3.o +lib-y += ucmpdi2.o lib-y += udivsi3.o lib-y += umodsi3.o diff --git a/arch/microblaze/lib/ashldi3.c b/arch/microblaze/lib/ashldi3.c index beb80f31609..1af904cd972 100644 --- a/arch/microblaze/lib/ashldi3.c +++ b/arch/microblaze/lib/ashldi3.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/export.h> #include "libgcc.h" @@ -25,5 +25,4 @@ long long __ashldi3(long long u, word_type b) return w.ll; } - EXPORT_SYMBOL(__ashldi3); diff --git a/arch/microblaze/lib/ashrdi3.c b/arch/microblaze/lib/ashrdi3.c index c884a912b66..32c334c05d0 100644 --- a/arch/microblaze/lib/ashrdi3.c +++ b/arch/microblaze/lib/ashrdi3.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/export.h> #include "libgcc.h" @@ -27,5 +27,4 @@ long long __ashrdi3(long long u, word_type b) return w.ll; } - EXPORT_SYMBOL(__ashrdi3); diff --git a/arch/microblaze/lib/cmpdi2.c b/arch/microblaze/lib/cmpdi2.c new file mode 100644 index 00000000000..67abc9ac1bd --- /dev/null +++ b/arch/microblaze/lib/cmpdi2.c @@ -0,0 +1,26 @@ +#include <linux/export.h> + +#include "libgcc.h" + +word_type __cmpdi2(long long a, long long b) +{ + const DWunion au = { + .ll = a + }; + const DWunion bu = { + .ll = b + }; + + if (au.s.high < bu.s.high) + return 0; + else if (au.s.high > bu.s.high) + return 2; + + if ((unsigned int) au.s.low < (unsigned int) bu.s.low) + return 0; + else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) + return 2; + + return 1; +} +EXPORT_SYMBOL(__cmpdi2); diff --git a/arch/microblaze/lib/libgcc.h b/arch/microblaze/lib/libgcc.h index 05909d58e2f..ab077ef7e14 100644 --- a/arch/microblaze/lib/libgcc.h +++ b/arch/microblaze/lib/libgcc.h @@ -22,4 +22,11 @@ typedef union { long long ll; } DWunion; +extern long long __ashldi3(long long u, word_type b); +extern long long __ashrdi3(long long u, word_type b); +extern word_type __cmpdi2(long long a, long long b); +extern long long __lshrdi3(long long u, word_type b); +extern long long __muldi3(long long u, long long v); +extern word_type __ucmpdi2(unsigned long long a, unsigned long long b); + #endif /* __ASM_LIBGCC_H */ diff --git a/arch/microblaze/lib/lshrdi3.c b/arch/microblaze/lib/lshrdi3.c index dcf8d6810b7..adcb253f11c 100644 --- a/arch/microblaze/lib/lshrdi3.c +++ b/arch/microblaze/lib/lshrdi3.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/export.h> #include "libgcc.h" @@ -25,5 +25,4 @@ long long __lshrdi3(long long u, word_type b) return w.ll; } - EXPORT_SYMBOL(__lshrdi3); diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c index cc495d7d99c..f536e81b816 100644 --- a/arch/microblaze/lib/memcpy.c +++ b/arch/microblaze/lib/memcpy.c @@ -24,13 +24,12 @@ * not any responsibility to update it. */ +#include <linux/export.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/compiler.h> -#include <linux/module.h> #include <linux/string.h> -#include <asm/system.h> #ifdef __HAVE_ARCH_MEMCPY #ifndef CONFIG_OPT_LIB_FUNCTION @@ -63,8 +62,8 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) if (likely(c >= 4)) { unsigned value, buf_hold; - /* Align the dstination to a word boundry. */ - /* This is done in an endian independant manner. */ + /* Align the destination to a word boundary. */ + /* This is done in an endian independent manner. */ switch ((unsigned long)dst & 3) { case 1: *dst++ = *src++; @@ -80,7 +79,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) i_dst = (void *)dst; /* Choose a copy scheme based on the source */ - /* alignment relative to dstination. */ + /* alignment relative to destination. */ switch ((unsigned long)src & 3) { case 0x0: /* Both byte offsets are aligned */ i_src = (const void *)src; @@ -104,12 +103,12 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) } #else /* Load the holding buffer */ - buf_hold = (*i_src++ & 0xFFFFFF00) >>8; + buf_hold = (*i_src++ & 0xFFFFFF00) >> 8; for (; c >= 4; c -= 4) { value = *i_src++; *i_dst++ = buf_hold | ((value & 0xFF) << 24); - buf_hold = (value & 0xFFFFFF00) >>8; + buf_hold = (value & 0xFFFFFF00) >> 8; } #endif /* Realign the source */ @@ -130,12 +129,12 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) } #else /* Load the holding buffer */ - buf_hold = (*i_src++ & 0xFFFF0000 )>>16; + buf_hold = (*i_src++ & 0xFFFF0000) >> 16; for (; c >= 4; c -= 4) { value = *i_src++; - *i_dst++ = buf_hold | ((value & 0xFFFF)<<16); - buf_hold = (value & 0xFFFF0000) >>16; + *i_dst++ = buf_hold | ((value & 0xFFFF) << 16); + buf_hold = (value & 0xFFFF0000) >> 16; } #endif /* Realign the source */ @@ -173,7 +172,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) } /* Finish off any remaining bytes */ - /* simple fast copy, ... unless a cache boundry is crossed */ + /* simple fast copy, ... unless a cache boundary is crossed */ switch (c) { case 3: *dst++ = *src++; diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c index 810fd68775e..3611ce70415 100644 --- a/arch/microblaze/lib/memmove.c +++ b/arch/microblaze/lib/memmove.c @@ -24,10 +24,10 @@ * not any responsibility to update it. */ +#include <linux/export.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/compiler.h> -#include <linux/module.h> #include <linux/string.h> #ifdef __HAVE_ARCH_MEMMOVE @@ -83,8 +83,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) if (c >= 4) { unsigned value, buf_hold; - /* Align the destination to a word boundry. */ - /* This is done in an endian independant manner. */ + /* Align the destination to a word boundary. */ + /* This is done in an endian independent manner. */ switch ((unsigned long)dst & 3) { case 3: @@ -129,7 +129,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) for (; c >= 4; c -= 4) { value = *--i_src; - *--i_dst = buf_hold | ((value & 0xFFFFFF00)>>8); + *--i_dst = buf_hold | + ((value & 0xFFFFFF00) >> 8); buf_hold = (value & 0xFF) << 24; } #endif @@ -155,7 +156,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) for (; c >= 4; c -= 4) { value = *--i_src; - *--i_dst = buf_hold | ((value & 0xFFFF0000)>>16); + *--i_dst = buf_hold | + ((value & 0xFFFF0000) >> 16); buf_hold = (value & 0xFFFF) << 16; } #endif @@ -181,7 +183,8 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) for (; c >= 4; c -= 4) { value = *--i_src; - *--i_dst = buf_hold | ((value & 0xFF000000)>> 24); + *--i_dst = buf_hold | + ((value & 0xFF000000) >> 24); buf_hold = (value & 0xFFFFFF) << 8; } #endif @@ -193,7 +196,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c) dst = (void *)i_dst; } - /* simple fast copy, ... unless a cache boundry is crossed */ + /* simple fast copy, ... unless a cache boundary is crossed */ /* Finish off any remaining bytes */ switch (c) { case 4: diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c index 834565d1607..04ea72c8a81 100644 --- a/arch/microblaze/lib/memset.c +++ b/arch/microblaze/lib/memset.c @@ -24,10 +24,10 @@ * not any responsibility to update it. */ +#include <linux/export.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/compiler.h> -#include <linux/module.h> #include <linux/string.h> #ifdef __HAVE_ARCH_MEMSET @@ -64,7 +64,7 @@ void *memset(void *v_src, int c, __kernel_size_t n) if (likely(n >= 4)) { /* Align the destination to a word boundary */ - /* This is done in an endian independant manner */ + /* This is done in an endian independent manner */ switch ((unsigned) src & 3) { case 1: *src++ = c; diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c index d4860e154d2..a3f9a03acdc 100644 --- a/arch/microblaze/lib/muldi3.c +++ b/arch/microblaze/lib/muldi3.c @@ -1,33 +1,29 @@ -#include <linux/module.h> +#include <linux/export.h> #include "libgcc.h" -#define DWtype long long -#define UWtype unsigned long -#define UHWtype unsigned short - #define W_TYPE_SIZE 32 -#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) -#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) +#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2)) /* If we still don't have umul_ppmm, define it using plain C. */ #if !defined(umul_ppmm) #define umul_ppmm(w1, w0, u, v) \ do { \ - UWtype __x0, __x1, __x2, __x3; \ - UHWtype __ul, __vl, __uh, __vh; \ + unsigned long __x0, __x1, __x2, __x3; \ + unsigned short __ul, __vl, __uh, __vh; \ \ __ul = __ll_lowpart(u); \ __uh = __ll_highpart(u); \ __vl = __ll_lowpart(v); \ __vh = __ll_highpart(v); \ \ - __x0 = (UWtype) __ul * __vl; \ - __x1 = (UWtype) __ul * __vh; \ - __x2 = (UWtype) __uh * __vl; \ - __x3 = (UWtype) __uh * __vh; \ + __x0 = (unsigned long) __ul * __vl; \ + __x1 = (unsigned long) __ul * __vh; \ + __x2 = (unsigned long) __uh * __vl; \ + __x3 = (unsigned long) __uh * __vh; \ \ __x1 += __ll_highpart(__x0); /* this can't give carry */\ __x1 += __x2; /* but this indeed can */ \ @@ -47,14 +43,15 @@ }) #endif -DWtype __muldi3(DWtype u, DWtype v) +long long __muldi3(long long u, long long v) { const DWunion uu = {.ll = u}; const DWunion vv = {.ll = v}; DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; - w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high - + (UWtype) uu.s.high * (UWtype) vv.s.low); + w.s.high += ((unsigned long) uu.s.low * (unsigned long) vv.s.high + + (unsigned long) uu.s.high * (unsigned long) vv.s.low); return w.ll; } +EXPORT_SYMBOL(__muldi3); diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S index 5810cec54a7..0e8cc2710c2 100644 --- a/arch/microblaze/lib/uaccess_old.S +++ b/arch/microblaze/lib/uaccess_old.S @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/linkage.h> +#include <asm/page.h> /* * int __strncpy_user(char *to, char *from, int len); @@ -33,19 +34,18 @@ __strncpy_user: * r3 - temp count * r4 - temp val */ + beqid r7,3f addik r3,r7,0 /* temp_count = len */ - beqi r3,3f 1: lbu r4,r6,r0 + beqid r4,2f sb r4,r5,r0 - addik r3,r3,-1 - beqi r3,2f /* break on len */ - addik r5,r5,1 - bneid r4,1b addik r6,r6,1 /* delay slot */ - addik r3,r3,1 /* undo "temp_count--" */ + + addik r3,r3,-1 + bnei r3,1b /* break on len */ 2: rsubk r3,r3,r7 /* temp_count = len - temp_count */ 3: @@ -76,8 +76,8 @@ __strncpy_user: .type __strnlen_user, @function .align 4; __strnlen_user: + beqid r6,3f addik r3,r6,0 - beqi r3,3f 1: lbu r4,r5,r0 beqid r4,2f /* break on NUL */ @@ -102,6 +102,49 @@ __strnlen_user: .section __ex_table,"a" .word 1b,4b +/* Loop unrolling for __copy_tofrom_user */ +#define COPY(offset) \ +1: lwi r4 , r6, 0x0000 + offset; \ +2: lwi r19, r6, 0x0004 + offset; \ +3: lwi r20, r6, 0x0008 + offset; \ +4: lwi r21, r6, 0x000C + offset; \ +5: lwi r22, r6, 0x0010 + offset; \ +6: lwi r23, r6, 0x0014 + offset; \ +7: lwi r24, r6, 0x0018 + offset; \ +8: lwi r25, r6, 0x001C + offset; \ +9: swi r4 , r5, 0x0000 + offset; \ +10: swi r19, r5, 0x0004 + offset; \ +11: swi r20, r5, 0x0008 + offset; \ +12: swi r21, r5, 0x000C + offset; \ +13: swi r22, r5, 0x0010 + offset; \ +14: swi r23, r5, 0x0014 + offset; \ +15: swi r24, r5, 0x0018 + offset; \ +16: swi r25, r5, 0x001C + offset; \ + .section __ex_table,"a"; \ + .word 1b, 33f; \ + .word 2b, 33f; \ + .word 3b, 33f; \ + .word 4b, 33f; \ + .word 5b, 33f; \ + .word 6b, 33f; \ + .word 7b, 33f; \ + .word 8b, 33f; \ + .word 9b, 33f; \ + .word 10b, 33f; \ + .word 11b, 33f; \ + .word 12b, 33f; \ + .word 13b, 33f; \ + .word 14b, 33f; \ + .word 15b, 33f; \ + .word 16b, 33f; \ + .text + +#define COPY_80(offset) \ + COPY(0x00 + offset);\ + COPY(0x20 + offset);\ + COPY(0x40 + offset);\ + COPY(0x60 + offset); + /* * int __copy_tofrom_user(char *to, char *from, int len) * Return: @@ -119,34 +162,105 @@ __copy_tofrom_user: * r7, r3 - count * r4 - tempval */ - beqid r7, 3f /* zero size is not likely */ - andi r3, r7, 0x3 /* filter add count */ - bneid r3, 4f /* if is odd value then byte copying */ + beqid r7, 0f /* zero size is not likely */ or r3, r5, r6 /* find if is any to/from unaligned */ - andi r3, r3, 0x3 /* mask unaligned */ - bneid r3, 1f /* it is unaligned -> then jump */ + or r3, r3, r7 /* find if count is unaligned */ + andi r3, r3, 0x3 /* mask last 3 bits */ + bneid r3, bu1 /* if r3 is not zero then byte copying */ + or r3, r0, r0 + + rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */ + beqid r3, page; or r3, r0, r0 -/* at least one 4 byte copy */ -5: lw r4, r6, r3 -6: sw r4, r5, r3 +w1: lw r4, r6, r3 /* at least one 4 byte copy */ +w2: sw r4, r5, r3 addik r7, r7, -4 - bneid r7, 5b + bneid r7, w1 addik r3, r3, 4 addik r3, r7, 0 rtsd r15, 8 nop -4: or r3, r0, r0 -1: lbu r4,r6,r3 -2: sb r4,r5,r3 + + .section __ex_table,"a" + .word w1, 0f; + .word w2, 0f; + .text + +.align 4 /* Alignment is important to keep icache happy */ +page: /* Create room on stack and save registers for storign values */ + addik r1, r1, -40 + swi r5, r1, 0 + swi r6, r1, 4 + swi r7, r1, 8 + swi r19, r1, 12 + swi r20, r1, 16 + swi r21, r1, 20 + swi r22, r1, 24 + swi r23, r1, 28 + swi r24, r1, 32 + swi r25, r1, 36 +loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */ + /* Loop unrolling to get performance boost */ + COPY_80(0x000); + COPY_80(0x080); + COPY_80(0x100); + COPY_80(0x180); + /* copy loop */ + addik r6, r6, 0x200 + addik r7, r7, -0x200 + bneid r7, loop + addik r5, r5, 0x200 + + /* Restore register content */ + lwi r5, r1, 0 + lwi r6, r1, 4 + lwi r7, r1, 8 + lwi r19, r1, 12 + lwi r20, r1, 16 + lwi r21, r1, 20 + lwi r22, r1, 24 + lwi r23, r1, 28 + lwi r24, r1, 32 + lwi r25, r1, 36 + addik r1, r1, 40 + /* return back */ + addik r3, r0, 0 + rtsd r15, 8 + nop + +/* Fault case - return temp count */ +33: + addik r3, r7, 0 + /* Restore register content */ + lwi r5, r1, 0 + lwi r6, r1, 4 + lwi r7, r1, 8 + lwi r19, r1, 12 + lwi r20, r1, 16 + lwi r21, r1, 20 + lwi r22, r1, 24 + lwi r23, r1, 28 + lwi r24, r1, 32 + lwi r25, r1, 36 + addik r1, r1, 40 + /* return back */ + rtsd r15, 8 + nop + +.align 4 /* Alignment is important to keep icache happy */ +bu1: lbu r4,r6,r3 +bu2: sb r4,r5,r3 addik r7,r7,-1 - bneid r7,1b + bneid r7,bu1 addik r3,r3,1 /* delay slot */ -3: +0: addik r3,r7,0 rtsd r15,8 nop .size __copy_tofrom_user, . - __copy_tofrom_user .section __ex_table,"a" - .word 1b,3b,2b,3b,5b,3b,6b,3b + .word bu1, 0b; + .word bu2, 0b; + .text diff --git a/arch/microblaze/lib/ucmpdi2.c b/arch/microblaze/lib/ucmpdi2.c new file mode 100644 index 00000000000..d05f1585121 --- /dev/null +++ b/arch/microblaze/lib/ucmpdi2.c @@ -0,0 +1,20 @@ +#include <linux/export.h> + +#include "libgcc.h" + +word_type __ucmpdi2(unsigned long long a, unsigned long long b) +{ + const DWunion au = {.ll = a}; + const DWunion bu = {.ll = b}; + + if ((unsigned int) au.s.high < (unsigned int) bu.s.high) + return 0; + else if ((unsigned int) au.s.high > (unsigned int) bu.s.high) + return 2; + if ((unsigned int) au.s.low < (unsigned int) bu.s.low) + return 0; + else if ((unsigned int) au.s.low > (unsigned int) bu.s.low) + return 2; + return 1; +} +EXPORT_SYMBOL(__ucmpdi2); |
