aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMichal Simek <monstr@monstr.eu>2011-06-10 10:49:08 +0200
committerMichal Simek <monstr@monstr.eu>2011-10-14 12:24:26 +0200
commitebe211254bfa6295f4ab0b33c7c881bdfabbab60 (patch)
tree08345bf06ede9d07585342f1264a12a47f2a0c50 /arch
parent782d491fc210fac03976d01071145728339b6887 (diff)
microblaze: Add loop unrolling for PAGE in copy_tofrom_user
Increase performance by loop unrolling. Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch')
-rw-r--r--arch/microblaze/lib/uaccess_old.S84
1 files changed, 84 insertions, 0 deletions
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index d09f2dce648..142492ec270 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/linkage.h>
+#include <asm/page.h>
/*
* int __strncpy_user(char *to, char *from, int len);
@@ -102,6 +103,49 @@ __strnlen_user:
.section __ex_table,"a"
.word 1b,4b
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset) \
+1: lwi r4 , r6, 0x0000 + offset; \
+2: lwi r19, r6, 0x0004 + offset; \
+3: lwi r20, r6, 0x0008 + offset; \
+4: lwi r21, r6, 0x000C + offset; \
+5: lwi r22, r6, 0x0010 + offset; \
+6: lwi r23, r6, 0x0014 + offset; \
+7: lwi r24, r6, 0x0018 + offset; \
+8: lwi r25, r6, 0x001C + offset; \
+9: swi r4 , r5, 0x0000 + offset; \
+10: swi r19, r5, 0x0004 + offset; \
+11: swi r20, r5, 0x0008 + offset; \
+12: swi r21, r5, 0x000C + offset; \
+13: swi r22, r5, 0x0010 + offset; \
+14: swi r23, r5, 0x0014 + offset; \
+15: swi r24, r5, 0x0018 + offset; \
+16: swi r25, r5, 0x001C + offset; \
+ .section __ex_table,"a"; \
+ .word 1b, 0f; \
+ .word 2b, 0f; \
+ .word 3b, 0f; \
+ .word 4b, 0f; \
+ .word 5b, 0f; \
+ .word 6b, 0f; \
+ .word 7b, 0f; \
+ .word 8b, 0f; \
+ .word 9b, 0f; \
+ .word 10b, 0f; \
+ .word 11b, 0f; \
+ .word 12b, 0f; \
+ .word 13b, 0f; \
+ .word 14b, 0f; \
+ .word 15b, 0f; \
+ .word 16b, 0f; \
+ .text
+
+#define COPY_80(offset) \
+ COPY(0x00 + offset);\
+ COPY(0x20 + offset);\
+ COPY(0x40 + offset);\
+ COPY(0x60 + offset);
+
/*
* int __copy_tofrom_user(char *to, char *from, int len)
* Return:
@@ -126,6 +170,10 @@ __copy_tofrom_user:
bneid r3, bu1 /* if r3 is not zero then byte copying */
or r3, r0, r0
+ rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+ beqid r3, page;
+ or r3, r0, r0
+
w1: lw r4, r6, r3 /* at least one 4 byte copy */
w2: sw r4, r5, r3
addik r7, r7, -4
@@ -140,6 +188,42 @@ w2: sw r4, r5, r3
.word w2, 0f;
.text
+.align 4 /* Alignment is important to keep icache happy */
+page: /* Create room on stack and save registers for storign values */
+ addik r1, r1, -32
+ swi r19, r1, 4
+ swi r20, r1, 8
+ swi r21, r1, 12
+ swi r22, r1, 16
+ swi r23, r1, 20
+ swi r24, r1, 24
+ swi r25, r1, 28
+loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+ /* Loop unrolling to get performance boost */
+ COPY_80(0x000);
+ COPY_80(0x080);
+ COPY_80(0x100);
+ COPY_80(0x180);
+ /* copy loop */
+ addik r6, r6, 0x200
+ addik r7, r7, -0x200
+ bneid r7, loop
+ addik r5, r5, 0x200
+ /* Restore register content */
+ lwi r19, r1, 4
+ lwi r20, r1, 8
+ lwi r21, r1, 12
+ lwi r22, r1, 16
+ lwi r23, r1, 20
+ lwi r24, r1, 24
+ lwi r25, r1, 28
+ addik r1, r1, 32
+ /* return back */
+ addik r3, r7, 0
+ rtsd r15, 8
+ nop
+
+.align 4 /* Alignment is important to keep icache happy */
bu1: lbu r4,r6,r3
bu2: sb r4,r5,r3
addik r7,r7,-1