aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/lib/copyuser_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib/copyuser_64.S')
-rw-r--r--arch/powerpc/lib/copyuser_64.S147
1 files changed, 105 insertions, 42 deletions
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 693b14a778f..0860ee46013 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,8 +9,22 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#ifdef __BIG_ENDIAN__
+#define sLd sld /* Shift towards low-numbered address. */
+#define sHd srd /* Shift towards high-numbered address. */
+#else
+#define sLd srd /* Shift towards low-numbered address. */
+#define sHd sld /* Shift towards high-numbered address. */
+#endif
+
.align 7
-_GLOBAL(__copy_tofrom_user)
+_GLOBAL_TOC(__copy_tofrom_user)
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
+ b __copy_tofrom_user_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+_GLOBAL(__copy_tofrom_user_base)
/* first check for a whole page copy on a page boundary */
cmpldi cr1,r5,16
cmpdi cr6,r5,4096
@@ -24,7 +38,7 @@ _GLOBAL(__copy_tofrom_user)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- PPC_MTOCRF 0x01,r5
+ PPC_MTOCRF(0x01,r5)
blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
* CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
@@ -44,37 +58,55 @@ BEGIN_FTR_SECTION
andi. r0,r4,7
bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
- srdi r7,r5,4
-20: ld r9,0(r4)
- addi r4,r4,-8
- mtctr r7
- andi. r5,r5,7
- bf cr7*4+0,22f
- addi r3,r3,8
- addi r4,r4,8
- mr r8,r9
- blt cr1,72f
-21: ld r9,8(r4)
-70: std r8,8(r3)
-22: ldu r8,16(r4)
-71: stdu r9,16(r3)
+ blt cr1,.Ldo_tail /* if < 16 bytes to copy */
+ srdi r0,r5,5
+ cmpdi cr1,r0,0
+20: ld r7,0(r4)
+220: ld r6,8(r4)
+ addi r4,r4,16
+ mtctr r0
+ andi. r0,r5,0x10
+ beq 22f
+ addi r3,r3,16
+ addi r4,r4,-16
+ mr r9,r7
+ mr r8,r6
+ beq cr1,72f
+21: ld r7,16(r4)
+221: ld r6,24(r4)
+ addi r4,r4,32
+70: std r9,0(r3)
+270: std r8,8(r3)
+22: ld r9,0(r4)
+222: ld r8,8(r4)
+71: std r7,16(r3)
+271: std r6,24(r3)
+ addi r3,r3,32
bdnz 21b
-72: std r8,8(r3)
+72: std r9,0(r3)
+272: std r8,8(r3)
+ andi. r5,r5,0xf
beq+ 3f
- addi r3,r3,16
+ addi r4,r4,16
.Ldo_tail:
- bf cr7*4+1,1f
-23: lwz r9,8(r4)
+ addi r3,r3,16
+ bf cr7*4+0,246f
+244: ld r9,0(r4)
+ addi r4,r4,8
+245: std r9,0(r3)
+ addi r3,r3,8
+246: bf cr7*4+1,1f
+23: lwz r9,0(r4)
addi r4,r4,4
73: stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
-44: lhz r9,8(r4)
+44: lhz r9,0(r4)
addi r4,r4,2
74: sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
-45: lbz r9,8(r4)
+45: lbz r9,0(r4)
75: stb r9,0(r3)
3: li r3,0
blr
@@ -94,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
25: ld r0,8(r4)
- sld r6,r9,r10
+ sLd r6,r9,r10
26: ldu r9,16(r4)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
27: ld r0,8(r4)
@@ -105,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
29: ldu r9,8(r4)
- sld r8,r0,r10
+ sLd r8,r0,r10
addi r3,r3,-8
blt cr6,5f
30: ld r0,8(r4)
- srd r12,r9,r11
- sld r6,r9,r10
+ sHd r12,r9,r11
+ sLd r6,r9,r10
31: ldu r9,16(r4)
or r12,r8,r12
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
addi r3,r3,16
beq cr6,78f
1: or r7,r7,r6
32: ld r0,8(r4)
76: std r12,8(r3)
-2: srd r12,r9,r11
- sld r6,r9,r10
+2: sHd r12,r9,r11
+ sLd r6,r9,r10
33: ldu r9,16(r4)
or r12,r8,r12
77: stdu r7,16(r3)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
bdnz 1b
78: std r12,8(r3)
or r7,r7,r6
79: std r7,16(r3)
-5: srd r12,r9,r11
+5: sHd r12,r9,r11
or r12,r8,r12
80: std r12,24(r3)
bne 6f
@@ -141,28 +173,43 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
- sld r9,r9,r10
+ sLd r9,r9,r10
ble cr1,7f
34: ld r0,8(r4)
- srd r7,r0,r11
+ sHd r7,r0,r11
or r9,r7,r9
7:
bf cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
+#endif
94: stw r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,32
+#endif
addi r3,r3,4
1: bf cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
+#endif
95: sth r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,16
+#endif
addi r3,r3,2
2: bf cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
+#endif
96: stb r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,8
+#endif
3: li r3,0
blr
.Ldst_unaligned:
- PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
@@ -177,7 +224,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: PPC_MTOCRF 0x01,r5
+3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
@@ -220,7 +267,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
131:
addi r3,r3,8
120:
+320:
122:
+322:
124:
125:
126:
@@ -229,9 +278,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
129:
133:
addi r3,r3,8
-121:
132:
addi r3,r3,8
+121:
+321:
+344:
134:
135:
138:
@@ -303,18 +354,22 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
183:
add r3,r3,r7
b 1f
+371:
180:
addi r3,r3,8
171:
177:
addi r3,r3,8
-170:
-172:
+370:
+372:
176:
178:
addi r3,r3,4
185:
addi r3,r3,4
+170:
+172:
+345:
173:
174:
175:
@@ -341,11 +396,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
.section __ex_table,"a"
.align 3
.llong 20b,120b
+ .llong 220b,320b
.llong 21b,121b
+ .llong 221b,321b
.llong 70b,170b
+ .llong 270b,370b
.llong 22b,122b
+ .llong 222b,322b
.llong 71b,171b
+ .llong 271b,371b
.llong 72b,172b
+ .llong 272b,372b
+ .llong 244b,344b
+ .llong 245b,345b
.llong 23b,123b
.llong 73b,173b
.llong 44b,144b