diff options
Diffstat (limited to 'arch/powerpc/lib/copyuser_64.S')
| -rw-r--r-- | arch/powerpc/lib/copyuser_64.S | 147 |
1 files changed, 105 insertions, 42 deletions
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 693b14a778f..0860ee46013 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,8 +9,22 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define sLd sld /* Shift towards low-numbered address. */ +#define sHd srd /* Shift towards high-numbered address. */ +#else +#define sLd srd /* Shift towards low-numbered address. */ +#define sHd sld /* Shift towards high-numbered address. */ +#endif + .align 7 -_GLOBAL(__copy_tofrom_user) +_GLOBAL_TOC(__copy_tofrom_user) +BEGIN_FTR_SECTION + nop +FTR_SECTION_ELSE + b __copy_tofrom_user_power7 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +_GLOBAL(__copy_tofrom_user_base) /* first check for a whole page copy on a page boundary */ cmpldi cr1,r5,16 cmpdi cr6,r5,4096 @@ -24,7 +38,7 @@ _GLOBAL(__copy_tofrom_user) dcbt 0,r4 beq .Lcopy_page_4K andi. r6,r6,7 - PPC_MTOCRF 0x01,r5 + PPC_MTOCRF(0x01,r5) blt cr1,.Lshort_copy /* Below we want to nop out the bne if we're on a CPU that has the * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit @@ -44,37 +58,55 @@ BEGIN_FTR_SECTION andi. r0,r4,7 bne .Lsrc_unaligned END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) - srdi r7,r5,4 -20: ld r9,0(r4) - addi r4,r4,-8 - mtctr r7 - andi. r5,r5,7 - bf cr7*4+0,22f - addi r3,r3,8 - addi r4,r4,8 - mr r8,r9 - blt cr1,72f -21: ld r9,8(r4) -70: std r8,8(r3) -22: ldu r8,16(r4) -71: stdu r9,16(r3) + blt cr1,.Ldo_tail /* if < 16 bytes to copy */ + srdi r0,r5,5 + cmpdi cr1,r0,0 +20: ld r7,0(r4) +220: ld r6,8(r4) + addi r4,r4,16 + mtctr r0 + andi. r0,r5,0x10 + beq 22f + addi r3,r3,16 + addi r4,r4,-16 + mr r9,r7 + mr r8,r6 + beq cr1,72f +21: ld r7,16(r4) +221: ld r6,24(r4) + addi r4,r4,32 +70: std r9,0(r3) +270: std r8,8(r3) +22: ld r9,0(r4) +222: ld r8,8(r4) +71: std r7,16(r3) +271: std r6,24(r3) + addi r3,r3,32 bdnz 21b -72: std r8,8(r3) +72: std r9,0(r3) +272: std r8,8(r3) + andi. r5,r5,0xf beq+ 3f - addi r3,r3,16 + addi r4,r4,16 .Ldo_tail: - bf cr7*4+1,1f -23: lwz r9,8(r4) + addi r3,r3,16 + bf cr7*4+0,246f +244: ld r9,0(r4) + addi r4,r4,8 +245: std r9,0(r3) + addi r3,r3,8 +246: bf cr7*4+1,1f +23: lwz r9,0(r4) addi r4,r4,4 73: stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f -44: lhz r9,8(r4) +44: lhz r9,0(r4) addi r4,r4,2 74: sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f -45: lbz r9,8(r4) +45: lbz r9,0(r4) 75: stb r9,0(r3) 3: li r3,0 blr @@ -94,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ 25: ld r0,8(r4) - sld r6,r9,r10 + sLd r6,r9,r10 26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f 27: ld r0,8(r4) @@ -105,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ 29: ldu r9,8(r4) - sld r8,r0,r10 + sLd r8,r0,r10 addi r3,r3,-8 blt cr6,5f 30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 + sHd r12,r9,r11 + sLd r6,r9,r10 31: ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addi r3,r3,16 beq cr6,78f 1: or r7,r7,r6 32: ld r0,8(r4) 76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 +2: sHd r12,r9,r11 + sLd r6,r9,r10 33: ldu r9,16(r4) or r12,r8,r12 77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz 1b 78: std r12,8(r3) or r7,r7,r6 79: std r7,16(r3) -5: srd r12,r9,r11 +5: sHd r12,r9,r11 or r12,r8,r12 80: std r12,24(r3) bne 6f @@ -141,28 +173,43 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr 6: cmpwi cr1,r5,8 addi r3,r3,32 - sld r9,r9,r10 + sLd r9,r9,r10 ble cr1,7f 34: ld r0,8(r4) - srd r7,r0,r11 + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 +#endif 94: stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addi r3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 +#endif 95: sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addi r3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 +#endif 96: stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr .Ldst_unaligned: - PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */ subf r5,r6,r5 li r7,0 cmpldi cr1,r5,16 @@ -177,7 +224,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+1,3f 37: lwzx r0,r7,r4 83: stwx r0,r7,r3 -3: PPC_MTOCRF 0x01,r5 +3: PPC_MTOCRF(0x01,r5) add r4,r6,r4 add r3,r6,r3 b .Ldst_aligned @@ -220,7 +267,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 131: addi r3,r3,8 120: +320: 122: +322: 124: 125: 126: @@ -229,9 +278,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 129: 133: addi r3,r3,8 -121: 132: addi r3,r3,8 +121: +321: +344: 134: 135: 138: @@ -303,18 +354,22 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 183: add r3,r3,r7 b 1f +371: 180: addi r3,r3,8 171: 177: addi r3,r3,8 -170: -172: +370: +372: 176: 178: addi r3,r3,4 185: addi r3,r3,4 +170: +172: +345: 173: 174: 175: @@ -341,11 +396,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) .section __ex_table,"a" .align 3 .llong 20b,120b + .llong 220b,320b .llong 21b,121b + .llong 221b,321b .llong 70b,170b + .llong 270b,370b .llong 22b,122b + .llong 222b,322b .llong 71b,171b + .llong 271b,371b .llong 72b,172b + .llong 272b,372b + .llong 244b,344b + .llong 245b,345b .llong 23b,123b .llong 73b,173b .llong 44b,144b |
