diff options
Diffstat (limited to 'arch/powerpc/lib/memcpy_64.S')
| -rw-r--r-- | arch/powerpc/lib/memcpy_64.S | 62 |
1 files changed, 49 insertions, 13 deletions
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index fe2d34e5332..32a06ec395d 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -10,9 +10,30 @@ #include <asm/ppc_asm.h> .align 7 -_GLOBAL(memcpy) - std r3,48(r1) /* save destination pointer for return value */ - PPC_MTOCRF 0x01,r5 +_GLOBAL_TOC(memcpy) +BEGIN_FTR_SECTION +#ifdef __LITTLE_ENDIAN__ + cmpdi cr7,r5,0 +#else + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */ +#endif +FTR_SECTION_ELSE +#ifndef SELFTEST + b memcpy_power7 +#endif +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#ifdef __LITTLE_ENDIAN__ + /* dumb little-endian memcpy that will get replaced at runtime */ + addi r9,r3,-1 + addi r4,r4,-1 + beqlr cr7 + mtctr r5 +1: lbzu r10,1(r4) + stbu r10,1(r9) + bdnz 1b + blr +#else + PPC_MTOCRF(0x01,r5) cmpldi cr1,r5,16 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry andi. r6,r6,7 @@ -53,20 +74,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: std r8,8(r3) beq 3f addi r3,r3,16 - ld r9,8(r4) .Ldo_tail: bf cr7*4+1,1f - rotldi r9,r9,32 + lwz r9,8(r4) + addi r4,r4,4 stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f - rotldi r9,r9,16 + lhz r9,8(r4) + addi r4,r4,2 sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f - rotldi r9,r9,8 + lbz r9,8(r4) stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr .Lsrc_unaligned: @@ -133,14 +155,27 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) cmpwi cr1,r5,8 addi r3,r3,32 sld r9,r9,r10 - ble cr1,.Ldo_tail + ble cr1,6f ld r0,8(r4) srd r7,r0,r11 or r9,r7,r9 - b .Ldo_tail +6: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ + blr .Ldst_unaligned: - PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7 + PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7 subf r5,r6,r5 li r7,0 cmpldi cr1,r5,16 @@ -155,7 +190,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+1,3f lwzx r0,r7,r4 stwx r0,r7,r3 -3: PPC_MTOCRF 0x01,r5 +3: PPC_MTOCRF(0x01,r5) add r4,r6,r4 add r3,r6,r3 b .Ldst_aligned @@ -181,5 +216,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: bf cr7*4+3,4f lbz r0,0(r4) stb r0,0(r3) -4: ld r3,48(r1) /* return dest pointer */ +4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr +#endif |
