/*
* "memcpy" implementation of SuperH
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (c) 2002 STMicroelectronics Ltd
* Modified from memcpy.S and micro-optimised for SH4
* Stuart Menefy (stuart.menefy@st.com)
*
*/
#include <linux/linkage.h>
/*
* void *memcpy(void *dst, const void *src, size_t n);
*
* It is assumed that there is no overlap between src and dst.
* If there is an overlap, then the results are undefined.
*/
!
! GHIJ KLMN OPQR --> ...G HIJK LMNO PQR.
!
! Size is 16 or greater, and may have trailing bytes
.balign 32
.Lcase1:
! Backward copy for the layout pictured above: every long word stored
! is assembled from one byte of the previously loaded long word (kept
! in r7) and three bytes of the long word loaded in the current iteration.
!
! Registers, as used by this routine:
! r0 - store cursor; every store pre-decrements it
! r5 - offset added to r0 to form each load address
! r4 - address at which the copy stops (r0 == r4 means done)
! r2 - loop bound derived from r4
! r7 - long word carried over from the previous load
! r1, r3, r6 - scratch
! NOTE(review): r0, r4 and r5 are set up outside this section; presumably
! r4 = dst, r0 = dst + n and r5 = src - dst. Confirm against the entry code.
!
! Read a long word and write a long word at once
! At the start of each iteration, r7 contains last long load
!
! Bias the load offset by -1 before the priming long-word load
! (presumably this makes r0+r5 long-word aligned for this case - confirm).
add #-1,r5 ! 79 EX
mov r4,r2 ! 5 MT (0 cycles latency)
! Priming load: r7 supplies the single byte used by the first store.
mov.l @(r0,r5),r7 ! 21 LS (2 cycles latency)
! Step the load offset down one long word, so each loop load reads the
! long word just below the one currently held in r7.
add #-4,r5 ! 50 EX
! r2 = r4 + 7. The loop compares r0 before the store decrements it by 4,
! so the loop exits with r0 <= r4 + 3, leaving at most 3 bytes for the tail.
add #7,r2 ! 79 EX
!
#ifdef CONFIG_CPU_LITTLE_ENDIAN
! 6 cycles, 4 bytes per iteration
3: mov.l @(r0,r5),r1 ! 21 LS (latency=2) ! NMLK
mov r7, r3 ! 5 MT (latency=0) ! RQPO
! T = (r0 > r2), unsigned; consumed by the bt/s at the bottom of the loop.
cmp/hi r2,r0 ! 57 MT
! shll16 followed by shll8 shifts r3 left by 24 bits in total.
shll16 r3 ! 103 EX
mov r1,r6 ! 5 MT (latency=0)
shll8 r3 ! 102 EX ! Oxxx
shlr8 r6 ! 106 EX ! xNML
! Carry the long word just loaded into the next iteration.
mov r1, r7 ! 5 MT (latency=0)
or r6,r3 ! 82 EX ! ONML
bt/s 3b ! 109 BR
! Delay slot: the store executes whether or not the branch is taken.
mov.l r3,@-r0 ! 30 LS
#else
3: mov.l @(r0,r5),r1 ! 21 LS (latency=2) ! KLMN
mov r7,r3 ! 5 MT (latency=0) ! OPQR
! T = (r0 > r2), unsigned; consumed by the bt/s at the bottom of the loop.
cmp/hi r2,r0 ! 57 MT
! shlr16 followed by shlr8 shifts r3 right by 24 bits in total.
shlr16 r3 ! 107 EX
shlr8 r3 ! 106 EX ! xxxO
mov r1,r6 ! 5 MT (latency=0)
shll8 r6 ! 102 EX ! LMNx
! Carry the long word just loaded into the next iteration.
mov r1,r7 ! 5 MT (latency=0)
or r6,r3 ! 82 EX ! LMNO
bt/s 3b ! 109 BR
! Delay slot: the store executes whether or not the branch is taken.
mov.l r3,@-r0 ! 30 LS
#endif
! Finally, copy a byte at once, if necessary
! Undo the -4 step. r5 keeps the -1 bias from the top of the routine, so
! the byte loaded from r0+r5 sits at (r0 - 1) + the entry value of r5,
! which lines up with the address r0 - 1 written by the pre-decrement store.
add #4,r5 ! 50 EX
! r0 == r4 means nothing is left to copy.
cmp/eq r4,r0 ! 54 MT
! r2 = r4 + 1: the byte loop repeats while r0, sampled before the store,
! is above r4 + 1, so it stops once r0 has been decremented down to r4.
add #-6,r2 ! 50 EX
bt 9f ! 109 BR
8: cmp/hi r2,r0 ! 57 MT
mov.b @(r0,r5),r1 ! 20 LS (latency=2)
bt/s 8b ! 109 BR
! Delay slot: store the byte and step r0 down by one.
mov.b r1,@-r0 ! 29 LS
! Return; the nop fills the rts delay slot.
9: rts
nop
!
! GHIJ KLMN OPQR --> .GHI JKLM NOPQ R...
!
! Size is 16 or greater, and may have trailing bytes
.balign 32
.Lcase3:
! Read a long word and write a long word at once
! At the start of each iteration, r7 contains last long load
add #-3,r5 ! 79 EX
mov r4,r2 ! 5 MT (0 cycles latency)
mov.l @(r0,r5),r7 ! 21 LS (2 cycles latency)
add #-4,r5 ! 50 EX
add #7,r2 ! 79 EX
!
#ifdef CONFIG_CPU_LITTLE_ENDIAN
! 6 cycles, 4 bytes per iteration
3: mov.l @(r0,r5),r1 ! 21 LS (latency=2) ! NMLK
mov r7, r3 ! 5 MT (latency=0) ! RQPO
cmp/hi r2,r0 ! 57 MT
shll8 r3 ! 102 EX ! QPOx
mov r1,r6 ! 5 MT (latency=0)
shlr16 r6 ! 107 EX
shlr8 r6 ! 106 EX ! xxxN
mov r1, r7 ! 5 MT (latency=0)
or r6,r3 ! 82 EX ! QPON
bt/s 3b ! 109 BR
mov.l r3,@-r0 ! 30 LS
#else
3: mov r7,r3 ! OPQR
shlr8 r3 ! xOPQ
mov.l @(r0,r5),r7 ! KLMN
mov r7,r6
shll16 r6
shll8 r6 ! Nxxx
or r6,r3 !