diff options
Diffstat (limited to 'arch/mips/cavium-octeon/octeon-memcpy.S')
| -rw-r--r-- | arch/mips/cavium-octeon/octeon-memcpy.S | 65 |
1 files changed, 23 insertions, 42 deletions
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 88e0cddca20..64e08df51d6 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -79,11 +79,6 @@ /* * Only on the 64-bit kernel we can made use of 64-bit registers. */ -#ifdef CONFIG_64BIT -#define USE_DOUBLE -#endif - -#ifdef USE_DOUBLE #define LOAD ld #define LOADL ldl @@ -119,37 +114,17 @@ #define t6 $14 #define t7 $15 -#else - -#define LOAD lw -#define LOADL lwl -#define LOADR lwr -#define STOREL swl -#define STORER swr -#define STORE sw -#define ADD addu -#define SUB subu -#define SRL srl -#define SLL sll -#define SRA sra -#define SLLV sllv -#define SRLV srlv -#define NBYTES 4 -#define LOG_NBYTES 2 - -#endif /* USE_DOUBLE */ - #ifdef CONFIG_CPU_LITTLE_ENDIAN #define LDFIRST LOADR -#define LDREST LOADL +#define LDREST LOADL #define STFIRST STORER -#define STREST STOREL +#define STREST STOREL #define SHIFT_DISCARD SLLV #else #define LDFIRST LOADL -#define LDREST LOADR +#define LDREST LOADR #define STFIRST STOREL -#define STREST STORER +#define STREST STORER #define SHIFT_DISCARD SRLV #endif @@ -164,6 +139,14 @@ .set noat /* + * t7 is used as a flag to note inatomic mode. + */ +LEAF(__copy_user_inatomic) + b __copy_user_common + li t7, 1 + END(__copy_user_inatomic) + +/* * A combined memcpy/__copy_user * __copy_user sets len to 0 for success; else to an upper bound of * the number of uncopied bytes. @@ -174,6 +157,8 @@ LEAF(memcpy) /* a0=dst a1=src a2=len */ move v0, dst /* return value */ __memcpy: FEXPORT(__copy_user) + li t7, 0 /* not inatomic */ +__copy_user_common: /* * Note: dst & src may be unaligned, len may be 0 * Temps @@ -331,9 +316,9 @@ EXC( STORE t0, -8(dst), s_exc_p1u) src_unaligned: #define rem t8 - SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter + SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter beqz t0, cleanup_src_unaligned - and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES + and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES 1: /* * Avoid consecutive LD*'s to the same register since some mips @@ -341,13 +326,13 @@ src_unaligned: * It's OK to load FIRST(N+1) before REST(N) because the two addresses * are to the same unit (unless src is aligned, but it's not). */ -EXC( LDFIRST t0, FIRST(0)(src), l_exc) -EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) - SUB len, len, 4*NBYTES +EXC( LDFIRST t0, FIRST(0)(src), l_exc) +EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) + SUB len, len, 4*NBYTES EXC( LDREST t0, REST(0)(src), l_exc_copy) EXC( LDREST t1, REST(1)(src), l_exc_copy) -EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) -EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) +EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) +EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) EXC( LDREST t2, REST(2)(src), l_exc_copy) EXC( LDREST t3, REST(3)(src), l_exc_copy) ADD src, src, 4*NBYTES @@ -385,12 +370,10 @@ EXC( sb t0, N(dst), s_exc_p1) COPY_BYTE(0) COPY_BYTE(1) -#ifdef USE_DOUBLE COPY_BYTE(2) COPY_BYTE(3) COPY_BYTE(4) COPY_BYTE(5) -#endif EXC( lb t0, NBYTES-2(src), l_exc) SUB len, len, 1 jr ra @@ -412,7 +395,6 @@ l_exc_copy: * Assumes src < THREAD_BUADDR($28) */ LOAD t0, TI_TASK($28) - nop LOAD t0, THREAD_BUADDR(t0) 1: EXC( lb t1, 0(src), l_exc) @@ -422,10 +404,9 @@ EXC( lb t1, 0(src), l_exc) ADD dst, dst, 1 l_exc: LOAD t0, TI_TASK($28) - nop LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address - nop SUB len, AT, t0 # len number of uncopied bytes + bnez t7, 2f /* Skip the zeroing out part if inatomic */ /* * Here's where we rely on src and dst being incremented in tandem, * See (3) above. @@ -443,7 +424,7 @@ l_exc: ADD dst, dst, 1 bnez src, 1b SUB src, src, 1 - jr ra +2: jr ra nop |
