Diffstat (limited to 'arch/cris/arch-v10/lib')
 -rw-r--r--  arch/cris/arch-v10/lib/Makefile        |   3
 -rw-r--r--  arch/cris/arch-v10/lib/checksum.S      |   8
 -rw-r--r--  arch/cris/arch-v10/lib/checksumcopy.S  |   8
 -rw-r--r--  arch/cris/arch-v10/lib/dmacopy.c       |  15
 -rw-r--r--  arch/cris/arch-v10/lib/dram_init.S     |  99
 -rw-r--r--  arch/cris/arch-v10/lib/hw_settings.S   |  14
 -rw-r--r--  arch/cris/arch-v10/lib/memset.c        | 415
 -rw-r--r--  arch/cris/arch-v10/lib/old_checksum.c  |  65
 -rw-r--r--  arch/cris/arch-v10/lib/string.c        | 347
 -rw-r--r--  arch/cris/arch-v10/lib/usercopy.c      | 322
 10 files changed, 619 insertions, 677 deletions
diff --git a/arch/cris/arch-v10/lib/Makefile b/arch/cris/arch-v10/lib/Makefile
index 36e9a9c5239..725153edb76 100644
--- a/arch/cris/arch-v10/lib/Makefile
+++ b/arch/cris/arch-v10/lib/Makefile
@@ -2,8 +2,5 @@
# Makefile for Etrax-specific library files..
#
-
-EXTRA_AFLAGS := -traditional
-
lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o
diff --git a/arch/cris/arch-v10/lib/checksum.S b/arch/cris/arch-v10/lib/checksum.S
index 85c48f0a9ec..7d552f4bd5a 100644
--- a/arch/cris/arch-v10/lib/checksum.S
+++ b/arch/cris/arch-v10/lib/checksum.S
@@ -1,4 +1,4 @@
-/* $Id: checksum.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
+/*
* A fast checksum routine using movem
* Copyright (c) 1998-2001 Axis Communications AB
*
@@ -61,8 +61,6 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords
ax
addq 0,$r12
- ax ; do it again, since we might have generated a carry
- addq 0,$r12
subq 10*4,$r11
bge _mloop
@@ -88,10 +86,6 @@ _word_loop:
lsrq 16,$r13 ; r13 = checksum >> 16
and.d $r9,$r12 ; checksum = checksum & 0xffff
add.d $r13,$r12 ; checksum += r13
- move.d $r12,$r13 ; do the same again, maybe we got a carry last add
- lsrq 16,$r13
- and.d $r9,$r12
- add.d $r13,$r12
_no_fold:
cmpq 2,$r11
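The hunk above trims the checksum fold to a single pass. For reference, the fold performed by the lsrq/and.d/add.d sequence can be written in C; this is a hypothetical sketch for illustration, not code from the patch (the final csum_fold() later absorbs any carry a single pass leaves behind):

    /* Hypothetical sketch of the fold done above in assembly: add the
     * high halfword of the running sum into the low halfword.  One pass
     * can leave a carry in bit 16; csum_fold() handles that at the end. */
    static inline unsigned int fold_once(unsigned int sum)
    {
            return (sum & 0xffff) + (sum >> 16);
    }
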
diff --git a/arch/cris/arch-v10/lib/checksumcopy.S b/arch/cris/arch-v10/lib/checksumcopy.S
index 35cbffb306f..540db8a5f84 100644
--- a/arch/cris/arch-v10/lib/checksumcopy.S
+++ b/arch/cris/arch-v10/lib/checksumcopy.S
@@ -1,4 +1,4 @@
-/* $Id: checksumcopy.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
+/*
* A fast checksum+copy routine using movem
* Copyright (c) 1998, 2001 Axis Communications AB
*
@@ -67,8 +67,6 @@ _mloop: movem [$r10+],$r9 ; read 10 longwords
ax
addq 0,$r13
- ax ; do it again, since we might have generated a carry
- addq 0,$r13
subq 10*4,$r12
bge _mloop
@@ -91,10 +89,6 @@ _word_loop:
lsrq 16,$r9 ; r0 = checksum >> 16
and.d 0xffff,$r13 ; checksum = checksum & 0xffff
add.d $r9,$r13 ; checksum += r0
- move.d $r13,$r9 ; do the same again, maybe we got a carry last add
- lsrq 16,$r9
- and.d 0xffff,$r13
- add.d $r9,$r13
_no_fold:
cmpq 2,$r12
diff --git a/arch/cris/arch-v10/lib/dmacopy.c b/arch/cris/arch-v10/lib/dmacopy.c
index e5fb44f505c..49f5b8ca5b4 100644
--- a/arch/cris/arch-v10/lib/dmacopy.c
+++ b/arch/cris/arch-v10/lib/dmacopy.c
@@ -1,5 +1,4 @@
-/* $Id: dmacopy.c,v 1.1 2001/12/17 13:59:27 bjornw Exp $
- *
+/*
* memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax
*/
@@ -13,11 +12,11 @@ void *dma_memcpy(void *pdst,
unsigned int pn)
{
static etrax_dma_descr indma, outdma;
-
- D(printk("dma_memcpy %d bytes... ", pn));
+
+ D(printk(KERN_DEBUG "dma_memcpy %d bytes... ", pn));
#if 0
- *R_GEN_CONFIG = genconfig_shadow =
+ *R_GEN_CONFIG = genconfig_shadow =
(genconfig_shadow & ~0x3c0000) |
IO_STATE(R_GEN_CONFIG, dma6, intdma7) |
IO_STATE(R_GEN_CONFIG, dma7, intdma6);
@@ -32,11 +31,11 @@ void *dma_memcpy(void *pdst,
*R_DMA_CH7_FIRST = &outdma;
*R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start);
*R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start);
-
- while(*R_DMA_CH7_CMD == 1) /* wait for completion */ ;
- D(printk("done\n"));
+ while (*R_DMA_CH7_CMD == 1)
+ /* wait for completion */;
+ D(printk(KERN_DEBUG "done\n"));
}
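The rewritten wait loop above polls R_DMA_CH7_CMD until the started transfer completes. A minimal sketch of that polling pattern, assuming (as the loop in the hunk suggests) that the command register reads back 1 while the start command is still pending:

    /* Hypothetical sketch of the completion poll in dma_memcpy() above;
     * the meaning of the value 1 is taken from the loop shown in the
     * hunk, not from the chip manual. */
    static inline void wait_dma_done(volatile unsigned long *cmd_reg)
    {
            while (*cmd_reg == 1)
                    ;       /* busy-wait until the DMA command finishes */
    }
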
diff --git a/arch/cris/arch-v10/lib/dram_init.S b/arch/cris/arch-v10/lib/dram_init.S
index 2ef4ad5706e..e541d3d8f92 100644
--- a/arch/cris/arch-v10/lib/dram_init.S
+++ b/arch/cris/arch-v10/lib/dram_init.S
@@ -1,88 +1,30 @@
-/* $Id: dram_init.S,v 1.4 2003/09/22 09:21:59 starvik Exp $
- *
+/*
* DRAM/SDRAM initialization - alter with care
* This file is intended to be included from other assembler files
*
* Note: This file may not modify r9 because r9 is used to carry
* information from the decompresser to the kernel
*
- * Copyright (C) 2000, 2001 Axis Communications AB
- *
- * Authors: Mikael Starvik (starvik@axis.com)
- *
- * $Log: dram_init.S,v $
- * Revision 1.4 2003/09/22 09:21:59 starvik
- * Decompresser is linked to 0x407xxxxx and sdram commands are at 0x000xxxxx
- * so we need to mask off 12 bits.
- *
- * Revision 1.3 2003/03/31 09:38:37 starvik
- * Corrected calculation of end of sdram init commands
- *
- * Revision 1.2 2002/11/19 13:33:29 starvik
- * Changes from Linux 2.4
- *
- * Revision 1.13 2002/10/30 07:42:28 starvik
- * Always read SDRAM command sequence from flash
- *
- * Revision 1.12 2002/08/09 11:37:37 orjanf
- * Added double initialization work-around for Samsung SDRAMs.
- *
- * Revision 1.11 2002/06/04 11:43:21 starvik
- * Check if mrs_data is specified in kernelconfig (necessary for MCM)
- *
- * Revision 1.10 2001/10/04 12:00:21 martinnn
- * Added missing underscores.
- *
- * Revision 1.9 2001/10/01 14:47:35 bjornw
- * Added register prefixes and removed underscores
- *
- * Revision 1.8 2001/05/15 07:12:45 hp
- * Copy warning from head.S about r8 and r9
- *
- * Revision 1.7 2001/04/18 12:05:39 bjornw
- * Fixed comments, and explicitely include config.h to be sure its there
- *
- * Revision 1.6 2001/04/10 06:20:16 starvik
- * Delay should be 200us, not 200ns
- *
- * Revision 1.5 2001/04/09 06:01:13 starvik
- * Added support for 100 MHz SDRAMs
- *
- * Revision 1.4 2001/03/26 14:24:01 bjornw
- * Namechange of some config options
- *
- * Revision 1.3 2001/03/23 08:29:41 starvik
- * Corrected calculation of mrs_data
- *
- * Revision 1.2 2001/02/08 15:20:00 starvik
- * Corrected SDRAM initialization
- * Should now be included as inline
- *
- * Revision 1.1 2001/01/29 13:08:02 starvik
- * Initial version
- * This file should be included from all assembler files that needs to
- * initialize DRAM/SDRAM.
+ * Copyright (C) 2000-2012 Axis Communications AB
*
*/
/* Just to be certain the config file is included, we include it here
- * explicitely instead of depending on it being included in the file that
+ * explicitly instead of depending on it being included in the file that
* uses this code.
*/
-#include <linux/config.h>
;; WARNING! The registers r8 and r9 are used as parameters carrying
- ;; information from the decompressor (if the kernel was compressed).
+ ;; information from the decompressor (if the kernel was compressed).
;; They should not be used in the code below.
-#ifndef CONFIG_SVINTO_SIM
move.d CONFIG_ETRAX_DEF_R_WAITSTATES, $r0
move.d $r0, [R_WAITSTATES]
move.d CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0
move.d $r0, [R_BUS_CONFIG]
-
+
#ifndef CONFIG_ETRAX_SDRAM
move.d CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0
move.d $r0, [R_DRAM_CONFIG]
@@ -93,14 +35,14 @@
;; Samsung SDRAMs seem to require to be initialized twice to work properly.
moveq 2, $r6
_sdram_init:
-
+
; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization
-
+
; Bank configuration
move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0
move.d $r0, [R_SDRAM_CONFIG]
- ; Calculate value of mrs_data
+ ; Calculate value of mrs_data
; CAS latency = 2 && bus_width = 32 => 0x40
; CAS latency = 3 && bus_width = 32 => 0x60
; CAS latency = 2 && bus_width = 16 => 0x20
@@ -111,22 +53,22 @@ _sdram_init:
and.d 0x00ff0000, $r2
bne _set_timing
lsrq 16, $r2
-
+
move.d 0x40, $r2 ; Assume 32 bits and CAS latency = 2
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
move.d $r1, $r3
- and.d 0x03, $r1 ; Get CAS latency
+ and.d 0x03, $r1 ; Get CAS latency
and.d 0x1000, $r3 ; 50 or 100 MHz?
beq _speed_50
nop
-_speed_100:
+_speed_100:
cmp.d 0x00, $r1 ; CAS latency = 2?
beq _bw_check
nop
- or.d 0x20, $r2 ; CAS latency = 3
+ or.d 0x20, $r2 ; CAS latency = 3
ba _bw_check
nop
-_speed_50:
+_speed_50:
cmp.d 0x01, $r1 ; CAS latency = 2?
beq _bw_check
nop
@@ -141,19 +83,19 @@ _bw_check:
; Set timing parameters. Starts master clock
_set_timing:
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
- and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0
+ and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0
or.d 0x80000000, $r1 ; Make sure sdram enable bit is set
move.d $r1, $r5
or.d 0x0000c000, $r1 ; ref = disable
lslq 16, $r2 ; mrs data starts at bit 16
- or.d $r2, $r1
- move.d $r1, [R_SDRAM_TIMING]
-
+ or.d $r2, $r1
+ move.d $r1, [R_SDRAM_TIMING]
+
; Wait 200us
move.d 10000, $r2
1: bne 1b
subq 1, $r2
-
+
; Issue initialization command sequence
move.d _sdram_commands_start, $r2
and.d 0x000fffff, $r2 ; Make sure commands are read from flash
@@ -199,7 +141,6 @@ _sdram_commands_start:
.byte 2 ; refresh
.byte 0 ; nop
.byte 1 ; mrs
- .byte 0 ; nop
-_sdram_commands_end:
-#endif
+ .byte 0 ; nop
+_sdram_commands_end:
#endif
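The mrs_data table in the hunk above maps CAS latency and bus width to 0x40/0x60/0x20/0x30. A hedged C restatement of that table (the values come from the comment in dram_init.S; expressing the 32-bit values as the 16-bit ones shifted left is only a compact way to reproduce the table):

    /* Hypothetical sketch of the mrs_data selection done above. */
    static unsigned int mrs_data(int cas_latency, int bus_width)
    {
            unsigned int val = (cas_latency == 2) ? 0x20 : 0x30; /* 16-bit bus */

            if (bus_width == 32)
                    val <<= 1;              /* 0x40 or 0x60 on a 32-bit bus */
            return val;
    }
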
diff --git a/arch/cris/arch-v10/lib/hw_settings.S b/arch/cris/arch-v10/lib/hw_settings.S
index 56905aaa7b6..c09f19f478a 100644
--- a/arch/cris/arch-v10/lib/hw_settings.S
+++ b/arch/cris/arch-v10/lib/hw_settings.S
@@ -1,13 +1,11 @@
/*
- * $Id: hw_settings.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
- *
* This table is used by some tools to extract hardware parameters.
* The table should be included in the kernel and the decompressor.
* Don't forget to update the tools if you change this table.
*
* Copyright (C) 2001 Axis Communications AB
*
- * Authors: Mikael Starvik (starvik@axis.com)
+ * Authors: Mikael Starvik (starvik@axis.com)
*/
#define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \
@@ -15,13 +13,13 @@
#define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \
(CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \
(CONFIG_ETRAX_DEF_R_PORT_PB_DATA))
-
+
.ascii "HW_PARAM_MAGIC" ; Magic number
.dword 0xc0004000 ; Kernel start address
; Debug port
#ifdef CONFIG_ETRAX_DEBUG_PORT0
- .dword 0
+ .dword 0
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
.dword 1
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
@@ -30,7 +28,7 @@
.dword 3
#else
.dword 4 ; No debug
-#endif
+#endif
; SDRAM or EDO DRAM?
#ifdef CONFIG_ETRAX_SDRAM
@@ -39,7 +37,7 @@
.dword 0
#endif
- ; Register values
+ ; Register values
.dword R_WAITSTATES
.dword CONFIG_ETRAX_DEF_R_WAITSTATES
.dword R_BUS_CONFIG
@@ -56,7 +54,7 @@
.dword CONFIG_ETRAX_DEF_R_DRAM_TIMING
#endif
.dword R_PORT_PA_SET
- .dword PA_SET_VALUE
+ .dword PA_SET_VALUE
.dword R_PORT_PB_SET
.dword PB_SET_VALUE
.dword 0 ; No more register values
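PB_SET_VALUE above packs the port's config, direction and data bytes into one dword so tools reading the table can program R_PORT_PB_SET with a single value. A hypothetical C equivalent of that packing (function name invented for illustration):

    /* Hypothetical sketch of the PB_SET_VALUE packing shown above. */
    static unsigned long pb_set_value(unsigned char config,
                                      unsigned char dir,
                                      unsigned char data)
    {
            return ((unsigned long)config << 16) |
                   ((unsigned long)dir    <<  8) |
                    (unsigned long)data;
    }
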
diff --git a/arch/cris/arch-v10/lib/memset.c b/arch/cris/arch-v10/lib/memset.c
index 82bb6683917..c94ea9b3ec2 100644
--- a/arch/cris/arch-v10/lib/memset.c
+++ b/arch/cris/arch-v10/lib/memset.c
@@ -1,252 +1,259 @@
-/*#************************************************************************#*/
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# FUNCTION NAME: memset() */
-/*# */
-/*# PARAMETERS: void* dst; Destination address. */
-/*# int c; Value of byte to write. */
-/*# int len; Number of bytes to write. */
-/*# */
-/*# RETURNS: dst. */
-/*# */
-/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */
-/*# Framework taken from memcpy. This routine is */
-/*# very sensitive to compiler changes in register allocation. */
-/*# Should really be rewritten to avoid this problem. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# HISTORY */
-/*# */
-/*# DATE NAME CHANGES */
-/*# ---- ---- ------- */
-/*# 990713 HP Tired of watching this function (or */
-/*# really, the nonoptimized generic */
-/*# implementation) take up 90% of simulator */
-/*# output. Measurements needed. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-
-#include <linux/types.h>
-
-/* No, there's no macro saying 12*4, since it is "hard" to get it into
- the asm in a good way. Thus better to expose the problem everywhere.
- */
-
-/* Assuming 1 cycle per dword written or read (ok, not really true), and
- one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
- so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
-
-#define ZERO_BLOCK_SIZE (1*12*4)
-
-void *memset(void *pdst,
- int c,
- size_t plen)
+/* A memset for CRIS.
+ Copyright (C) 1999-2005 Axis Communications.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Neither the name of Axis Communications nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+ COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE. */
+
+/* FIXME: This file should really only be used for reference, as the
+ result is somewhat depending on gcc generating what we expect rather
+ than what we describe. An assembly file should be used instead. */
+
+/* Note the multiple occurrence of the expression "12*4", including the
+ asm. It is hard to get it into the asm in a good way. Thus better to
+ expose the problem everywhere: no macro. */
+
+/* Assuming one cycle per dword written or read (ok, not really true; the
+ world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
+ <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
+ 48-byte block to set. */
+
+#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)
+
+/* No name ambiguities in this file. */
+__asm__ (".syntax no_register_prefix");
+
+void *memset(void *pdst, int c, unsigned int plen)
{
- /* Ok. Now we want the parameters put in special registers.
- Make sure the compiler is able to make something useful of this. */
+ /* Now we want the parameters in special registers. Make sure the
+ compiler does something usable with this. */
register char *return_dst __asm__ ("r10") = pdst;
register int n __asm__ ("r12") = plen;
register int lc __asm__ ("r11") = c;
- /* Most apps use memset sanely. Only those memsetting about 3..4
- bytes or less get penalized compared to the generic implementation
- - and that's not really sane use. */
+ /* Most apps use memset sanely. Memsetting about 3..4 bytes or less get
+ penalized here compared to the generic implementation. */
- /* Ugh. This is fragile at best. Check with newer GCC releases, if
- they compile cascaded "x |= x << 8" sanely! */
- __asm__("movu.b %0,$r13\n\t"
- "lslq 8,$r13\n\t"
- "move.b %0,$r13\n\t"
- "move.d $r13,%0\n\t"
- "lslq 16,$r13\n\t"
- "or.d $r13,%0"
- : "=r" (lc) : "0" (lc) : "r13");
+ /* This is fragile performancewise at best. Check with newer GCC
+ releases, if they compile cascaded "x |= x << 8" to sane code. */
+ __asm__("movu.b %0,r13 \n\
+ lslq 8,r13 \n\
+ move.b %0,r13 \n\
+ move.d r13,%0 \n\
+ lslq 16,r13 \n\
+ or.d r13,%0"
+ : "=r" (lc) /* Inputs. */
+ : "0" (lc) /* Outputs. */
+ : "r13"); /* Trash. */
{
register char *dst __asm__ ("r13") = pdst;
-
- /* This is NONPORTABLE, but since this whole routine is */
- /* grossly nonportable that doesn't matter. */
- if (((unsigned long) pdst & 3) != 0
- /* Oops! n=0 must be a legal call, regardless of alignment. */
- && n >= 3)
- {
- if ((unsigned long)dst & 1)
- {
- *dst = (char) lc;
- n--;
- dst++;
- }
-
- if ((unsigned long)dst & 2)
- {
- *(short *)dst = lc;
- n -= 2;
- dst += 2;
- }
- }
+ if (((unsigned long) pdst & 3) != 0
+ /* Oops! n = 0 must be a valid call, regardless of alignment. */
+ && n >= 3)
+ {
+ if ((unsigned long) dst & 1)
+ {
+ *dst = (char) lc;
+ n--;
+ dst++;
+ }
- /* Now the fun part. For the threshold value of this, check the equation
- above. */
- /* Decide which copying method to use. */
- if (n >= ZERO_BLOCK_SIZE)
- {
- /* For large copies we use 'movem' */
-
- /* It is not optimal to tell the compiler about clobbering any
- registers; that will move the saving/restoring of those registers
- to the function prologue/epilogue, and make non-movem sizes
- suboptimal.
-
- This method is not foolproof; it assumes that the "asm reg"
- declarations at the beginning of the function really are used
- here (beware: they may be moved to temporary registers).
- This way, we do not have to save/move the registers around into
- temporaries; we can safely use them straight away.
-
- If you want to check that the allocation was right; then
- check the equalities in the first comment. It should say
- "r13=r13, r12=r12, r11=r11" */
- __asm__ volatile ("
- ;; Check that the following is true (same register names on
- ;; both sides of equal sign, as in r8=r8):
- ;; %0=r13, %1=r12, %4=r11
- ;;
- ;; Save the registers we'll clobber in the movem process
- ;; on the stack. Don't mention them to gcc, it will only be
- ;; upset.
- subq 11*4,$sp
- movem $r10,[$sp]
-
- move.d $r11,$r0
- move.d $r11,$r1
- move.d $r11,$r2
- move.d $r11,$r3
- move.d $r11,$r4
- move.d $r11,$r5
- move.d $r11,$r6
- move.d $r11,$r7
- move.d $r11,$r8
- move.d $r11,$r9
- move.d $r11,$r10
-
- ;; Now we've got this:
- ;; r13 - dst
- ;; r12 - n
-
- ;; Update n for the first loop
- subq 12*4,$r12
-0:
- subq 12*4,$r12
- bge 0b
- movem $r11,[$r13+]
-
- addq 12*4,$r12 ;; compensate for last loop underflowing n
-
- ;; Restore registers from stack
- movem [$sp+],$r10"
-
- /* Outputs */ : "=r" (dst), "=r" (n)
- /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
-
- }
+ if ((unsigned long) dst & 2)
+ {
+ *(short *) dst = lc;
+ n -= 2;
+ dst += 2;
+ }
+ }
+
+ /* Decide which setting method to use. */
+ if (n >= MEMSET_BY_BLOCK_THRESHOLD)
+ {
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-block sizes
+ suboptimal. */
+ __asm__ volatile
+ ("\
+ ;; GCC does promise correct register allocations, but let's \n\
+ ;; make sure it keeps its promises. \n\
+ .ifnc %0-%1-%4,$r13-$r12-$r11 \n\
+ .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll clobber in the movem process \n\
+ ;; on the stack. Don't mention them to gcc, it will only be \n\
+ ;; upset. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ move.d r11,r0 \n\
+ move.d r11,r1 \n\
+ move.d r11,r2 \n\
+ move.d r11,r3 \n\
+ move.d r11,r4 \n\
+ move.d r11,r5 \n\
+ move.d r11,r6 \n\
+ move.d r11,r7 \n\
+ move.d r11,r8 \n\
+ move.d r11,r9 \n\
+ move.d r11,r10 \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 12*4,r12 \n\
+0: \n\
+"
+#ifdef __arch_common_v10_v32
+ /* Cater to branch offset difference between v32 and v10. We
+ assume the branch below has an 8-bit offset. */
+" setf\n"
+#endif
+" subq 12*4,r12 \n\
+ bge 0b \n\
+ movem r11,[r13+] \n\
+ \n\
+ ;; Compensate for last loop underflowing n. \n\
+ addq 12*4,r12 \n\
+ \n\
+ ;; Restore registers from stack. \n\
+ movem [sp+],r10"
+
+ /* Outputs. */
+ : "=r" (dst), "=r" (n)
+
+ /* Inputs. */
+ : "0" (dst), "1" (n), "r" (lc));
+ }
+
+ /* An ad-hoc unroll, used for 4*12-1..16 bytes. */
+ while (n >= 16)
+ {
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ n -= 16;
+ }
- /* Either we directly starts copying, using dword copying
- in a loop, or we copy as much as possible with 'movem'
- and then the last block (<44 bytes) is copied here.
- This will work since 'movem' will have updated src,dst,n. */
-
- while ( n >= 16 )
- {
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- n -= 16;
- }
-
- /* A switch() is definitely the fastest although it takes a LOT of code.
- * Particularly if you inline code this.
- */
switch (n)
- {
+ {
case 0:
break;
+
case 1:
- *(char*)dst = (char) lc;
+ *dst = (char) lc;
break;
+
case 2:
- *(short*)dst = (short) lc;
+ *(short *) dst = (short) lc;
break;
+
case 3:
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 4:
- *((long*)dst)++ = lc;
+ *(long *) dst = lc;
break;
+
case 5:
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 6:
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 7:
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 8:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc;
break;
+
case 9:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 10:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 11:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 12:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc;
break;
+
case 13:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 14:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 15:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
- }
+ }
}
- return return_dst; /* destination pointer. */
-} /* memset() */
+ return return_dst;
+}
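The new memset() builds its 32-bit fill word with the cascaded "x |= x << 8" pattern that the inline asm hand-codes because, per the comment, GCC may not compile it well. A portable sketch of that step alone (hypothetical helper, not part of the patch):

    /* Hypothetical sketch: replicate the fill byte into all four bytes
     * of a word, matching the movu.b/lslq/or.d sequence in the asm. */
    static unsigned long spread_byte(int c)
    {
            unsigned long lc = (unsigned char)c;

            lc |= lc << 8;          /* 0x000000cc -> 0x0000cccc */
            lc |= lc << 16;         /* 0x0000cccc -> 0xcccccccc */
            return lc;
    }

The 48-byte MEMSET_BY_BLOCK_THRESHOLD then decides whether the movem block loop is worth its setup cost, following the cycle-count inequality in the header comment.
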
diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c
index 22a6f0aa9ce..8f79163f139 100644
--- a/arch/cris/arch-v10/lib/old_checksum.c
+++ b/arch/cris/arch-v10/lib/old_checksum.c
@@ -1,5 +1,4 @@
-/* $Id: old_checksum.c,v 1.3 2003/10/27 08:04:32 starvik Exp $
- *
+/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
@@ -47,39 +46,41 @@
#include <asm/delay.h>
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+__wsum csum_partial(const void *p, int len, __wsum __sum)
{
- /*
- * Experiments with ethernet and slip connections show that buff
- * is aligned on either a 2-byte or 4-byte boundary.
- */
- const unsigned char *endMarker = buff + len;
- const unsigned char *marker = endMarker - (len % 16);
+ u32 sum = (__force u32)__sum;
+ const u16 *buff = p;
+ /*
+ * Experiments with ethernet and slip connections show that buff
+ * is aligned on either a 2-byte or 4-byte boundary.
+ */
+ const void *endMarker = p + len;
+ const void *marker = endMarker - (len % 16);
#if 0
- if((int)buff & 0x3)
- printk("unaligned buff %p\n", buff);
- __delay(900); /* extra delay of 90 us to test performance hit */
+ if((int)buff & 0x3)
+ printk("unaligned buff %p\n", buff);
+ __delay(900); /* extra delay of 90 us to test performance hit */
#endif
- BITON;
- while (buff < marker) {
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- sum += *((unsigned short *)buff)++;
- }
- marker = endMarker - (len % 2);
- while(buff < marker) {
- sum += *((unsigned short *)buff)++;
- }
- if(endMarker - buff > 0) {
- sum += *buff; /* add extra byte seperately */
- }
- BITOFF;
- return(sum);
+ BITON;
+ while (buff < marker) {
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ sum += *buff++;
+ }
+ marker = endMarker - (len % 2);
+ while (buff < marker)
+ sum += *buff++;
+
+ if (endMarker > buff)
+ sum += *(const u8 *)buff; /* add extra byte separately */
+
+ BITOFF;
+ return (__force __wsum)sum;
}
EXPORT_SYMBOL(csum_partial);
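The reworked csum_partial() above sums 16-bit words (eight per iteration), then pairs, then adds a trailing odd byte directly, which is correct on a little-endian CPU such as CRIS. A compact, non-unrolled sketch of the same structure (hypothetical helper for illustration only):

    #include <stdint.h>

    /* Hypothetical restatement of the loop structure in csum_partial()
     * above: sum 16-bit words, then fold in a trailing odd byte. */
    static uint32_t sum16(const void *p, int len, uint32_t sum)
    {
            const uint16_t *w = p;

            for (; len >= 2; len -= 2)
                    sum += *w++;
            if (len)
                    sum += *(const uint8_t *)w;
            return sum;
    }
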
diff --git a/arch/cris/arch-v10/lib/string.c b/arch/cris/arch-v10/lib/string.c
index 8ffde4901b5..c7bd6ebdc93 100644
--- a/arch/cris/arch-v10/lib/string.c
+++ b/arch/cris/arch-v10/lib/string.c
@@ -1,55 +1,59 @@
-/*#************************************************************************#*/
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# FUNCTION NAME: memcpy() */
-/*# */
-/*# PARAMETERS: void* dst; Destination address. */
-/*# void* src; Source address. */
-/*# int len; Number of bytes to copy. */
-/*# */
-/*# RETURNS: dst. */
-/*# */
-/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */
-/*# about copying of overlapping memory areas. This routine is */
-/*# very sensitive to compiler changes in register allocation. */
-/*# Should really be rewritten to avoid this problem. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# HISTORY */
-/*# */
-/*# DATE NAME CHANGES */
-/*# ---- ---- ------- */
-/*# 941007 Kenny R Creation */
-/*# 941011 Kenny R Lots of optimizations and inlining. */
-/*# 941129 Ulf A Adapted for use in libc. */
-/*# 950216 HP N==0 forgotten if non-aligned src/dst. */
-/*# Added some optimizations. */
-/*# 001025 HP Make src and dst char *. Align dst to */
-/*# dword, not just word-if-both-src-and-dst- */
-/*# are-misaligned. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-
-#include <linux/types.h>
-
-void *memcpy(void *pdst,
- const void *psrc,
- size_t pn)
+/* A memcpy for CRIS.
+ Copyright (C) 1994-2005 Axis Communications.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Neither the name of Axis Communications nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+ COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE. */
+
+/* FIXME: This file should really only be used for reference, as the
+ result is somewhat depending on gcc generating what we expect rather
+ than what we describe. An assembly file should be used instead. */
+
+#include <stddef.h>
+
+/* Break even between movem and move16 is really at 38.7 * 2, but
+ modulo 44, so up to the next multiple of 44, we use ordinary code. */
+#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
+
+/* No name ambiguities in this file. */
+__asm__ (".syntax no_register_prefix");
+
+void *
+memcpy(void *pdst, const void *psrc, size_t pn)
{
- /* Ok. Now we want the parameters put in special registers.
+ /* Now we want the parameters put in special registers.
Make sure the compiler is able to make something useful of this.
- As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+ As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
If gcc was allright, it really would need no temporaries, and no
- stack space to save stuff on. */
+ stack space to save stuff on. */
register void *return_dst __asm__ ("r10") = pdst;
- register char *dst __asm__ ("r13") = pdst;
- register const char *src __asm__ ("r11") = psrc;
+ register unsigned char *dst __asm__ ("r13") = pdst;
+ register unsigned const char *src __asm__ ("r11") = psrc;
register int n __asm__ ("r12") = pn;
-
-
+
/* When src is aligned but not dst, this makes a few extra needless
cycles. I believe it would take as many to check that the
re-alignment was unnecessary. */
@@ -59,167 +63,174 @@ void *memcpy(void *pdst,
&& n >= 3)
{
if ((unsigned long) dst & 1)
- {
- n--;
- *(char*)dst = *(char*)src;
- src++;
- dst++;
- }
+ {
+ n--;
+ *dst = *src;
+ src++;
+ dst++;
+ }
if ((unsigned long) dst & 2)
- {
- n -= 2;
- *(short*)dst = *(short*)src;
- src += 2;
- dst += 2;
- }
+ {
+ n -= 2;
+ *(short *) dst = *(short *) src;
+ src += 2;
+ dst += 2;
+ }
}
- /* Decide which copying method to use. */
- if (n >= 44*2) /* Break even between movem and
- move16 is at 38.7*2, but modulo 44. */
- {
- /* For large copies we use 'movem' */
-
- /* It is not optimal to tell the compiler about clobbering any
- registers; that will move the saving/restoring of those registers
- to the function prologue/epilogue, and make non-movem sizes
- suboptimal.
-
- This method is not foolproof; it assumes that the "asm reg"
- declarations at the beginning of the function really are used
- here (beware: they may be moved to temporary registers).
- This way, we do not have to save/move the registers around into
- temporaries; we can safely use them straight away.
-
- If you want to check that the allocation was right; then
- check the equalities in the first comment. It should say
- "r13=r13, r11=r11, r12=r12" */
- __asm__ volatile ("
- ;; Check that the following is true (same register names on
- ;; both sides of equal sign, as in r8=r8):
- ;; %0=r13, %1=r11, %2=r12
- ;;
- ;; Save the registers we'll use in the movem process
- ;; on the stack.
- subq 11*4,$sp
- movem $r10,[$sp]
-
- ;; Now we've got this:
- ;; r11 - src
- ;; r13 - dst
- ;; r12 - n
-
- ;; Update n for the first loop
- subq 44,$r12
-0:
- movem [$r11+],$r10
- subq 44,$r12
- bge 0b
- movem $r10,[$r13+]
-
- addq 44,$r12 ;; compensate for last loop underflowing n
-
- ;; Restore registers from stack
- movem [$sp+],$r10"
-
- /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
- /* Inputs */ : "0" (dst), "1" (src), "2" (n));
-
- }
+ /* Decide which copying method to use. */
+ if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
+ {
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-movem sizes
+ suboptimal. */
+ __asm__ volatile
+ ("\
+ ;; GCC does promise correct register allocations, but let's \n\
+ ;; make sure it keeps its promises. \n\
+ .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
+	 .error \"GCC reg alloc bug: %0-%1-%2 != $r13-$r11-$r12\"	\n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll use in the movem process \n\
+ ;; on the stack. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r11 - src \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop. \n\
+ subq 44,r12 \n\
+0: \n\
+"
+#ifdef __arch_common_v10_v32
+ /* Cater to branch offset difference between v32 and v10. We
+ assume the branch below has an 8-bit offset. */
+" setf\n"
+#endif
+" movem [r11+],r10 \n\
+ subq 44,r12 \n\
+ bge 0b \n\
+ movem r10,[r13+] \n\
+ \n\
+ ;; Compensate for last loop underflowing n. \n\
+ addq 44,r12 \n\
+ \n\
+ ;; Restore registers from stack. \n\
+ movem [sp+],r10"
+
+ /* Outputs. */
+ : "=r" (dst), "=r" (src), "=r" (n)
+
+ /* Inputs. */
+ : "0" (dst), "1" (src), "2" (n));
+ }
- /* Either we directly starts copying, using dword copying
- in a loop, or we copy as much as possible with 'movem'
- and then the last block (<44 bytes) is copied here.
- This will work since 'movem' will have updated src,dst,n. */
+ while (n >= 16)
+ {
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
- while ( n >= 16 )
- {
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- n -= 16;
- }
+ n -= 16;
+ }
- /* A switch() is definitely the fastest although it takes a LOT of code.
- * Particularly if you inline code this.
- */
switch (n)
- {
+ {
case 0:
break;
+
case 1:
- *(char*)dst = *(char*)src;
+ *dst = *src;
break;
+
case 2:
- *(short*)dst = *(short*)src;
+ *(short *) dst = *(short *) src;
break;
+
case 3:
- *((short*)dst)++ = *((short*)src)++;
- *(char*)dst = *(char*)src;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 4:
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src;
break;
+
case 5:
- *((long*)dst)++ = *((long*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 6:
- *((long*)dst)++ = *((long*)src)++;
- *(short*)dst = *(short*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 7:
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 8:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src;
break;
+
case 9:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 10:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *(short*)dst = *(short*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 11:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 12:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src;
break;
+
case 13:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 14:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *(short*)dst = *(short*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 15:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *(char*)dst = *(char*)src;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
- }
+ }
- return return_dst; /* destination pointer. */
-} /* memcpy() */
+ return return_dst;
+}
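memcpy() above first aligns the destination to a dword boundary (peeling one byte and/or one halfword), then uses movem blocks once n reaches MEMCPY_BY_BLOCK_THRESHOLD, and finishes with the 16-byte loop and the switch. A hypothetical C sketch of just the alignment prologue, mirroring the code in the hunk:

    #include <stddef.h>

    /* Hypothetical sketch of the destination-alignment step used by
     * memcpy() above; pointer and count are updated in place. */
    static void align_dst(unsigned char **dst, const unsigned char **src,
                          size_t *n)
    {
            if (((unsigned long)*dst & 3) == 0 || *n < 3)
                    return;     /* aligned already, or too short to matter */

            if ((unsigned long)*dst & 1) {
                    *(*dst)++ = *(*src)++;
                    (*n)--;
            }
            if ((unsigned long)*dst & 2) {
                    *(unsigned short *)*dst = *(const unsigned short *)*src;
                    *dst += 2; *src += 2; *n -= 2;
            }
    }
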
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index 43778d53c25..b0a608da7bd 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -38,7 +38,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
FIXME: Comment for old gcc version. Check.
- If gcc was allright, it really would need no temporaries, and no
+ If gcc was alright, it really would need no temporaries, and no
stack space to save stuff on. */
register char *dst __asm__ ("r13") = pdst;
@@ -92,58 +92,58 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
.err \n\
.endif \n\
-
- ;; Save the registers we'll use in the movem process
- ;; on the stack.
- subq 11*4,$sp
- movem $r10,[$sp]
-
- ;; Now we've got this:
- ;; r11 - src
- ;; r13 - dst
- ;; r12 - n
-
- ;; Update n for the first loop
- subq 44,$r12
-
-; Since the noted PC of a faulting instruction in a delay-slot of a taken
-; branch, is that of the branch target, we actually point at the from-movem
-; for this case. There is no ambiguity here; if there was a fault in that
-; instruction (meaning a kernel oops), the faulted PC would be the address
-; after *that* movem.
-
-0:
- movem [$r11+],$r10
- subq 44,$r12
- bge 0b
- movem $r10,[$r13+]
-1:
- addq 44,$r12 ;; compensate for last loop underflowing n
-
- ;; Restore registers from stack
- movem [$sp+],$r10
-2:
- .section .fixup,\"ax\"
-
-; To provide a correct count in r10 of bytes that failed to be copied,
-; we jump back into the loop if the loop-branch was taken. There is no
-; performance penalty for sany use; the program will segfault soon enough.
-
-3:
- move.d [$sp],$r10
- addq 44,$r10
- move.d $r10,[$sp]
- jump 0b
-4:
- movem [$sp+],$r10
- addq 44,$r10
- addq 44,$r12
- jump 2b
-
- .previous
- .section __ex_table,\"a\"
- .dword 0b,3b
- .dword 1b,4b
+ \n\
+ ;; Save the registers we'll use in the movem process \n\
+ ;; on the stack. \n\
+ subq 11*4,$sp \n\
+ movem $r10,[$sp] \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r11 - src \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 44,$r12 \n\
+ \n\
+; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
+; branch, is that of the branch target, we actually point at the from-movem \n\
+; for this case. There is no ambiguity here; if there was a fault in that \n\
+; instruction (meaning a kernel oops), the faulted PC would be the address \n\
+; after *that* movem. \n\
+ \n\
+0: \n\
+ movem [$r11+],$r10 \n\
+ subq 44,$r12 \n\
+ bge 0b \n\
+ movem $r10,[$r13+] \n\
+1: \n\
+ addq 44,$r12 ;; compensate for last loop underflowing n \n\
+ \n\
+ ;; Restore registers from stack \n\
+ movem [$sp+],$r10 \n\
+2: \n\
+ .section .fixup,\"ax\" \n\
+ \n\
+; To provide a correct count in r10 of bytes that failed to be copied, \n\
+; we jump back into the loop if the loop-branch was taken. There is no \n\
+; performance penalty for sany use; the program will segfault soon enough.\n\
+ \n\
+3: \n\
+ move.d [$sp],$r10 \n\
+ addq 44,$r10 \n\
+ move.d $r10,[$sp] \n\
+ jump 0b \n\
+4: \n\
+ movem [$sp+],$r10 \n\
+ addq 44,$r10 \n\
+ addq 44,$r12 \n\
+ jump 2b \n\
+ \n\
+ .previous \n\
+ .section __ex_table,\"a\" \n\
+ .dword 0b,3b \n\
+ .dword 1b,4b \n\
.previous"
/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
@@ -193,14 +193,14 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
inaccessible. */
unsigned long
-__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
+__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
{
/* We want the parameters put in special registers.
Make sure the compiler is able to make something useful of this.
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
FIXME: Comment for old gcc version. Check.
- If gcc was allright, it really would need no temporaries, and no
+ If gcc was alright, it really would need no temporaries, and no
stack space to save stuff on. */
register char *dst __asm__ ("r13") = pdst;
@@ -253,59 +253,59 @@ __copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
If you want to check that the allocation was right; then
check the equalities in the first comment. It should say
"r13=r13, r11=r11, r12=r12" */
- __asm__ volatile ("
+ __asm__ volatile ("\n\
.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
.err \n\
.endif \n\
-
- ;; Save the registers we'll use in the movem process
- ;; on the stack.
- subq 11*4,$sp
- movem $r10,[$sp]
-
- ;; Now we've got this:
- ;; r11 - src
- ;; r13 - dst
- ;; r12 - n
-
- ;; Update n for the first loop
- subq 44,$r12
-0:
- movem [$r11+],$r10
-1:
- subq 44,$r12
- bge 0b
- movem $r10,[$r13+]
-
- addq 44,$r12 ;; compensate for last loop underflowing n
-
- ;; Restore registers from stack
- movem [$sp+],$r10
-4:
- .section .fixup,\"ax\"
-
-;; Do not jump back into the loop if we fail. For some uses, we get a
-;; page fault somewhere on the line. Without checking for page limits,
-;; we don't know where, but we need to copy accurately and keep an
-;; accurate count; not just clear the whole line. To do that, we fall
-;; down in the code below, proceeding with smaller amounts. It should
-;; be kept in mind that we have to cater to code like what at one time
-;; was in fs/super.c:
-;; i = size - copy_from_user((void *)page, data, size);
-;; which would cause repeated faults while clearing the remainder of
-;; the SIZE bytes at PAGE after the first fault.
-;; A caveat here is that we must not fall through from a failing page
-;; to a valid page.
-
-3:
- movem [$sp+],$r10
- addq 44,$r12 ;; Get back count before faulting point.
- subq 44,$r11 ;; Get back pointer to faulting movem-line.
- jump 4b ;; Fall through, pretending the fault didn't happen.
-
- .previous
- .section __ex_table,\"a\"
- .dword 1b,3b
+ \n\
+ ;; Save the registers we'll use in the movem process \n\
+ ;; on the stack. \n\
+ subq 11*4,$sp \n\
+ movem $r10,[$sp] \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r11 - src \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 44,$r12 \n\
+0: \n\
+ movem [$r11+],$r10 \n\
+1: \n\
+ subq 44,$r12 \n\
+ bge 0b \n\
+ movem $r10,[$r13+] \n\
+ \n\
+ addq 44,$r12 ;; compensate for last loop underflowing n \n\
+ \n\
+ ;; Restore registers from stack \n\
+ movem [$sp+],$r10 \n\
+4: \n\
+ .section .fixup,\"ax\" \n\
+ \n\
+;; Do not jump back into the loop if we fail. For some uses, we get a \n\
+;; page fault somewhere on the line. Without checking for page limits, \n\
+;; we don't know where, but we need to copy accurately and keep an \n\
+;; accurate count; not just clear the whole line. To do that, we fall \n\
+;; down in the code below, proceeding with smaller amounts. It should \n\
+;; be kept in mind that we have to cater to code like what at one time \n\
+;; was in fs/super.c: \n\
+;; i = size - copy_from_user((void *)page, data, size); \n\
+;; which would cause repeated faults while clearing the remainder of \n\
+;; the SIZE bytes at PAGE after the first fault. \n\
+;; A caveat here is that we must not fall through from a failing page \n\
+;; to a valid page. \n\
+ \n\
+3: \n\
+ movem [$sp+],$r10 \n\
+ addq 44,$r12 ;; Get back count before faulting point. \n\
+ subq 44,$r11 ;; Get back pointer to faulting movem-line. \n\
+ jump 4b ;; Fall through, pretending the fault didn't happen.\n\
+ \n\
+ .previous \n\
+ .section __ex_table,\"a\" \n\
+ .dword 1b,3b \n\
.previous"
/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
@@ -380,7 +380,7 @@ __do_clear_user (void __user *pto, unsigned long pn)
As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
FIXME: Comment for old gcc version. Check.
- If gcc was allright, it really would need no temporaries, and no
+ If gcc was alright, it really would need no temporaries, and no
stack space to save stuff on. */
register char *dst __asm__ ("r13") = pto;
@@ -425,64 +425,64 @@ __do_clear_user (void __user *pto, unsigned long pn)
If you want to check that the allocation was right; then
check the equalities in the first comment. It should say
something like "r13=r13, r11=r11, r12=r12". */
- __asm__ volatile ("
+ __asm__ volatile ("\n\
.ifnc %0%1%2,$r13$r12$r10 \n\
.err \n\
.endif \n\
-
- ;; Save the registers we'll clobber in the movem process
- ;; on the stack. Don't mention them to gcc, it will only be
- ;; upset.
- subq 11*4,$sp
- movem $r10,[$sp]
-
- clear.d $r0
- clear.d $r1
- clear.d $r2
- clear.d $r3
- clear.d $r4
- clear.d $r5
- clear.d $r6
- clear.d $r7
- clear.d $r8
- clear.d $r9
- clear.d $r10
- clear.d $r11
-
- ;; Now we've got this:
- ;; r13 - dst
- ;; r12 - n
-
- ;; Update n for the first loop
- subq 12*4,$r12
-0:
- subq 12*4,$r12
- bge 0b
- movem $r11,[$r13+]
-1:
- addq 12*4,$r12 ;; compensate for last loop underflowing n
-
- ;; Restore registers from stack
- movem [$sp+],$r10
-2:
- .section .fixup,\"ax\"
-3:
- move.d [$sp],$r10
- addq 12*4,$r10
- move.d $r10,[$sp]
- clear.d $r10
- jump 0b
-
-4:
- movem [$sp+],$r10
- addq 12*4,$r10
- addq 12*4,$r12
- jump 2b
-
- .previous
- .section __ex_table,\"a\"
- .dword 0b,3b
- .dword 1b,4b
+ \n\
+ ;; Save the registers we'll clobber in the movem process \n\
+ ;; on the stack. Don't mention them to gcc, it will only be \n\
+ ;; upset. \n\
+ subq 11*4,$sp \n\
+ movem $r10,[$sp] \n\
+ \n\
+ clear.d $r0 \n\
+ clear.d $r1 \n\
+ clear.d $r2 \n\
+ clear.d $r3 \n\
+ clear.d $r4 \n\
+ clear.d $r5 \n\
+ clear.d $r6 \n\
+ clear.d $r7 \n\
+ clear.d $r8 \n\
+ clear.d $r9 \n\
+ clear.d $r10 \n\
+ clear.d $r11 \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 12*4,$r12 \n\
+0: \n\
+ subq 12*4,$r12 \n\
+ bge 0b \n\
+ movem $r11,[$r13+] \n\
+1: \n\
+ addq 12*4,$r12 ;; compensate for last loop underflowing n\n\
+ \n\
+ ;; Restore registers from stack \n\
+ movem [$sp+],$r10 \n\
+2: \n\
+ .section .fixup,\"ax\" \n\
+3: \n\
+ move.d [$sp],$r10 \n\
+ addq 12*4,$r10 \n\
+ move.d $r10,[$sp] \n\
+ clear.d $r10 \n\
+ jump 0b \n\
+ \n\
+4: \n\
+ movem [$sp+],$r10 \n\
+ addq 12*4,$r10 \n\
+ addq 12*4,$r12 \n\
+ jump 2b \n\
+ \n\
+ .previous \n\
+ .section __ex_table,\"a\" \n\
+ .dword 0b,3b \n\
+ .dword 1b,4b \n\
.previous"
/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
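The fixup comments above document the contract the callers rely on: __copy_user() and __copy_user_zeroing() return the number of bytes that could not be copied, so code like the quoted fs/super.c line computes the amount actually copied by subtraction. A hypothetical illustration of that caller pattern (kernel context assumed):

    #include <linux/uaccess.h>

    /* Hypothetical illustration of the usage quoted in the fixup
     * comment: copy_from_user() returns the number of bytes NOT
     * copied, so the bytes actually copied are size minus that. */
    static inline unsigned long bytes_copied(void *page,
                                             const void __user *data,
                                             unsigned long size)
    {
            return size - copy_from_user(page, data, size);
    }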