Diffstat (limited to 'arch/arm/lib')
-rw-r--r--  arch/arm/lib/Makefile | 33
-rw-r--r--  arch/arm/lib/ashldi3.S | 7
-rw-r--r--  arch/arm/lib/ashrdi3.S | 7
-rw-r--r--  arch/arm/lib/backtrace.S | 179
-rw-r--r--  arch/arm/lib/bitops.h | 93
-rw-r--r--  arch/arm/lib/bswapsdi2.S | 36
-rw-r--r--  arch/arm/lib/call_with_stack.S | 44
-rw-r--r--  arch/arm/lib/changebit.S | 8
-rw-r--r--  arch/arm/lib/clear_user.S | 32
-rw-r--r--  arch/arm/lib/clearbit.S | 9
-rw-r--r--  arch/arm/lib/copy_from_user.S | 29
-rw-r--r--  arch/arm/lib/copy_page.S | 19
-rw-r--r--  arch/arm/lib/copy_template.S | 74
-rw-r--r--  arch/arm/lib/copy_to_user.S | 31
-rw-r--r--  arch/arm/lib/csumipv6.S | 3
-rw-r--r--  arch/arm/lib/csumpartial.S | 33
-rw-r--r--  arch/arm/lib/csumpartialcopy.S | 7
-rw-r--r--  arch/arm/lib/csumpartialcopygeneric.S | 169
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S | 63
-rw-r--r--  arch/arm/lib/delay-loop.S | 68
-rw-r--r--  arch/arm/lib/delay.S | 58
-rw-r--r--  arch/arm/lib/delay.c | 93
-rw-r--r--  arch/arm/lib/div64.S | 17
-rw-r--r--  arch/arm/lib/ecard.S | 5
-rw-r--r--  arch/arm/lib/findbit.S | 76
-rw-r--r--  arch/arm/lib/getuser.S | 58
-rw-r--r--  arch/arm/lib/io-acorn.S | 8
-rw-r--r--  arch/arm/lib/io-readsb.S | 31
-rw-r--r--  arch/arm/lib/io-readsl.S | 13
-rw-r--r--  arch/arm/lib/io-readsw-armv3.S | 37
-rw-r--r--  arch/arm/lib/io-readsw-armv4.S | 25
-rw-r--r--  arch/arm/lib/io-shark.c | 13
-rw-r--r--  arch/arm/lib/io-writesb.S | 32
-rw-r--r--  arch/arm/lib/io-writesl.S | 13
-rw-r--r--  arch/arm/lib/io-writesw-armv3.S | 37
-rw-r--r--  arch/arm/lib/io-writesw-armv4.S | 29
-rw-r--r--  arch/arm/lib/lib1funcs.S | 63
-rw-r--r--  arch/arm/lib/lshrdi3.S | 7
-rw-r--r--  arch/arm/lib/memchr.S | 3
-rw-r--r--  arch/arm/lib/memcpy.S | 8
-rw-r--r--  arch/arm/lib/memmove.S | 83
-rw-r--r--  arch/arm/lib/memset.S | 113
-rw-r--r--  arch/arm/lib/memzero.S | 49
-rw-r--r--  arch/arm/lib/muldi3.S | 7
-rw-r--r--  arch/arm/lib/putuser.S | 64
-rw-r--r--  arch/arm/lib/setbit.S | 9
-rw-r--r--  arch/arm/lib/sha1.S | 206
-rw-r--r--  arch/arm/lib/strchr.S | 3
-rw-r--r--  arch/arm/lib/strncpy_from_user.S | 43
-rw-r--r--  arch/arm/lib/strnlen_user.S | 40
-rw-r--r--  arch/arm/lib/strrchr.S | 3
-rw-r--r--  arch/arm/lib/testchangebit.S | 5
-rw-r--r--  arch/arm/lib/testclearbit.S | 5
-rw-r--r--  arch/arm/lib/testsetbit.S | 5
-rw-r--r--  arch/arm/lib/uaccess.S | 519
-rw-r--r--  arch/arm/lib/uaccess_with_memcpy.c | 270
-rw-r--r--  arch/arm/lib/ucmpdi2.S | 17
-rw-r--r--  arch/arm/lib/xor-neon.c | 46
58 files changed, 1763 insertions(+), 1294 deletions(-)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 391f3ab3ff3..0573faab96a 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,28 +6,34 @@
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
- copy_page.o delay.o findbit.o memchr.o memcpy.o \
+ delay.o delay-loop.o findbit.o memchr.o memcpy.o \
memmove.o memset.o memzero.o setbit.o \
- strncpy_from_user.o strnlen_user.o \
strchr.o strrchr.o \
testchangebit.o testclearbit.o testsetbit.o \
- getuser.o putuser.o clear_user.o \
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- ucmpdi2.o lib1funcs.o div64.o sha1.o \
- io-readsb.o io-writesb.o io-readsl.o io-writesl.o
+ ucmpdi2.o lib1funcs.o div64.o \
+ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
+ call_with_stack.o bswapsdi2.o
+
+mmu-y := clear_user.o copy_page.o getuser.o putuser.o
# the code in uaccess.S is not preemption safe and
# probably faster on ARMv3 only
-ifeq ($CONFIG_PREEMPT,y)
- lib-y += copy_from_user.o copy_to_user.o
+ifeq ($(CONFIG_PREEMPT),y)
+ mmu-y += copy_from_user.o copy_to_user.o
else
ifneq ($(CONFIG_CPU_32v3),y)
- lib-y += copy_from_user.o copy_to_user.o
+ mmu-y += copy_from_user.o copy_to_user.o
else
- lib-y += uaccess.o
+ mmu-y += uaccess.o
endif
endif
+# using lib_ here won't override already available weak symbols
+obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
+
+lib-$(CONFIG_MMU) += $(mmu-y)
+
ifeq ($(CONFIG_CPU_32v3),y)
lib-y += io-readsw-armv3.o io-writesw-armv3.o
else
@@ -35,9 +41,12 @@ else
endif
lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o
-lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o
-lib-$(CONFIG_ARCH_L7200) += io-acorn.o
-lib-$(CONFIG_ARCH_SHARK) += io-shark.o
$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
+
+ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
+ NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon
+ CFLAGS_xor-neon.o += $(NEON_FLAGS)
+ obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
+endif
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 561e20717b3..638deb13da1 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. */
#endif
ENTRY(__ashldi3)
+ENTRY(__aeabi_llsl)
subs r3, r2, #32
rsb ip, r2, #32
movmi ah, ah, lsl r2
movpl ah, al, lsl r3
- orrmi ah, ah, al, lsr ip
+ ARM( orrmi ah, ah, al, lsr ip )
+ THUMB( lsrmi r3, al, ip )
+ THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
+ENDPROC(__ashldi3)
+ENDPROC(__aeabi_llsl)
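
[Editorial note: a minimal C sketch of the 64-bit shift decomposition that __ashldi3/__aeabi_llsl performs above, assuming a little-endian split into al/ah halves. This is an illustration only, not kernel code.]

/* Shift counts of 32 or more take the whole result from the low word
 * ("movpl ah, al, lsl r3"); smaller counts carry bits from low into high
 * ("movmi"/"orrmi"). Valid for 0 <= n <= 63, like the assembly. */
unsigned long long ashldi3_sketch(unsigned long long v, unsigned int n)
{
	unsigned int al = (unsigned int)v;
	unsigned int ah = (unsigned int)(v >> 32);

	if (n == 0)
		return v;
	if (n >= 32) {
		ah = al << (n - 32);
		al = 0;
	} else {
		ah = (ah << n) | (al >> (32 - n));
		al <<= n;
	}
	return ((unsigned long long)ah << 32) | al;
}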
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 86fb2a90c30..015e8aa5a1d 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. */
#endif
ENTRY(__ashrdi3)
+ENTRY(__aeabi_lasr)
subs r3, r2, #32
rsb ip, r2, #32
movmi al, al, lsr r2
movpl al, ah, asr r3
- orrmi al, al, ah, lsl ip
+ ARM( orrmi al, al, ah, lsl ip )
+ THUMB( lslmi r3, ah, ip )
+ THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
+ENDPROC(__ashrdi3)
+ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index 68a21c0f3f5..4102be617fc 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -10,7 +10,6 @@
* 27/03/03 Ian Molton Clean up CONFIG_CPU
*
*/
-#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
@@ -18,105 +17,100 @@
@ fp is 0 or stack frame
#define frame r4
-#define next r5
-#define save r6
+#define sv_fp r5
+#define sv_pc r6
#define mask r7
#define offset r8
-ENTRY(__backtrace)
- mov r1, #0x10
- mov r0, fp
-
ENTRY(c_backtrace)
-#ifndef CONFIG_FRAME_POINTER
+#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
mov pc, lr
+ENDPROC(c_backtrace)
#else
-
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...
- tst r1, #0x10 @ 26 or 32-bit?
- moveq mask, #0xfc000003
- movne mask, #0
- tst mask, r0
- movne r0, #0
- movs frame, r0
-1: moveq r0, #-2
- LOADREGS(eqfd, sp!, {r4 - r8, pc})
-
-2: stmfd sp!, {pc} @ calculate offset of PC in STMIA instruction
- ldr r0, [sp], #4
- adr r1, 2b - 4
+ movs frame, r0 @ if frame pointer is zero
+ beq no_frame @ we have no stack frames
+
+ tst r1, #0x10 @ 26 or 32-bit mode?
+ ARM( moveq mask, #0xfc000003 )
+ THUMB( moveq mask, #0xfc000000 )
+ THUMB( orreq mask, #0x03 )
+ movne mask, #0 @ mask for 32-bit
+
+1: stmfd sp!, {pc} @ calculate offset of PC stored
+ ldr r0, [sp], #4 @ by stmfd for this CPU
+ adr r1, 1b
sub offset, r0, r1
-3: tst frame, mask @ Check for address exceptions...
- bne 1b
+/*
+ * Stack frame layout:
+ * optionally saved caller registers (r4 - r10)
+ * saved fp
+ * saved sp
+ * saved lr
+ * frame => saved pc
+ * optionally saved arguments (r0 - r3)
+ * saved sp => <next word>
+ *
+ * Functions start with the following code sequence:
+ * mov ip, sp
+ * stmfd sp!, {r0 - r3} (optional)
+ * corrected pc => stmfd sp!, {..., fp, ip, lr, pc}
+ */
+for_each_frame: tst frame, mask @ Check for address exceptions
+ bne no_frame
+
+1001: ldr sv_pc, [frame, #0] @ get saved pc
+1002: ldr sv_fp, [frame, #-12] @ get saved fp
-1001: ldr next, [frame, #-12] @ get fp
-1002: ldr r2, [frame, #-4] @ get lr
-1003: ldr r3, [frame, #0] @ get pc
- sub save, r3, offset @ Correct PC for prefetching
- bic save, save, mask
-1004: ldr r1, [save, #0] @ get instruction at function
- mov r1, r1, lsr #10
- ldr r3, .Ldsi+4
- teq r1, r3
- subeq save, save, #4
- mov r0, save
- bic r1, r2, mask
+ sub sv_pc, sv_pc, offset @ Correct PC for prefetching
+ bic sv_pc, sv_pc, mask @ mask PC/LR for the mode
+
+1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
+ ldr r3, .Ldsi+4 @ adjust saved 'pc' back one
+ teq r3, r2, lsr #10 @ instruction
+ subne r0, sv_pc, #4 @ allow for mov
+ subeq r0, sv_pc, #8 @ allow for mov + stmia
+
+ ldr r1, [frame, #-4] @ get saved lr
+ mov r2, frame
+ bic r1, r1, mask @ mask PC/LR for the mode
bl dump_backtrace_entry
- ldr r0, [frame, #-8] @ get sp
- sub r0, r0, #4
-1005: ldr r1, [save, #4] @ get instruction at function+4
- mov r3, r1, lsr #10
- ldr r2, .Ldsi+4
- teq r3, r2 @ Check for stmia sp!, {args}
- addeq save, save, #4 @ next instruction
- bleq .Ldumpstm
-
- sub r0, frame, #16
-1006: ldr r1, [save, #4] @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction
- mov r3, r1, lsr #10
- ldr r2, .Ldsi
- teq r3, r2
- bleq .Ldumpstm
-
- /*
- * A zero next framepointer means we're done.
- */
- teq next, #0
- LOADREGS(eqfd, sp!, {r4 - r8, pc})
-
- /*
- * The next framepointer must be above the
- * current framepointer.
- */
- cmp next, frame
- mov frame, next
- bhi 3b
- b 1007f
+ ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
+ ldr r3, .Ldsi+4
+ teq r3, r1, lsr #11
+ ldreq r0, [frame, #-8] @ get sp
+ subeq r0, r0, #4 @ point at the last arg
+ bleq .Ldumpstm @ dump saved registers
-/*
- * Fixup for LDMDB
- */
- .section .fixup,"ax"
- .align 0
-1007: ldr r0, =.Lbad
+1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
+ ldr r3, .Ldsi @ instruction exists,
+ teq r3, r1, lsr #11
+ subeq r0, frame, #16
+ bleq .Ldumpstm @ dump saved registers
+
+ teq sv_fp, #0 @ zero saved fp means
+ beq no_frame @ no further frames
+
+ cmp sv_fp, frame @ next frame must be
+ mov frame, sv_fp @ above the current frame
+ bhi for_each_frame
+
+1006: adr r0, .Lbad
mov r1, frame
bl printk
- LOADREGS(fd, sp!, {r4 - r8, pc})
- .ltorg
- .previous
+no_frame: ldmfd sp!, {r4 - r8, pc}
+ENDPROC(c_backtrace)
- .section __ex_table,"a"
+ .pushsection __ex_table,"a"
.align 3
- .long 1001b, 1007b
- .long 1002b, 1007b
- .long 1003b, 1007b
- .long 1004b, 1007b
- .long 1005b, 1007b
- .long 1006b, 1007b
- .previous
+ .long 1001b, 1006b
+ .long 1002b, 1006b
+ .long 1003b, 1006b
+ .long 1004b, 1006b
+ .popsection
#define instr r4
#define reg r5
@@ -125,16 +119,18 @@ ENTRY(c_backtrace)
.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr}
mov stack, r0
mov instr, r1
- mov reg, #9
+ mov reg, #10
mov r7, #0
1: mov r3, #1
- tst instr, r3, lsl reg
+ ARM( tst instr, r3, lsl reg )
+ THUMB( lsl r3, reg )
+ THUMB( tst instr, r3 )
beq 2f
add r7, r7, #1
- teq r7, #4
+ teq r7, #6
moveq r7, #0
- moveq r3, #'\n'
- movne r3, #' '
+ adr r3, .Lcr
+ addne r3, r3, #1 @ skip newline
ldr r2, [stack], #-4
mov r1, reg
adr r0, .Lfp
@@ -144,14 +140,13 @@ ENTRY(c_backtrace)
teq r7, #0
adrne r0, .Lcr
blne printk
- mov r0, stack
- LOADREGS(fd, sp!, {instr, reg, stack, r7, pc})
+ ldmfd sp!, {instr, reg, stack, r7, pc}
-.Lfp: .asciz " r%d = %08X%c"
+.Lfp: .asciz " r%d:%08x%s"
.Lcr: .asciz "\n"
.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n"
.align
-.Ldsi: .word 0x00e92dd8 >> 2
- .word 0x00e92d00 >> 2
+.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc}
+ .word 0xe92d0000 >> 11 @ stmfd sp!, {}
#endif
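
[Editorial note: the frame layout documented in the comment block above can be pictured with a small C model of the walk that c_backtrace performs. This assumes APCS frames created by "stmfd sp!, {..., fp, ip, lr, pc}"; the struct and function names are hypothetical.]

struct apcs_frame {
	unsigned long fp;	/* [frame - 12] caller's frame pointer       */
	unsigned long sp;	/* [frame - 8]  saved stack pointer          */
	unsigned long lr;	/* [frame - 4]  saved return address         */
	unsigned long pc;	/* [frame - 0]  saved pc (fp points here)    */
};

static void walk_frames(unsigned long fp)
{
	while (fp) {				/* zero fp ends the walk */
		const struct apcs_frame *f = (const void *)(fp - 12);

		/* report f->pc / f->lr, as dump_backtrace_entry() does */
		if (f->fp <= fp)		/* next frame must be above this one */
			break;
		fp = f->fp;
	}
}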
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index f35d91fbe11..9f12ed1eea8 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,45 +1,78 @@
-#include <linux/config.h>
+#include <asm/unwind.h>
-#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K)
- .macro bitop, instr
+#if __LINUX_ARM_ARCH__ >= 6
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
mov r2, #1
- and r3, r0, #7 @ Get bit offset
- add r1, r1, r0, lsr #3 @ Get byte offset
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
mov r3, r2, lsl r3
-1: ldrexb r2, [r1]
+1: ldrex r2, [r1]
\instr r2, r2, r3
- strexb r0, r2, [r1]
+ strex r0, r2, [r1]
cmp r0, #0
bne 1b
- mov pc, lr
+ bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
- .macro testop, instr, store
- and r3, r0, #7 @ Get bit offset
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
mov r2, #1
- add r1, r1, r0, lsr #3 @ Get byte offset
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
mov r3, r2, lsl r3 @ create mask
-1: ldrexb r2, [r1]
+ smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
+1: ldrex r2, [r1]
ands r0, r2, r3 @ save old value of bit
- \instr r2, r2, r3 @ toggle bit
- strexb ip, r2, [r1]
+ \instr r2, r2, r3 @ toggle bit
+ strex ip, r2, [r1]
cmp ip, #0
bne 1b
+ smp_dmb
cmp r0, #0
movne r0, #1
-2: mov pc, lr
+2: bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
#else
- .macro bitop, instr
- and r2, r0, #7
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r2, r0, #31
+ mov r0, r0, lsr #5
mov r3, #1
mov r3, r3, lsl r2
- save_and_disable_irqs ip, r2
- ldrb r2, [r1, r0, lsr #3]
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]
\instr r2, r2, r3
- strb r2, [r1, r0, lsr #3]
+ str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
/**
@@ -48,19 +81,25 @@
* @store: store instruction
*
* Note: we can trivially conditionalise the store instruction
- * to avoid dirting the data cache.
+ * to avoid dirtying the data cache.
*/
- .macro testop, instr, store
- add r1, r1, r0, lsr #3
- and r3, r0, #7
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r3, r0, #31
+ mov r0, r0, lsr #5
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]!
mov r0, #1
- save_and_disable_irqs ip, r2
- ldrb r2, [r1]
tst r2, r0, lsl r3
\instr r2, r2, r0, lsl r3
\store r2, [r1]
- restore_irqs ip
moveq r0, #0
+ restore_irqs ip
mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
#endif
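
[Editorial note: a rough C equivalent of the ARMv6+ bitop macro above. The word indexing and bit mask mirror the assembly; a GCC compare-and-swap loop stands in for the explicit ldrex/strex retry sequence, so this is a sketch, not the kernel implementation.]

static inline void bitop_sketch(unsigned int nr, unsigned long *addr)
{
	unsigned long *word = addr + (nr >> 5);	/* "add r1, r1, r0, lsl #2" (32-bit long) */
	unsigned long mask = 1UL << (nr & 31);	/* "mov r3, r2, lsl r3"                   */
	unsigned long old, new;

	do {
		old = *(volatile unsigned long *)word;
		new = old | mask;		/* \instr would be orr/bic/eor here */
	} while (!__atomic_compare_exchange_n(word, &old, new, 0,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}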
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
new file mode 100644
index 00000000000..9fcdd154eff
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.S
@@ -0,0 +1,36 @@
+#include <linux/linkage.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ENTRY(__bswapsi2)
+ rev r0, r0
+ bx lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ rev r3, r0
+ rev r0, r1
+ mov r1, r3
+ bx lr
+ENDPROC(__bswapdi2)
+#else
+ENTRY(__bswapsi2)
+ eor r3, r0, r0, ror #16
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ eor r0, r3, r0, ror #8
+ mov pc, lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ mov ip, r1
+ eor r3, ip, ip, ror #16
+ eor r1, r0, r0, ror #16
+ mov r1, r1, lsr #8
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ bic r1, r1, #0xff00
+ eor r1, r1, r0, ror #8
+ eor r0, r3, ip, ror #8
+ mov pc, lr
+ENDPROC(__bswapdi2)
+#endif
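
[Editorial note: the pre-ARMv6 path of __bswapsi2 uses the classic four-instruction swap trick; a C rendering of the same arithmetic, for illustration only.]

/* Matches the eor/lsr/bic/eor sequence in the #else branch above. */
static inline unsigned int bswap32_sketch(unsigned int x)
{
	unsigned int t = x ^ ((x >> 16) | (x << 16));	/* x ^ ror(x, 16)      */

	t >>= 8;					/* mov r3, r3, lsr #8  */
	t &= ~0xff00u;					/* bic r3, r3, #0xff00 */
	return t ^ ((x >> 8) | (x << 24));		/* eor with ror(x, 8)  */
}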
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
new file mode 100644
index 00000000000..916c80f13ae
--- /dev/null
+++ b/arch/arm/lib/call_with_stack.S
@@ -0,0 +1,44 @@
+/*
+ * arch/arm/lib/call_with_stack.S
+ *
+ * Copyright (C) 2011 ARM Ltd.
+ * Written by Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * void call_with_stack(void (*fn)(void *), void *arg, void *sp)
+ *
+ * Change the stack to that pointed at by sp, then invoke fn(arg) with
+ * the new stack.
+ */
+ENTRY(call_with_stack)
+ str sp, [r2, #-4]!
+ str lr, [r2, #-4]!
+
+ mov sp, r2
+ mov r2, r0
+ mov r0, r1
+
+ adr lr, BSYM(1f)
+ mov pc, r2
+
+1: ldr lr, [sp]
+ ldr sp, [sp, #4]
+ mov pc, lr
+ENDPROC(call_with_stack)
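
[Editorial note: a hypothetical caller of the new helper. The stack buffer and callback names are made up; the top of the buffer is passed because ARM stacks grow downwards.]

#include <stddef.h>

extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);

static unsigned long private_stack[1024];	/* hypothetical scratch stack */

static void work_fn(void *arg)
{
	/* runs with sp inside private_stack[] */
}

static void run_on_private_stack(void)
{
	call_with_stack(work_fn, NULL, &private_stack[1024]);
}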
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
index 389567c2409..f4027862172 100644
--- a/arch/arm/lib/changebit.S
+++ b/arch/arm/lib/changebit.S
@@ -12,10 +12,4 @@
#include "bitops.h"
.text
-/* Purpose : Function to change a bit
- * Prototype: int change_bit(int bit, void *addr)
- */
-ENTRY(_change_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_change_bit_le)
- bitop eor
+bitop _change_bit, eor
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 7ff9f831b3f..14a0d988c82 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -12,13 +12,14 @@
.text
-/* Prototype: int __arch_clear_user(void *addr, size_t sz)
+/* Prototype: int __clear_user(void *addr, size_t sz)
* Purpose : clear some user memory
* Params : addr - user memory address to clear
* : sz - number of bytes to clear
* Returns : number of bytes NOT cleared
*/
-ENTRY(__arch_clear_user)
+ENTRY(__clear_user_std)
+WEAK(__clear_user)
stmfd sp!, {r1, lr}
mov r2, #0
cmp r1, #4
@@ -26,27 +27,28 @@ ENTRY(__arch_clear_user)
ands ip, r0, #3
beq 1f
cmp ip, #2
-USER( strbt r2, [r0], #1)
-USER( strlebt r2, [r0], #1)
-USER( strltbt r2, [r0], #1)
+ strusr r2, r0, 1
+ strusr r2, r0, 1, le
+ strusr r2, r0, 1, lt
rsb ip, ip, #4
sub r1, r1, ip @ 7 6 5 4 3 2 1
1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7
-USER( strplt r2, [r0], #4)
-USER( strplt r2, [r0], #4)
+ strusr r2, r0, 4, pl, rept=2
bpl 1b
adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3
-USER( strplt r2, [r0], #4)
+ strusr r2, r0, 4, pl
2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x
-USER( strnebt r2, [r0], #1)
-USER( strnebt r2, [r0], #1)
+ strusr r2, r0, 1, ne, rept=2
tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1
-USER( strnebt r2, [r0], #1)
+ it ne @ explicit IT needed for the label
+USER( strnebt r2, [r0])
mov r0, #0
- LOADREGS(fd,sp!, {r1, pc})
+ ldmfd sp!, {r1, pc}
+ENDPROC(__clear_user)
+ENDPROC(__clear_user_std)
- .section .fixup,"ax"
+ .pushsection .fixup,"ax"
.align 0
-9001: LOADREGS(fd,sp!, {r0, pc})
- .previous
+9001: ldmfd sp!, {r0, pc}
+ .popsection
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
index 34751653302..f6b75fb64d3 100644
--- a/arch/arm/lib/clearbit.S
+++ b/arch/arm/lib/clearbit.S
@@ -12,11 +12,4 @@
#include "bitops.h"
.text
-/*
- * Purpose : Function to clear a bit
- * Prototype: int clear_bit(int bit, void *addr)
- */
-ENTRY(_clear_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_clear_bit_le)
- bitop bic
+bitop _clear_bit, bic
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 7497393a0e8..66a477a3e3c 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -16,7 +16,7 @@
/*
* Prototype:
*
- * size_t __arch_copy_from_user(void *to, const void *from, size_t n)
+ * size_t __copy_from_user(void *to, const void *from, size_t n)
*
* Purpose:
*
@@ -33,11 +33,15 @@
* Number of bytes NOT copied.
*/
+#ifndef CONFIG_THUMB2_KERNEL
+#define LDR1W_SHIFT 0
+#else
+#define LDR1W_SHIFT 1
+#endif
+#define STR1W_SHIFT 0
+
.macro ldr1w ptr reg abort
-100: ldrt \reg, [\ptr], #4
- .section __ex_table, "a"
- .long 100b, \abort
- .previous
+ ldrusr \reg, \ptr, 4, abort=\abort
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -53,14 +57,11 @@
.endm
.macro ldr1b ptr reg cond=al abort
-100: ldr\cond\()bt \reg, [\ptr], #1
- .section __ex_table, "a"
- .long 100b, \abort
- .previous
+ ldrusr \reg, \ptr, 1, \cond, abort=\abort
.endm
.macro str1w ptr reg abort
- str \reg, [\ptr], #4
+ W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -83,11 +84,13 @@
.text
-ENTRY(__arch_copy_from_user)
+ENTRY(__copy_from_user)
#include "copy_template.S"
- .section .fixup,"ax"
+ENDPROC(__copy_from_user)
+
+ .pushsection .fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2}
@@ -97,5 +100,5 @@ ENTRY(__arch_copy_from_user)
bl __memzero
ldr r0, [sp], #4
copy_abort_end
- .previous
+ .popsection
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 68117968482..6ee2f6706f8 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -12,8 +12,9 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
-#define COPY_COUNT (PAGE_SZ/64 PLD( -1 ))
+#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
.text
.align 5
@@ -26,21 +27,21 @@
ENTRY(copy_page)
stmfd sp!, {r4, lr} @ 2
PLD( pld [r1, #0] )
- PLD( pld [r1, #32] )
+ PLD( pld [r1, #L1_CACHE_BYTES] )
mov r2, #COPY_COUNT @ 1
ldmia r1!, {r3, r4, ip, lr} @ 4+1
-1: PLD( pld [r1, #64] )
- PLD( pld [r1, #96] )
-2: stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
+1: PLD( pld [r1, #2 * L1_CACHE_BYTES])
+ PLD( pld [r1, #3 * L1_CACHE_BYTES])
+2:
+ .rept (2 * L1_CACHE_BYTES / 16 - 1)
stmia r0!, {r3, r4, ip, lr} @ 4
ldmia r1!, {r3, r4, ip, lr} @ 4
+ .endr
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
ldmgtia r1!, {r3, r4, ip, lr} @ 4
bgt 1b @ 1
PLD( ldmeqia r1!, {r3, r4, ip, lr} )
PLD( beq 2b )
- LOADREGS(fd, sp!, {r4, pc}) @ 3
+ ldmfd sp!, {r4, pc} @ 3
+ENDPROC(copy_page)
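
[Editorial note: a worked example of the new COPY_COUNT expression, assuming 4 KiB pages and 32-byte L1 cache lines; both values are configuration-dependent, and the PLD(-1) term subtracts one iteration when prefetching is enabled.]

#define PAGE_SZ		4096			/* assumed page size           */
#define L1_CACHE_BYTES	32			/* assumed L1 line size        */
/* each inner-loop pass moves two cache lines: 4 ldmia/stmia pairs x 16 bytes */
#define BYTES_PER_PASS	(2 * L1_CACHE_BYTES)		/* = 64                */
#define COPY_COUNT	(PAGE_SZ / BYTES_PER_PASS)	/* = 64 iterations     */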
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 838e435e492..3bc8eb811a7 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
*/
/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
-/*
* Theory of operation
* -------------------
*
@@ -65,6 +57,13 @@
*
* Restore registers with the values previously saved with the
* 'preserv' macro. Called upon code termination.
+ *
+ * LDR1W_SHIFT
+ * STR1W_SHIFT
+ *
+ * Correction to be applied to the "ip" register when branching into
+ * the ldr1w or str1w instructions (some of these macros may expand to
+ * more than one 32bit instruction in Thumb-2)
*/
@@ -82,7 +81,7 @@
stmfd sp!, {r5 - r8}
blt 5f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( rsb r3, ip, #32 )
CALGN( sbcnes r4, r3, r2 ) @ C is always set here
CALGN( bcs 2f )
@@ -107,9 +106,15 @@
5: ands ip, r2, #28
rsb ip, ip, #32
+#if LDR1W_SHIFT > 0
+ lsl ip, ip, #LDR1W_SHIFT
+#endif
addne pc, pc, ip @ C is always clear here
b 7f
-6: nop
+6:
+ .rept (1 << LDR1W_SHIFT)
+ W(nop)
+ .endr
ldr1w r1, r3, abort=20f
ldr1w r1, r4, abort=20f
ldr1w r1, r5, abort=20f
@@ -118,9 +123,16 @@
ldr1w r1, r8, abort=20f
ldr1w r1, lr, abort=20f
+#if LDR1W_SHIFT < STR1W_SHIFT
+ lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+#elif LDR1W_SHIFT > STR1W_SHIFT
+ lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+#endif
add pc, pc, ip
nop
- nop
+ .rept (1 << STR1W_SHIFT)
+ W(nop)
+ .endr
str1w r0, r3, abort=20f
str1w r0, r4, abort=20f
str1w r0, r5, abort=20f
@@ -168,7 +180,7 @@
subs r2, r2, #28
blt 14f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( rsb ip, ip, #32 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
@@ -185,24 +197,24 @@
12: PLD( pld [r1, #124] )
13: ldr4w r1, r4, r5, r6, r7, abort=19f
- mov r3, lr, pull #\pull
+ mov r3, lr, lspull #\pull
subs r2, r2, #32
ldr4w r1, r8, r9, ip, lr, abort=19f
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, lspush #\push
+ mov r4, r4, lspull #\pull
+ orr r4, r4, r5, lspush #\push
+ mov r5, r5, lspull #\pull
+ orr r5, r5, r6, lspush #\push
+ mov r6, r6, lspull #\pull
+ orr r6, r6, r7, lspush #\push
+ mov r7, r7, lspull #\pull
+ orr r7, r7, r8, lspush #\push
+ mov r8, r8, lspull #\pull
+ orr r8, r8, r9, lspush #\push
+ mov r9, r9, lspull #\pull
+ orr r9, r9, ip, lspush #\push
+ mov ip, ip, lspull #\pull
+ orr ip, ip, lr, lspush #\push
str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
bge 12b
PLD( cmn r2, #96 )
@@ -213,10 +225,10 @@
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, lspull #\pull
ldr1w r1, lr, abort=21f
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, lspush #\push
str1w r0, r3, abort=21f
bgt 15b
CALGN( cmp r2, #0 )
@@ -236,7 +248,7 @@
/*
- * Abort preanble and completion macros.
+ * Abort preamble and completion macros.
* If a fixup handler is required then those macros must surround it.
* It is assumed that the fixup code will handle the private part of
* the exit macro.
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 4a6d8ea1402..d066df686e1 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -16,7 +16,7 @@
/*
* Prototype:
*
- * size_t __arch_copy_to_user(void *to, const void *from, size_t n)
+ * size_t __copy_to_user(void *to, const void *from, size_t n)
*
* Purpose:
*
@@ -33,8 +33,15 @@
* Number of bytes NOT copied.
*/
+#define LDR1W_SHIFT 0
+#ifndef CONFIG_THUMB2_KERNEL
+#define STR1W_SHIFT 0
+#else
+#define STR1W_SHIFT 1
+#endif
+
.macro ldr1w ptr reg abort
- ldr \reg, [\ptr], #4
+ W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -50,10 +57,7 @@
.endm
.macro str1w ptr reg abort
-100: strt \reg, [\ptr], #4
- .section __ex_table, "a"
- .long 100b, \abort
- .previous
+ strusr \reg, \ptr, 4, abort=\abort
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -68,10 +72,7 @@
.endm
.macro str1b ptr reg cond=al abort
-100: str\cond\()bt \reg, [\ptr], #1
- .section __ex_table, "a"
- .long 100b, \abort
- .previous
+ strusr \reg, \ptr, 1, \cond, abort=\abort
.endm
.macro enter reg1 reg2
@@ -86,16 +87,20 @@
.text
-ENTRY(__arch_copy_to_user)
+ENTRY(__copy_to_user_std)
+WEAK(__copy_to_user)
#include "copy_template.S"
- .section .fixup,"ax"
+ENDPROC(__copy_to_user)
+ENDPROC(__copy_to_user_std)
+
+ .pushsection .fixup,"ax"
.align 0
copy_abort_preamble
ldmfd sp!, {r1, r2, r3}
sub r0, r0, r1
rsb r0, r0, r2
copy_abort_end
- .previous
+ .popsection
diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S
index 7065a20ee8a..3ac6ef01bc4 100644
--- a/arch/arm/lib/csumipv6.S
+++ b/arch/arm/lib/csumipv6.S
@@ -28,5 +28,6 @@ ENTRY(__csum_ipv6_magic)
adcs r0, r0, r3
adcs r0, r0, r2
adcs r0, r0, #0
- LOADREGS(fd, sp!, {pc})
+ ldmfd sp!, {pc}
+ENDPROC(__csum_ipv6_magic)
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index cb5e3708f11..31d3cb34740 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -26,7 +26,7 @@ td1 .req r4 @ save before use
td2 .req r5 @ save before use
td3 .req lr
-.zero: mov r0, sum
+.Lzero: mov r0, sum
add sp, sp, #4
ldr pc, [sp], #4
@@ -34,21 +34,22 @@ td3 .req lr
* Handle 0 to 7 bytes, with any alignment of source and
* destination pointers. Note that when we get here, C = 0
*/
-.less8: teq len, #0 @ check for zero count
- beq .zero
+.Lless8: teq len, #0 @ check for zero count
+ beq .Lzero
/* we must have at least one byte. */
tst buf, #1 @ odd address?
+ movne sum, sum, ror #8
ldrneb td0, [buf], #1
subne len, len, #1
adcnes sum, sum, td0, put_byte_1
-.less4: tst len, #6
- beq .less8_byte
+.Lless4: tst len, #6
+ beq .Lless8_byte
/* we are now half-word aligned */
-.less8_wordlp:
+.Lless8_wordlp:
#if __LINUX_ARM_ARCH__ >= 4
ldrh td0, [buf], #2
sub len, len, #2
@@ -64,19 +65,19 @@ td3 .req lr
#endif
adcs sum, sum, td0
tst len, #6
- bne .less8_wordlp
+ bne .Lless8_wordlp
-.less8_byte: tst len, #1 @ odd number of bytes
+.Lless8_byte: tst len, #1 @ odd number of bytes
ldrneb td0, [buf], #1 @ include last byte
adcnes sum, sum, td0, put_byte_0 @ update checksum
-.done: adc r0, sum, #0 @ collect up the last carry
+.Ldone: adc r0, sum, #0 @ collect up the last carry
ldr td0, [sp], #4
tst td0, #1 @ check buffer alignment
movne r0, r0, ror #8 @ rotate checksum by 8 bits
ldr pc, [sp], #4 @ return
-.not_aligned: tst buf, #1 @ odd address
+.Lnot_aligned: tst buf, #1 @ odd address
ldrneb td0, [buf], #1 @ make even
subne len, len, #1
adcnes sum, sum, td0, put_byte_1 @ update checksum
@@ -101,11 +102,14 @@ td3 .req lr
ENTRY(csum_partial)
stmfd sp!, {buf, lr}
cmp len, #8 @ Ensure that we have at least
- blo .less8 @ 8 bytes to copy.
+ blo .Lless8 @ 8 bytes to copy.
+
+ tst buf, #1
+ movne sum, sum, ror #8
adds sum, sum, #0 @ C = 0
tst buf, #3 @ Test destination alignment
- blne .not_aligned @ aligh destination, return here
+ blne .Lnot_aligned @ align destination, return here
1: bics ip, len, #31
beq 3f
@@ -127,11 +131,12 @@ ENTRY(csum_partial)
ldmfd sp!, {r4 - r5}
3: tst len, #0x1c @ should not change C
- beq .less4
+ beq .Lless4
4: ldr td0, [buf], #4
sub len, len, #4
adcs sum, sum, td0
tst len, #0x1c
bne 4b
- b .less4
+ b .Lless4
+ENDPROC(csum_partial)
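
[Editorial note: a simplified C model of what csum_partial accumulates, for orientation only. It ignores the alignment rotation and the carry chaining that the assembly keeps in the flags with adcs.]

/* Internet-checksum style partial sum: add 16-bit little-endian words into a
 * 32-bit accumulator, then fold the carries back into the low 16 bits. */
static unsigned int csum_partial_sketch(const unsigned char *buf, int len,
					unsigned int sum)
{
	while (len > 1) {
		sum += buf[0] | (buf[1] << 8);
		buf += 2;
		len -= 2;
	}
	if (len)
		sum += buf[0];			/* trailing odd byte */

	while (sum >> 16)			/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}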
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
index 990ee63b246..d03fc71fc88 100644
--- a/arch/arm/lib/csumpartialcopy.S
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -18,11 +18,11 @@
*/
.macro save_regs
- stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc}
+ stmfd sp!, {r1, r4 - r8, lr}
.endm
- .macro load_regs,flags
- LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+ .macro load_regs
+ ldmfd sp!, {r1, r4 - r8, pc}
.endm
.macro load1b, reg1
@@ -48,5 +48,6 @@
.endm
#define FN_ENTRY ENTRY(csum_partial_copy_nocheck)
+#define FN_EXIT ENDPROC(csum_partial_copy_nocheck)
#include "csumpartialcopygeneric.S"
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d3a2f4667db..d6e742d2400 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -22,8 +22,8 @@ dst .req r1
len .req r2
sum .req r3
-.zero: mov r0, sum
- load_regs ea
+.Lzero: mov r0, sum
+ load_regs
/*
* Align an unaligned destination pointer. We know that
@@ -31,8 +31,9 @@ sum .req r3
* the length. Note that the source pointer hasn't been
* aligned yet.
*/
-.dst_unaligned: tst dst, #1
- beq .dst_16bit
+.Ldst_unaligned:
+ tst dst, #1
+ beq .Ldst_16bit
load1b ip
sub len, len, #1
@@ -41,7 +42,7 @@ sum .req r3
tst dst, #2
moveq pc, lr @ dst is now 32bit aligned
-.dst_16bit: load2b r8, ip
+.Ldst_16bit: load2b r8, ip
sub len, len, #2
adcs sum, sum, r8, put_byte_0
strb r8, [dst], #1
@@ -53,12 +54,12 @@ sum .req r3
* Handle 0 to 7 bytes, with any alignment of source and
* destination pointers. Note that when we get here, C = 0
*/
-.less8: teq len, #0 @ check for zero count
- beq .zero
+.Lless8: teq len, #0 @ check for zero count
+ beq .Lzero
/* we must have at least one byte. */
tst dst, #1 @ dst 16-bit aligned
- beq .less8_aligned
+ beq .Lless8_aligned
/* Align dst */
load1b ip
@@ -66,7 +67,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst len, #6
- beq .less8_byteonly
+ beq .Lless8_byteonly
1: load2b r8, ip
sub len, len, #2
@@ -74,27 +75,26 @@ sum .req r3
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
-.less8_aligned: tst len, #6
+.Lless8_aligned:
+ tst len, #6
bne 1b
-.less8_byteonly:
+.Lless8_byteonly:
tst len, #1
- beq .done
+ beq .Ldone
load1b r8
adcs sum, sum, r8, put_byte_0 @ update checksum
strb r8, [dst], #1
- b .done
+ b .Ldone
FN_ENTRY
- mov ip, sp
save_regs
- sub fp, ip, #4
cmp len, #8 @ Ensure that we have at least
- blo .less8 @ 8 bytes to copy.
+ blo .Lless8 @ 8 bytes to copy.
adds sum, sum, #0 @ C = 0
tst dst, #3 @ Test destination alignment
- blne .dst_unaligned @ align destination, return here
+ blne .Ldst_unaligned @ align destination, return here
/*
* Ok, the dst pointer is now 32bit aligned, and we know
@@ -103,7 +103,7 @@ FN_ENTRY
*/
tst src, #3 @ Test source alignment
- bne .src_not_aligned
+ bne .Lsrc_not_aligned
/* Routine for src & dst aligned */
@@ -136,17 +136,17 @@ FN_ENTRY
adcs sum, sum, r4
4: ands len, len, #3
- beq .done
+ beq .Ldone
load1l r4
tst len, #2
mov r5, r4, get_byte_0
- beq .exit
- adcs sum, sum, r4, push #16
+ beq .Lexit
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
mov r5, r4, get_byte_2
-.exit: tst len, #1
+.Lexit: tst len, #1
strneb r5, [dst], #1
andne r5, r5, #255
adcnes sum, sum, r5, put_byte_0
@@ -157,37 +157,37 @@ FN_ENTRY
* the inefficient byte manipulations in the
* architecture independent code.
*/
-.done: adc r0, sum, #0
+.Ldone: adc r0, sum, #0
ldr sum, [sp, #0] @ dst
tst sum, #1
movne r0, r0, ror #8
- load_regs ea
+ load_regs
-.src_not_aligned:
+.Lsrc_not_aligned:
adc sum, sum, #0 @ include C from dst alignment
and ip, src, #3
bic src, src, #3
load1l r5
cmp ip, #2
- beq .src2_aligned
- bhi .src3_aligned
- mov r4, r5, pull #8 @ C = 0
+ beq .Lsrc2_aligned
+ bhi .Lsrc3_aligned
+ mov r4, r5, lspull #8 @ C = 0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
- mov r7, r7, pull #8
- orr r7, r7, r8, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
+ mov r7, r7, lspull #8
+ orr r7, r7, r8, lspush #24
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #8
+ mov r4, r8, lspull #8
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -196,50 +196,50 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #8
+ mov r4, r6, lspull #8
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #24
+ orr r4, r4, r5, lspush #24
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #8
+ mov r4, r5, lspull #8
4: ands len, len, #3
- beq .done
+ beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
- beq .exit
- adcs sum, sum, r4, push #16
+ beq .Lexit
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
mov r5, r4, get_byte_2
- b .exit
+ b .Lexit
-.src2_aligned: mov r4, r5, pull #16
+.Lsrc2_aligned: mov r4, r5, lspull #16
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
- mov r7, r7, pull #16
- orr r7, r7, r8, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
+ mov r7, r7, lspull #16
+ orr r7, r7, r8, lspush #16
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #16
+ mov r4, r8, lspull #16
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -248,52 +248,52 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #16
+ mov r4, r6, lspull #16
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #16
+ orr r4, r4, r5, lspush #16
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #16
+ mov r4, r5, lspull #16
4: ands len, len, #3
- beq .done
+ beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
- beq .exit
+ beq .Lexit
adcs sum, sum, r4
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
tst len, #1
- beq .done
+ beq .Ldone
load1b r5
- b .exit
+ b .Lexit
-.src3_aligned: mov r4, r5, pull #24
+.Lsrc3_aligned: mov r4, r5, lspull #24
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
- mov r7, r7, pull #24
- orr r7, r7, r8, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
+ mov r7, r7, lspull #24
+ orr r7, r7, r8, lspush #8
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #24
+ mov r4, r8, lspull #24
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -302,30 +302,31 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #24
+ mov r4, r6, lspull #24
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #8
+ orr r4, r4, r5, lspush #8
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #24
+ mov r4, r5, lspull #24
4: ands len, len, #3
- beq .done
+ beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
- beq .exit
+ beq .Lexit
strb r5, [dst], #1
adcs sum, sum, r4
load1l r4
mov r5, r4, get_byte_0
strb r5, [dst], #1
- adcs sum, sum, r4, push #24
+ adcs sum, sum, r4, lspush #24
mov r5, r4, get_byte_1
- b .exit
+ b .Lexit
+FN_EXIT
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 333bca292de..7d08b43d2c0 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -18,58 +18,36 @@
.text
.macro save_regs
- stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc}
+ stmfd sp!, {r1, r2, r4 - r8, lr}
.endm
- .macro load_regs,flags
- ldm\flags fp, {r1, r2, r4-r8, fp, sp, pc}
+ .macro load_regs
+ ldmfd sp!, {r1, r2, r4 - r8, pc}
.endm
.macro load1b, reg1
-9999: ldrbt \reg1, [r0], $1
- .section __ex_table, "a"
- .align 3
- .long 9999b, 6001f
- .previous
+ ldrusr \reg1, r0, 1
.endm
.macro load2b, reg1, reg2
-9999: ldrbt \reg1, [r0], $1
-9998: ldrbt \reg2, [r0], $1
- .section __ex_table, "a"
- .long 9999b, 6001f
- .long 9998b, 6001f
- .previous
+ ldrusr \reg1, r0, 1
+ ldrusr \reg2, r0, 1
.endm
.macro load1l, reg1
-9999: ldrt \reg1, [r0], $4
- .section __ex_table, "a"
- .align 3
- .long 9999b, 6001f
- .previous
+ ldrusr \reg1, r0, 4
.endm
.macro load2l, reg1, reg2
-9999: ldrt \reg1, [r0], $4
-9998: ldrt \reg2, [r0], $4
- .section __ex_table, "a"
- .long 9999b, 6001f
- .long 9998b, 6001f
- .previous
+ ldrusr \reg1, r0, 4
+ ldrusr \reg2, r0, 4
.endm
.macro load4l, reg1, reg2, reg3, reg4
-9999: ldrt \reg1, [r0], $4
-9998: ldrt \reg2, [r0], $4
-9997: ldrt \reg3, [r0], $4
-9996: ldrt \reg4, [r0], $4
- .section __ex_table, "a"
- .long 9999b, 6001f
- .long 9998b, 6001f
- .long 9997b, 6001f
- .long 9996b, 6001f
- .previous
+ ldrusr \reg1, r0, 4
+ ldrusr \reg2, r0, 4
+ ldrusr \reg3, r0, 4
+ ldrusr \reg4, r0, 4
.endm
/*
@@ -80,6 +58,7 @@
*/
#define FN_ENTRY ENTRY(csum_partial_copy_from_user)
+#define FN_EXIT ENDPROC(csum_partial_copy_from_user)
#include "csumpartialcopygeneric.S"
@@ -89,16 +68,16 @@
* so properly, we would have to add in whatever registers were loaded before
* the fault, which, with the current asm above is not predictable.
*/
- .section .fixup,"ax"
+ .pushsection .fixup,"ax"
.align 4
-6001: mov r4, #-EFAULT
- ldr r5, [fp, #4] @ *err_ptr
+9001: mov r4, #-EFAULT
+ ldr r5, [sp, #8*4] @ *err_ptr
str r4, [r5]
ldmia sp, {r1, r2} @ retrieve dst, len
add r2, r2, r1
mov r0, #0 @ zero the buffer
-6002: teq r2, r1
+9002: teq r2, r1
strneb r0, [r1], #1
- bne 6002b
- load_regs ea
- .previous
+ bne 9002b
+ load_regs
+ .popsection
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
new file mode 100644
index 00000000000..bc1033b897b
--- /dev/null
+++ b/arch/arm/lib/delay-loop.S
@@ -0,0 +1,68 @@
+/*
+ * linux/arch/arm/lib/delay.S
+ *
+ * Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/delay.h>
+ .text
+
+.LC0: .word loops_per_jiffy
+.LC1: .word UDELAY_MULT
+
+/*
+ * r0 <= 2000
+ * lpj <= 0x01ffffff (max. 3355 bogomips)
+ * HZ <= 1000
+ */
+
+ENTRY(__loop_udelay)
+ ldr r2, .LC1
+ mul r0, r2, r0
+ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
+ mov r1, #-1
+ ldr r2, .LC0
+ ldr r2, [r2] @ max = 0x01ffffff
+ add r0, r0, r1, lsr #32-14
+ mov r0, r0, lsr #14 @ max = 0x0001ffff
+ add r2, r2, r1, lsr #32-10
+ mov r2, r2, lsr #10 @ max = 0x00007fff
+ mul r0, r2, r0 @ max = 2^32-1
+ add r0, r0, r1, lsr #32-6
+ movs r0, r0, lsr #6
+ moveq pc, lr
+
+/*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ */
+ .align 3
+
+@ Delay routine
+ENTRY(__loop_delay)
+ subs r0, r0, #1
+#if 0
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+ movls pc, lr
+ subs r0, r0, #1
+#endif
+ bhi __loop_delay
+ mov pc, lr
+ENDPROC(__loop_udelay)
+ENDPROC(__loop_const_udelay)
+ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
deleted file mode 100644
index 3c7f7e675dd..00000000000
--- a/arch/arm/lib/delay.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * linux/arch/arm/lib/delay.S
- *
- * Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
- .text
-
-LC0: .word loops_per_jiffy
-
-/*
- * 0 <= r0 <= 2000
- */
-ENTRY(__udelay)
- mov r2, #0x6800
- orr r2, r2, #0x00db
- mul r0, r2, r0
-ENTRY(__const_udelay) @ 0 <= r0 <= 0x01ffffff
- ldr r2, LC0
- ldr r2, [r2] @ max = 0x0fffffff
- mov r0, r0, lsr #11 @ max = 0x00003fff
- mov r2, r2, lsr #11 @ max = 0x0003ffff
- mul r0, r2, r0 @ max = 2^32-1
- movs r0, r0, lsr #6
- RETINSTR(moveq,pc,lr)
-
-/*
- * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32
- *
- * Oh, if only we had a cycle counter...
- */
-
-@ Delay routine
-ENTRY(__delay)
- subs r0, r0, #1
-#if 0
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
- RETINSTR(movls,pc,lr)
- subs r0, r0, #1
-#endif
- bhi __delay
- RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
new file mode 100644
index 00000000000..5306de35013
--- /dev/null
+++ b/arch/arm/lib/delay.c
@@ -0,0 +1,93 @@
+/*
+ * Delay loops based on the OpenRISC implementation.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timex.h>
+
+/*
+ * Default to the loop-based delay implementation.
+ */
+struct arm_delay_ops arm_delay_ops = {
+ .delay = __loop_delay,
+ .const_udelay = __loop_const_udelay,
+ .udelay = __loop_udelay,
+};
+
+static const struct delay_timer *delay_timer;
+static bool delay_calibrated;
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (!delay_timer)
+ return -ENXIO;
+
+ *timer_val = delay_timer->read_current_timer();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(read_current_timer);
+
+static void __timer_delay(unsigned long cycles)
+{
+ cycles_t start = get_cycles();
+
+ while ((get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static void __timer_const_udelay(unsigned long xloops)
+{
+ unsigned long long loops = xloops;
+ loops *= arm_delay_ops.ticks_per_jiffy;
+ __timer_delay(loops >> UDELAY_SHIFT);
+}
+
+static void __timer_udelay(unsigned long usecs)
+{
+ __timer_const_udelay(usecs * UDELAY_MULT);
+}
+
+void __init register_current_timer_delay(const struct delay_timer *timer)
+{
+ if (!delay_calibrated) {
+ pr_info("Switching to timer-based delay loop\n");
+ delay_timer = timer;
+ lpj_fine = timer->freq / HZ;
+
+ /* cpufreq may scale loops_per_jiffy, so keep a private copy */
+ arm_delay_ops.ticks_per_jiffy = lpj_fine;
+ arm_delay_ops.delay = __timer_delay;
+ arm_delay_ops.const_udelay = __timer_const_udelay;
+ arm_delay_ops.udelay = __timer_udelay;
+
+ delay_calibrated = true;
+ } else {
+ pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
+ }
+}
+
+unsigned long calibrate_delay_is_known(void)
+{
+ delay_calibrated = true;
+ return lpj_fine;
+}
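
[Editorial note: the new register_current_timer_delay() hook is meant to be called by a platform timer driver; a hypothetical example follows. The register address, frequency and function names are made up.]

#include <linux/delay.h>
#include <linux/init.h>
#include <linux/io.h>

#define MY_TIMER_COUNTER	((void __iomem *)0xf0001000)	/* hypothetical, would come from ioremap() */
#define MY_TIMER_FREQ		24000000			/* 24 MHz, hypothetical */

static unsigned long my_timer_read(void)
{
	return readl(MY_TIMER_COUNTER);		/* free-running upward counter */
}

static struct delay_timer my_delay_timer = {
	.read_current_timer	= my_timer_read,
	.freq			= MY_TIMER_FREQ,
};

static void __init my_timer_init(void)
{
	/* switches udelay()/__delay() over to the timer-based implementation */
	register_current_timer_delay(&my_delay_timer);
}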
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index ec9a1cd6176..e55c4842c29 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
+#include <asm/unwind.h>
#ifdef __ARMEB__
#define xh r0
@@ -44,6 +45,7 @@
*/
ENTRY(__do_div64)
+UNWIND(.fnstart)
@ Test for easy paths first.
subs ip, r4, #1
@@ -177,7 +179,9 @@ ENTRY(__do_div64)
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
- orr yl, yl, xh, lsl ip
+ ARM( orr yl, yl, xh, lsl ip )
+ THUMB( lsl xh, xh, ip )
+ THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
@@ -187,14 +191,21 @@ ENTRY(__do_div64)
moveq yh, xh
moveq xh, #0
moveq pc, lr
+UNWIND(.fnend)
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
@ Division by 0:
- str lr, [sp, #-4]!
+ str lr, [sp, #-8]!
bl __div0
@ as wrong as it could be...
mov yl, #0
mov yh, #0
mov xh, #0
- ldr pc, [sp], #4
+ ldr pc, [sp], #8
+UNWIND(.fnend)
+ENDPROC(__do_div64)
diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S
index fb7b602a6f7..e6057fa851b 100644
--- a/arch/arm/lib/ecard.S
+++ b/arch/arm/lib/ecard.S
@@ -12,7 +12,6 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <asm/hardware.h>
#define CPSR2SPSR(rt) \
mrs rt, cpsr; \
@@ -29,7 +28,7 @@ ENTRY(ecard_loader_read)
CPSR2SPSR(r0)
mov lr, pc
mov pc, r2
- LOADREGS(fd, sp!, {r4 - r12, pc})
+ ldmfd sp!, {r4 - r12, pc}
@ Purpose: call an expansion card loader to reset the card
@ Proto : void read_loader(int card_base, char *loader);
@@ -41,5 +40,5 @@ ENTRY(ecard_loader_reset)
CPSR2SPSR(r0)
mov lr, pc
add pc, r1, #8
- LOADREGS(fd, sp!, {r4 - r12, pc})
+ ldmfd sp!, {r4 - r12, pc}
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index f055d56ea68..64f6bc1a913 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -25,14 +25,18 @@ ENTRY(_find_first_zero_bit_le)
teq r1, #0
beq 3f
mov r2, #0
-1: ldrb r3, [r0, r2, lsr #3]
+1:
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
- bne .found @ any now set - found zero bit
+ bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(_find_first_zero_bit_le)
/*
* Purpose : Find next 'zero' bit
@@ -43,13 +47,16 @@ ENTRY(_find_next_zero_bit_le)
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
- ldrb r3, [r0, r2, lsr #3]
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
- bne .found
+ bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
+ENDPROC(_find_next_zero_bit_le)
/*
* Purpose : Find a 'one' bit
@@ -59,14 +66,18 @@ ENTRY(_find_first_bit_le)
teq r1, #0
beq 3f
mov r2, #0
-1: ldrb r3, [r0, r2, lsr #3]
+1:
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
movs r3, r3
- bne .found @ any now set - found zero bit
+ bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(_find_first_bit_le)
/*
* Purpose : Find next 'one' bit
@@ -77,12 +88,15 @@ ENTRY(_find_next_bit_le)
beq 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
- ldrb r3, [r0, r2, lsr #3]
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
- bne .found
+ bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
+ENDPROC(_find_next_bit_le)
#ifdef __ARMEB__
@@ -91,14 +105,17 @@ ENTRY(_find_first_zero_bit_be)
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
- ldrb r3, [r0, r3, lsr #3]
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
eors r3, r3, #0xff @ invert bits
- bne .found @ any now set - found zero bit
+ bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
teq r1, #0
@@ -106,27 +123,33 @@ ENTRY(_find_next_zero_bit_be)
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
- ldrb r3, [r0, r3, lsr #3]
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
eor r3, r3, #0xff @ now looking for a 1 bit
movs r3, r3, lsr ip @ shift off unused bits
- bne .found
+ bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
+ENDPROC(_find_next_zero_bit_be)
ENTRY(_find_first_bit_be)
teq r1, #0
beq 3f
mov r2, #0
1: eor r3, r2, #0x18 @ big endian byte ordering
- ldrb r3, [r0, r3, lsr #3]
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
movs r3, r3
- bne .found @ any now set - found zero bit
+ bne .L_found @ any now set - found zero bit
add r2, r2, #8 @ next bit pointer
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
teq r1, #0
@@ -134,22 +157,25 @@ ENTRY(_find_next_bit_be)
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
- ldrb r3, [r0, r3, lsr #3]
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
movs r3, r3, lsr ip @ shift off unused bits
- bne .found
+ bne .L_found
orr r2, r2, #7 @ if zero, then no bits here
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
+ENDPROC(_find_next_bit_be)
#endif
/*
* One or more bits in the LSB of r3 are assumed to be set.
*/
-.found:
+.L_found:
#if __LINUX_ARM_ARCH__ >= 5
- rsb r1, r3, #0
- and r3, r3, r1
+ rsb r0, r3, #0
+ and r3, r3, r0
clz r3, r3
rsb r3, r3, #31
add r0, r2, r3
@@ -164,5 +190,7 @@ ENTRY(_find_next_bit_be)
addeq r2, r2, #1
mov r0, r2
#endif
- RETINSTR(mov,pc,lr)
+ cmp r1, r0 @ Clamp to maxbit
+ movlo r0, r1
+ mov pc, lr
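
[Editorial note: the ARMv5 ".L_found" epilogue above computes the index of the lowest set bit; a C sketch of the same trick, assuming at least one bit is set as the comment states. The assembly then adds this index to the running bit offset in r2 and clamps against maxbit.]

/* rsb/and isolates the lowest set bit (x & -x); clz turns it into an index. */
static inline unsigned int lowest_set_bit_sketch(unsigned int x)
{
	unsigned int bit = x & -x;			/* keep only the lowest set bit */

	return 31 - (unsigned int)__builtin_clz(bit);	/* clz r3; rsb r3, r3, #31 */
}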
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index d204018070a..9b06bb41fca 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -16,63 +16,65 @@
* __get_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2, r3 contains the zero-extended value
+ * r2 contains the zero-extended value
* lr corrupted
*
- * No other registers must be altered. (see include/asm-arm/uaccess.h
+ * No other registers must be altered. (see <asm/uaccess.h>
* for specific ASM register usage).
*
* Note that ADDR_LIMIT is either 0 or 0xc0000000.
* Note also that it is intended that __get_user_bad is not global.
*/
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
- .global __get_user_1
-__get_user_1:
-1: ldrbt r2, [r0]
+ENTRY(__get_user_1)
+ check_uaccess r0, 1, r1, r2, __get_user_bad
+1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
+ENDPROC(__get_user_1)
- .global __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+ check_uaccess r0, 2, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb .req ip
2: ldrbt r2, [r0], #1
-3: ldrbt r3, [r0]
+3: ldrbt rb, [r0], #0
+#else
+rb .req r0
+2: ldrb r2, [r0]
+3: ldrb rb, [r0, #1]
+#endif
#ifndef __ARMEB__
- orr r2, r2, r3, lsl #8
+ orr r2, r2, rb, lsl #8
#else
- orr r2, r3, r2, lsl #8
+ orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
+ENDPROC(__get_user_2)
- .global __get_user_4
-__get_user_4:
-4: ldrt r2, [r0]
- mov r0, #0
- mov pc, lr
-
- .global __get_user_8
-__get_user_8:
-5: ldrt r2, [r0], #4
-6: ldrt r3, [r0]
+ENTRY(__get_user_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad
+4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
+ENDPROC(__get_user_4)
-__get_user_bad_8:
- mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
+ENDPROC(__get_user_bad)
-.section __ex_table, "a"
+.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
- .long 5b, __get_user_bad_8
- .long 6b, __get_user_bad_8
-.previous
+.popsection
diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S
index 3aacd01d40e..69719bad674 100644
--- a/arch/arm/lib/io-acorn.S
+++ b/arch/arm/lib/io-acorn.S
@@ -11,14 +11,14 @@
*
*/
#include <linux/linkage.h>
+#include <linux/kern_levels.h>
#include <asm/assembler.h>
-#include <asm/hardware.h>
.text
.align
-.iosl_warning:
- .ascii "<4>insl/outsl not implemented, called from %08lX\0"
+.Liosl_warning:
+ .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0"
.align
/*
@@ -27,6 +27,6 @@
*/
ENTRY(insl)
ENTRY(outsl)
- adr r0, .iosl_warning
+ adr r0, .Liosl_warning
mov r1, lr
b printk
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 081ef749298..9f4238987fe 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -10,7 +10,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-.insb_align: rsb ip, ip, #4
+.Linsb_align: rsb ip, ip, #4
cmp ip, r2
movgt ip, r2
cmp ip, #2
@@ -21,20 +21,20 @@
ldrgtb r3, [r0]
strgtb r3, [r1], #1
subs r2, r2, ip
- bne .insb_aligned
+ bne .Linsb_aligned
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
ands ip, r1, #3
- bne .insb_align
+ bne .Linsb_align
-.insb_aligned: stmfd sp!, {r4 - r6, lr}
+.Linsb_aligned: stmfd sp!, {r4 - r6, lr}
subs r2, r2, #16
- bmi .insb_no_16
+ bmi .Linsb_no_16
-.insb_16_lp: ldrb r3, [r0]
+.Linsb_16_lp: ldrb r3, [r0]
ldrb r4, [r0]
ldrb r5, [r0]
mov r3, r3, put_byte_0
@@ -69,13 +69,13 @@ ENTRY(__raw_readsb)
stmia r1!, {r3 - r6}
subs r2, r2, #16
- bpl .insb_16_lp
+ bpl .Linsb_16_lp
tst r2, #15
- LOADREGS(eqfd, sp!, {r4 - r6, pc})
+ ldmeqfd sp!, {r4 - r6, pc}
-.insb_no_16: tst r2, #8
- beq .insb_no_8
+.Linsb_no_16: tst r2, #8
+ beq .Linsb_no_8
ldrb r3, [r0]
ldrb r4, [r0]
@@ -95,8 +95,8 @@ ENTRY(__raw_readsb)
orr r4, r4, ip, put_byte_3
stmia r1!, {r3, r4}
-.insb_no_8: tst r2, #4
- beq .insb_no_4
+.Linsb_no_8: tst r2, #4
+ beq .Linsb_no_4
ldrb r3, [r0]
ldrb r4, [r0]
@@ -108,8 +108,8 @@ ENTRY(__raw_readsb)
orr r3, r3, r6, put_byte_3
str r3, [r1], #4
-.insb_no_4: ands r2, r2, #3
- LOADREGS(eqfd, sp!, {r4 - r6, pc})
+.Linsb_no_4: ands r2, r2, #3
+ ldmeqfd sp!, {r4 - r6, pc}
cmp r2, #2
ldrb r3, [r0]
@@ -119,4 +119,5 @@ ENTRY(__raw_readsb)
ldrgtb r3, [r0]
strgtb r3, [r1]
- LOADREGS(fd, sp!, {r4 - r6, pc})
+ ldmfd sp!, {r4 - r6, pc}
+ENDPROC(__raw_readsb)
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 75a9121cb23..7a7430950c7 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
strb ip, [r1], #1
4: subs r2, r2, #1
- mov ip, r3, pull #24
+ mov ip, r3, lspull #24
ldrne r3, [r0]
- orrne ip, ip, r3, push #8
+ orrne ip, ip, r3, lspush #8
strne ip, [r1], #4
bne 4b
b 8f
5: subs r2, r2, #1
- mov ip, r3, pull #16
+ mov ip, r3, lspull #16
ldrne r3, [r0]
- orrne ip, ip, r3, push #16
+ orrne ip, ip, r3, lspush #16
strne ip, [r1], #4
bne 5b
b 7f
6: subs r2, r2, #1
- mov ip, r3, pull #8
+ mov ip, r3, lspull #8
ldrne r3, [r0]
- orrne ip, ip, r3, push #24
+ orrne ip, ip, r3, lspush #24
strne ip, [r1], #4
bne 6b
@@ -76,3 +76,4 @@ ENTRY(__raw_readsl)
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
mov pc, lr
+ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 476cf7f8a63..88487c8c4f2 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -9,18 +9,17 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <asm/hardware.h>
-.insw_bad_alignment:
- adr r0, .insw_bad_align_msg
+.Linsw_bad_alignment:
+ adr r0, .Linsw_bad_align_msg
mov r2, lr
b panic
-.insw_bad_align_msg:
+.Linsw_bad_align_msg:
.asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
.align
-.insw_align: tst r1, #1
- bne .insw_bad_alignment
+.Linsw_align: tst r1, #1
+ bne .Linsw_bad_alignment
ldr r3, [r0]
strb r3, [r1], #1
@@ -28,22 +27,22 @@
strb r3, [r1], #1
subs r2, r2, #1
- RETINSTR(moveq, pc, lr)
+ moveq pc, lr
ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
tst r1, #3
- bne .insw_align
+ bne .Linsw_align
-.insw_aligned: mov ip, #0xff
+.Linsw_aligned: mov ip, #0xff
orr ip, ip, ip, lsl #8
stmfd sp!, {r4, r5, r6, lr}
subs r2, r2, #8
- bmi .no_insw_8
+ bmi .Lno_insw_8
-.insw_8_lp: ldr r3, [r0]
+.Linsw_8_lp: ldr r3, [r0]
and r3, r3, ip
ldr r4, [r0]
orr r3, r3, r4, lsl #16
@@ -66,13 +65,13 @@ ENTRY(__raw_readsw)
stmia r1!, {r3 - r6}
subs r2, r2, #8
- bpl .insw_8_lp
+ bpl .Linsw_8_lp
tst r2, #7
- LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+ ldmeqfd sp!, {r4, r5, r6, pc}
-.no_insw_8: tst r2, #4
- beq .no_insw_4
+.Lno_insw_8: tst r2, #4
+ beq .Lno_insw_4
ldr r3, [r0]
and r3, r3, ip
@@ -86,8 +85,8 @@ ENTRY(__raw_readsw)
stmia r1!, {r3, r4}
-.no_insw_4: tst r2, #2
- beq .no_insw_2
+.Lno_insw_4: tst r2, #2
+ beq .Lno_insw_2
ldr r3, [r0]
and r3, r3, ip
@@ -96,12 +95,12 @@ ENTRY(__raw_readsw)
str r3, [r1], #4
-.no_insw_2: tst r2, #1
+.Lno_insw_2: tst r2, #1
ldrne r3, [r0]
strneb r3, [r1], #1
movne r3, r3, lsr #8
strneb r3, [r1]
- LOADREGS(fd, sp!, {r4, r5, r6, pc})
+ ldmfd sp!, {r4, r5, r6, pc}
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index c92b66ecbe8..1f393d42593 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -18,8 +18,8 @@
#endif
.endm
-.insw_align: movs ip, r1, lsl #31
- bne .insw_noalign
+.Linsw_align: movs ip, r1, lsl #31
+ bne .Linsw_noalign
ldrh ip, [r0]
sub r2, r2, #1
strh ip, [r1], #2
@@ -28,14 +28,14 @@ ENTRY(__raw_readsw)
teq r2, #0
moveq pc, lr
tst r1, #3
- bne .insw_align
+ bne .Linsw_align
stmfd sp!, {r4, r5, lr}
subs r2, r2, #8
- bmi .no_insw_8
+ bmi .Lno_insw_8
-.insw_8_lp: ldrh r3, [r0]
+.Linsw_8_lp: ldrh r3, [r0]
ldrh r4, [r0]
pack r3, r3, r4
@@ -53,10 +53,10 @@ ENTRY(__raw_readsw)
subs r2, r2, #8
stmia r1!, {r3 - r5, ip}
- bpl .insw_8_lp
+ bpl .Linsw_8_lp
-.no_insw_8: tst r2, #4
- beq .no_insw_4
+.Lno_insw_8: tst r2, #4
+ beq .Lno_insw_4
ldrh r3, [r0]
ldrh r4, [r0]
@@ -68,15 +68,15 @@ ENTRY(__raw_readsw)
stmia r1!, {r3, r4}
-.no_insw_4: movs r2, r2, lsl #31
- bcc .no_insw_2
+.Lno_insw_4: movs r2, r2, lsl #31
+ bcc .Lno_insw_2
ldrh r3, [r0]
ldrh ip, [r0]
pack r3, r3, ip
str r3, [r1], #4
-.no_insw_2: ldrneh r3, [r0]
+.Lno_insw_2: ldrneh r3, [r0]
strneh r3, [r1]
ldmfd sp!, {r4, r5, pc}
@@ -93,7 +93,7 @@ ENTRY(__raw_readsw)
#define pull_hbyte1 lsr #8
#endif
-.insw_noalign: stmfd sp!, {r4, lr}
+.Linsw_noalign: stmfd sp!, {r4, lr}
ldrccb ip, [r1, #-1]!
bcc 1f
@@ -128,3 +128,4 @@ ENTRY(__raw_readsw)
_BE_ONLY_( movne ip, ip, lsr #24 )
strneb ip, [r1]
ldmfd sp!, {r4, pc}
+ENDPROC(__raw_readsw)
diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c
deleted file mode 100644
index 824253948f5..00000000000
--- a/arch/arm/lib/io-shark.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * linux/arch/arm/lib/io-shark.c
- *
- * by Alexander Schulz
- *
- * derived from:
- * linux/arch/arm/lib/io-ebsa.S
- * Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 70b2561bdb0..68b92f4acae 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -30,7 +30,7 @@
#endif
.endm
-.outsb_align: rsb ip, ip, #4
+.Loutsb_align: rsb ip, ip, #4
cmp ip, r2
movgt ip, r2
cmp ip, #2
@@ -41,45 +41,46 @@
ldrgtb r3, [r1], #1
strgtb r3, [r0]
subs r2, r2, ip
- bne .outsb_aligned
+ bne .Loutsb_aligned
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
ands ip, r1, #3
- bne .outsb_align
+ bne .Loutsb_align
-.outsb_aligned: stmfd sp!, {r4, r5, lr}
+.Loutsb_aligned:
+ stmfd sp!, {r4, r5, lr}
subs r2, r2, #16
- bmi .outsb_no_16
+ bmi .Loutsb_no_16
-.outsb_16_lp: ldmia r1!, {r3, r4, r5, ip}
+.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip}
outword r3
outword r4
outword r5
outword ip
subs r2, r2, #16
- bpl .outsb_16_lp
+ bpl .Loutsb_16_lp
tst r2, #15
- LOADREGS(eqfd, sp!, {r4, r5, pc})
+ ldmeqfd sp!, {r4, r5, pc}
-.outsb_no_16: tst r2, #8
- beq .outsb_no_8
+.Loutsb_no_16: tst r2, #8
+ beq .Loutsb_no_8
ldmia r1!, {r3, r4}
outword r3
outword r4
-.outsb_no_8: tst r2, #4
- beq .outsb_no_4
+.Loutsb_no_8: tst r2, #4
+ beq .Loutsb_no_4
ldr r3, [r1], #4
outword r3
-.outsb_no_4: ands r2, r2, #3
- LOADREGS(eqfd, sp!, {r4, r5, pc})
+.Loutsb_no_4: ands r2, r2, #3
+ ldmeqfd sp!, {r4, r5, pc}
cmp r2, #2
ldrb r3, [r1], #1
@@ -89,4 +90,5 @@ ENTRY(__raw_writesb)
ldrgtb r3, [r1]
strgtb r3, [r0]
- LOADREGS(fd, sp!, {r4, r5, pc})
+ ldmfd sp!, {r4, r5, pc}
+ENDPROC(__raw_writesb)
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index f8f14dd227c..d0d104a0dd1 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,27 @@ ENTRY(__raw_writesl)
blt 5f
bgt 6f
-4: mov ip, r3, pull #16
+4: mov ip, r3, lspull #16
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #16
+ orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
-5: mov ip, r3, pull #8
+5: mov ip, r3, lspull #8
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #24
+ orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
-6: mov ip, r3, pull #24
+6: mov ip, r3, lspull #24
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #8
+ orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
+ENDPROC(__raw_writesl)
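
The pull/push to lspull/lspush change above is purely a rename of the assembler shift macros; the technique itself is the usual unaligned-source word assembly: keep the previously loaded word, shift out the bytes already emitted, and OR in the bytes of the next word. A little-endian C sketch of the same idea (lspull is lsr and lspush is lsl in that configuration; names here are illustrative, not kernel API):

    #include <stdint.h>
    #include <stddef.h>

    /* Write 'count' 32-bit words to a device register from a source buffer
     * that sits 'off' bytes (1..3) past a word boundary.  Mirrors the
     * assembly: round the pointer down and reassemble full words. */
    static void writesl_unaligned_sketch(volatile uint32_t *reg,
                                         const uint8_t *src,
                                         size_t count, unsigned off)
    {
            const uint32_t *p = (const uint32_t *)(src - off);
            unsigned pull = 8 * off;        /* bits already consumed */
            unsigned push = 32 - pull;      /* bits taken from the next word */
            uint32_t cur = *p++;

            while (count--) {
                    uint32_t next = *p++;
                    *reg = (cur >> pull) | (next << push);
                    cur = next;
            }
    }
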
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index 950e7e310f1..49b800419e3 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -9,18 +9,17 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <asm/hardware.h>
-.outsw_bad_alignment:
- adr r0, .outsw_bad_align_msg
+.Loutsw_bad_alignment:
+ adr r0, .Loutsw_bad_align_msg
mov r2, lr
b panic
-.outsw_bad_align_msg:
+.Loutsw_bad_align_msg:
.asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
.align
-.outsw_align: tst r1, #1
- bne .outsw_bad_alignment
+.Loutsw_align: tst r1, #1
+ bne .Loutsw_bad_alignment
add r1, r1, #2
@@ -29,20 +28,20 @@
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
- RETINSTR(moveq, pc, lr)
+ moveq pc, lr
ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
tst r1, #3
- bne .outsw_align
+ bne .Loutsw_align
-.outsw_aligned: stmfd sp!, {r4, r5, r6, lr}
+ stmfd sp!, {r4, r5, r6, lr}
subs r2, r2, #8
- bmi .no_outsw_8
+ bmi .Lno_outsw_8
-.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6}
+.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6}
mov ip, r3, lsl #16
orr ip, ip, ip, lsr #16
@@ -77,13 +76,13 @@ ENTRY(__raw_writesw)
str ip, [r0]
subs r2, r2, #8
- bpl .outsw_8_lp
+ bpl .Loutsw_8_lp
tst r2, #7
- LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+ ldmeqfd sp!, {r4, r5, r6, pc}
-.no_outsw_8: tst r2, #4
- beq .no_outsw_4
+.Lno_outsw_8: tst r2, #4
+ beq .Lno_outsw_4
ldmia r1!, {r3, r4}
@@ -103,8 +102,8 @@ ENTRY(__raw_writesw)
orr ip, ip, ip, lsl #16
str ip, [r0]
-.no_outsw_4: tst r2, #2
- beq .no_outsw_2
+.Lno_outsw_4: tst r2, #2
+ beq .Lno_outsw_2
ldr r3, [r1], #4
@@ -116,7 +115,7 @@ ENTRY(__raw_writesw)
orr ip, ip, ip, lsl #16
str ip, [r0]
-.no_outsw_2: tst r2, #1
+.Lno_outsw_2: tst r2, #1
ldrne r3, [r1]
@@ -124,4 +123,4 @@ ENTRY(__raw_writesw)
orrne ip, ip, ip, lsr #16
strne ip, [r0]
- LOADREGS(fd, sp!, {r4, r5, r6, pc})
+ ldmfd sp!, {r4, r5, r6, pc}
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index 5e240e452af..ff4f71b579e 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -22,8 +22,8 @@
#endif
.endm
-.outsw_align: movs ip, r1, lsl #31
- bne .outsw_noalign
+.Loutsw_align: movs ip, r1, lsl #31
+ bne .Loutsw_noalign
ldrh r3, [r1], #2
sub r2, r2, #1
@@ -33,35 +33,35 @@ ENTRY(__raw_writesw)
teq r2, #0
moveq pc, lr
ands r3, r1, #3
- bne .outsw_align
+ bne .Loutsw_align
stmfd sp!, {r4, r5, lr}
subs r2, r2, #8
- bmi .no_outsw_8
+ bmi .Lno_outsw_8
-.outsw_8_lp: ldmia r1!, {r3, r4, r5, ip}
+.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip}
subs r2, r2, #8
outword r3
outword r4
outword r5
outword ip
- bpl .outsw_8_lp
+ bpl .Loutsw_8_lp
-.no_outsw_8: tst r2, #4
- beq .no_outsw_4
+.Lno_outsw_8: tst r2, #4
+ beq .Lno_outsw_4
ldmia r1!, {r3, ip}
outword r3
outword ip
-.no_outsw_4: movs r2, r2, lsl #31
- bcc .no_outsw_2
+.Lno_outsw_4: movs r2, r2, lsl #31
+ bcc .Lno_outsw_2
ldr r3, [r1], #4
outword r3
-.no_outsw_2: ldrneh r3, [r1]
+.Lno_outsw_2: ldrneh r3, [r1]
strneh r3, [r0]
ldmfd sp!, {r4, r5, pc}
@@ -74,7 +74,11 @@ ENTRY(__raw_writesw)
#define push_hbyte1 lsl #8
#endif
-.outsw_noalign: ldr r3, [r1, -r3]!
+.Loutsw_noalign:
+ ARM( ldr r3, [r1, -r3]! )
+ THUMB( rsb r3, r3, #0 )
+ THUMB( ldr r3, [r1, r3] )
+ THUMB( sub r1, r3 )
subcs r2, r2, #1
bcs 2f
subs r2, r2, #2
@@ -93,3 +97,4 @@ ENTRY(__raw_writesw)
3: movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
+ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 59026029d01..c562f649734 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -1,7 +1,7 @@
/*
* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
*
- * Author: Nicolas Pitre <nico@cam.org>
+ * Author: Nicolas Pitre <nico@fluxnic.net>
* - contributed to gcc-3.4 on Sep 30, 2003
* - adapted for the Linux kernel on Oct 2, 2003
*/
@@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
-
+#include <asm/unwind.h>
.macro ARM_DIV_BODY dividend, divisor, result, curbit
@@ -206,6 +206,8 @@ Boston, MA 02111-1307, USA. */
ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+UNWIND(.fnstart)
subs r2, r1, #1
moveq pc, lr
@@ -229,8 +231,12 @@ ENTRY(__udivsi3)
mov r0, r0, lsr r2
mov pc, lr
+UNWIND(.fnend)
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
ENTRY(__umodsi3)
+UNWIND(.fnstart)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
@@ -244,8 +250,12 @@ ENTRY(__umodsi3)
mov pc, lr
+UNWIND(.fnend)
+ENDPROC(__umodsi3)
ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+UNWIND(.fnstart)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
@@ -282,8 +292,12 @@ ENTRY(__divsi3)
rsbmi r0, r0, #0
mov pc, lr
+UNWIND(.fnend)
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
ENTRY(__modsi3)
+UNWIND(.fnstart)
cmp r1, #0
beq Ldiv0
@@ -303,12 +317,47 @@ ENTRY(__modsi3)
rsbmi r0, r0, #0
mov pc, lr
+UNWIND(.fnend)
+ENDPROC(__modsi3)
-Ldiv0:
+#ifdef CONFIG_AEABI
- str lr, [sp, #-4]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #4
+ENTRY(__aeabi_uidivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
+UNWIND(.fnend)
+ENDPROC(__aeabi_idivmod)
+
+#endif
+
+Ldiv0:
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+ str lr, [sp, #-8]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #8
+UNWIND(.fnend)
+ENDPROC(Ldiv0)
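
The new __aeabi_uidivmod/__aeabi_idivmod entry points avoid a separate modulo routine: they save the operands, call the plain division helper for the quotient, then recover the remainder with a multiply and subtract (mul r3, r0, r2; sub r1, r1, r3), returning the quotient in r0 and the remainder in r1. A C model of the sequence (a sketch; the real helper returns the pair in a register pair rather than a struct):

    #include <stdint.h>

    struct udivmod { uint32_t quot, rem; };     /* stands in for r0/r1 */

    static struct udivmod aeabi_uidivmod_sketch(uint32_t n, uint32_t d)
    {
            struct udivmod r;

            r.quot = n / d;                 /* the call to __aeabi_uidiv */
            r.rem  = n - r.quot * d;        /* mul r3, r0, r2; sub r1, r1, r3 */
            return r;
    }
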
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index 46c2ed19ec9..f83d449141f 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. */
#endif
ENTRY(__lshrdi3)
+ENTRY(__aeabi_llsr)
subs r3, r2, #32
rsb ip, r2, #32
movmi al, al, lsr r2
movpl al, ah, lsr r3
- orrmi al, al, ah, lsl ip
+ ARM( orrmi al, al, ah, lsl ip )
+ THUMB( lslmi r3, ah, ip )
+ THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
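
__lshrdi3/__aeabi_llsr builds a 64-bit logical right shift from 32-bit operations: for shift counts below 32 the low word takes spill-over bits from the high word, and for 32 and above the low word is simply the high word shifted by (count - 32) while the high word goes to zero (ARM register-specified shifts of 32 or more yield 0). A C sketch of the same split:

    #include <stdint.h>

    static uint64_t lshrdi3_sketch(uint64_t v, unsigned n)  /* 0 < n < 64 */
    {
            uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);

            if (n < 32) {
                    lo = (lo >> n) | (hi << (32 - n)); /* movmi al; orrmi al, al, ah, lsl ip */
                    hi >>= n;                          /* mov ah, ah, lsr r2 */
            } else {
                    lo = hi >> (n - 32);               /* movpl al, ah, lsr r3 */
                    hi = 0;
            }
            return ((uint64_t)hi << 32) | lo;
    }
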
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index ac34fe55d21..1da86991d70 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -22,4 +22,5 @@ ENTRY(memchr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7e71d6708a8..a9b9e2287a0 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -13,8 +13,11 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#define LDR1W_SHIFT 0
+#define STR1W_SHIFT 0
+
.macro ldr1w ptr reg abort
- ldr \reg, [\ptr], #4
+ W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -30,7 +33,7 @@
.endm
.macro str1w ptr reg abort
- str \reg, [\ptr], #4
+ W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -57,3 +60,4 @@ ENTRY(memcpy)
#include "copy_template.S"
+ENDPROC(memcpy)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc14ac..d1fc0c0c342 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
.text
/*
@@ -55,11 +47,12 @@ ENTRY(memmove)
stmfd sp!, {r5 - r8}
blt 5f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+ CALGN( rsb ip, ip, #32 )
CALGN( add pc, r4, ip )
PLD( pld [r1, #-4] )
@@ -81,25 +74,25 @@ ENTRY(memmove)
rsb ip, ip, #32
addne pc, pc, ip @ C is always clear here
b 7f
-6: nop
- ldr r3, [r1, #-4]!
- ldr r4, [r1, #-4]!
- ldr r5, [r1, #-4]!
- ldr r6, [r1, #-4]!
- ldr r7, [r1, #-4]!
- ldr r8, [r1, #-4]!
- ldr lr, [r1, #-4]!
+6: W(nop)
+ W(ldr) r3, [r1, #-4]!
+ W(ldr) r4, [r1, #-4]!
+ W(ldr) r5, [r1, #-4]!
+ W(ldr) r6, [r1, #-4]!
+ W(ldr) r7, [r1, #-4]!
+ W(ldr) r8, [r1, #-4]!
+ W(ldr) lr, [r1, #-4]!
add pc, pc, ip
nop
- nop
- str r3, [r0, #-4]!
- str r4, [r0, #-4]!
- str r5, [r0, #-4]!
- str r6, [r0, #-4]!
- str r7, [r0, #-4]!
- str r8, [r0, #-4]!
- str lr, [r0, #-4]!
+ W(nop)
+ W(str) r3, [r0, #-4]!
+ W(str) r4, [r0, #-4]!
+ W(str) r5, [r0, #-4]!
+ W(str) r6, [r0, #-4]!
+ W(str) r7, [r0, #-4]!
+ W(str) r8, [r0, #-4]!
+ W(str) lr, [r0, #-4]!
CALGN( bcs 2b )
@@ -138,8 +131,7 @@ ENTRY(memmove)
subs r2, r2, #28
blt 14f
- CALGN( ands ip, r1, #31 )
- CALGN( rsb ip, ip, #32 )
+ CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
@@ -155,24 +147,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, lspush #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, lspull #\pull
+ mov ip, ip, lspush #\push
+ orr ip, ip, r9, lspull #\pull
+ mov r9, r9, lspush #\push
+ orr r9, r9, r8, lspull #\pull
+ mov r8, r8, lspush #\push
+ orr r8, r8, r7, lspull #\pull
+ mov r7, r7, lspush #\push
+ orr r7, r7, r6, lspull #\pull
+ mov r6, r6, lspush #\push
+ orr r6, r6, r5, lspull #\pull
+ mov r5, r5, lspush #\push
+ orr r5, r5, r4, lspull #\pull
+ mov r4, r4, lspush #\push
+ orr r4, r4, r3, lspull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -183,10 +175,10 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, lspush #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, lspull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
@@ -204,3 +196,4 @@ ENTRY(memmove)
18: backward_copy_shift push=24 pull=8
+ENDPROC(memmove)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index a1795f59993..94b0650ea98 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -14,67 +14,110 @@
.text
.align 5
- .word 0
-
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [r0], #1 @ 1
- strleb r1, [r0], #1 @ 1
- strb r1, [r0], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memzero again.
- */
ENTRY(memset)
ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
/*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
*/
- orr r1, r1, r1, lsl #8
+1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
cmp r2, #16
blt 4f
+
+#if ! CALGN(1)+0
+
/*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
*/
- str lr, [sp, #-4]!
- mov ip, r1
+ stmfd sp!, {r8, lr}
+ mov r8, r1
mov lr, r1
2: subs r2, r2, #64
- stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
bgt 2b
- LOADREGS(eqfd, sp!, {pc}) @ Now <64 bytes to go.
+ ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
- stmneia r0!, {r1, r3, ip, lr}
- stmneia r0!, {r1, r3, ip, lr}
+ stmneia ip!, {r1, r3, r8, lr}
+ stmneia ip!, {r1, r3, r8, lr}
tst r2, #16
- stmneia r0!, {r1, r3, ip, lr}
- ldr lr, [sp], #4
+ stmneia ip!, {r1, r3, r8, lr}
+ ldmfd sp!, {r8, lr}
+
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r8, lr}
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r7, r1
+ mov r8, r1
+ mov lr, r1
+
+ cmp r2, #96
+ tstgt ip, #31
+ ble 3f
+
+ and r8, ip, #31
+ rsb r8, r8, #32
+ sub r2, r2, r8
+ movs r8, r8, lsl #(32 - 4)
+ stmcsia ip!, {r4, r5, r6, r7}
+ stmmiia ip!, {r4, r5}
+ tst r8, #(1 << 30)
+ mov r8, r1
+ strne r1, [ip], #4
+
+3: subs r2, r2, #64
+ stmgeia ip!, {r1, r3-r8, lr}
+ stmgeia ip!, {r1, r3-r8, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r8, pc}
+
+ tst r2, #32
+ stmneia ip!, {r1, r3-r8, lr}
+ tst r2, #16
+ stmneia ip!, {r4-r7}
+ ldmfd sp!, {r4-r8, lr}
+
+#endif
4: tst r2, #8
- stmneia r0!, {r1, r3}
+ stmneia ip!, {r1, r3}
tst r2, #4
- strne r1, [r0], #4
+ strne r1, [ip], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
- strneb r1, [r0], #1
- strneb r1, [r0], #1
+ strneb r1, [ip], #1
+ strneb r1, [ip], #1
tst r2, #1
- strneb r1, [r0], #1
- RETINSTR(mov,pc,lr)
+ strneb r1, [ip], #1
+ mov pc, lr
+
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5b @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b
+ENDPROC(memset)
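
Two things change in memset above: stores now go through ip so that r0 is returned unmodified, as the C library contract requires, and a CALGN variant is added that first aligns the destination to a 32-byte boundary so the main loop writes whole cache lines. Stripped of the cache-line tuning, the overall shape is the familiar align-then-word-loop pattern; a plain C sketch (not the kernel routine itself):

    #include <stddef.h>
    #include <stdint.h>

    static void *memset_sketch(void *s, int c, size_t n)
    {
            uint8_t *p = s;                 /* 'ip' in the assembly; r0 stays intact */
            uint32_t w = (uint8_t)c;

            while (((uintptr_t)p & 3) && n) {       /* label 6: byte-align */
                    *p++ = (uint8_t)c;
                    n--;
            }
            w |= w << 8;                            /* label 1: replicate the byte */
            w |= w << 16;
            while (n >= 4) {                        /* stm loops: word stores */
                    *(uint32_t *)p = w;
                    p += 4;
                    n -= 4;
            }
            while (n--)                             /* label 5: trailing bytes */
                    *p++ = (uint8_t)c;
            return s;
    }
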
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 51ccc60160f..3fbdef5f802 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
*/
cmp r1, #16 @ 1 we can skip this chunk if we
blt 4f @ 1 have < 16 bytes
+
+#if ! CALGN(1)+0
+
/*
* We need an extra register for this loop - save the return address and
* use the LR
@@ -53,7 +56,7 @@ ENTRY(__memzero)
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
bgt 3b @ 1
- LOADREGS(eqfd, sp!, {pc}) @ 1/2 quick exit
+ ldmeqfd sp!, {pc} @ 1/2 quick exit
/*
* No need to correct the count; we're only testing bits from now on
*/
@@ -64,6 +67,47 @@ ENTRY(__memzero)
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r2
+ mov r5, r2
+ mov r6, r2
+ mov r7, r2
+ mov ip, r2
+ mov lr, r2
+
+ cmp r1, #96
+ andgts ip, r0, #31
+ ble 3f
+
+ rsb ip, ip, #32
+ sub r1, r1, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ movs ip, ip, lsl #2
+ strcs r2, [r0], #4
+
+3: subs r1, r1, #64
+ stmgeia r0!, {r2-r7, ip, lr}
+ stmgeia r0!, {r2-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
+
+ tst r1, #32
+ stmneia r0!, {r2-r7, ip, lr}
+ tst r1, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
+
+#endif
+
4: tst r1, #8 @ 1 8 bytes or more?
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?
@@ -77,4 +121,5 @@ ENTRY(__memzero)
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
- RETINSTR(mov,pc,lr) @ 1
+ mov pc, lr @ 1
+ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index c7fbdf00531..36c91b4957e 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -25,11 +25,12 @@
#endif
ENTRY(__muldi3)
+ENTRY(__aeabi_lmul)
mul xh, yl, xh
mla xh, xl, yh, xh
- mov ip, xl, asr #16
- mov yh, yl, asr #16
+ mov ip, xl, lsr #16
+ mov yh, yl, lsr #16
bic xl, xl, ip, lsl #16
bic yl, yl, yh, lsl #16
mla xh, yh, ip, xh
@@ -42,3 +43,5 @@ ENTRY(__muldi3)
adc xh, xh, ip, lsr #16
mov pc, lr
+ENDPROC(__muldi3)
+ENDPROC(__aeabi_lmul)
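
The asr to lsr change in __muldi3 matters because the routine forms the low 32x32 product from unsigned 16-bit halves; an arithmetic shift would sign-extend the upper half whenever bit 31 of the operand is set and corrupt the partial products. A C sketch of that decomposition for a single 32x32->64 multiply (illustrative only):

    #include <stdint.h>

    static uint64_t umul32x32_sketch(uint32_t x, uint32_t y)
    {
            uint32_t xh = x >> 16, xl = x & 0xffff; /* lsr #16 + bic, not asr */
            uint32_t yh = y >> 16, yl = y & 0xffff;
            uint64_t cross = (uint64_t)xh * yl + (uint64_t)xl * yh;

            return (uint64_t)xl * yl + (cross << 16) + ((uint64_t)xh * yh << 32);
    }
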
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 4593e9c07f0..3d73dcb959b 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -16,61 +16,83 @@
* __put_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* r2, r3 contains the value
* Outputs: r0 is the error code
* lr corrupted
*
- * No other registers must be altered. (see include/asm-arm/uaccess.h
+ * No other registers must be altered. (see <asm/uaccess.h>
* for specific ASM register usage).
*
* Note that ADDR_LIMIT is either 0 or 0xc0000000
* Note also that it is intended that __put_user_bad is not global.
*/
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
- .global __put_user_1
-__put_user_1:
-1: strbt r2, [r0]
+ENTRY(__put_user_1)
+ check_uaccess r0, 1, r1, ip, __put_user_bad
+1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
+ENDPROC(__put_user_1)
- .global __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+ check_uaccess r0, 2, r1, ip, __put_user_bad
mov ip, r2, lsr #8
+#ifdef CONFIG_THUMB2_KERNEL
#ifndef __ARMEB__
-2: strbt r2, [r0], #1
-3: strbt ip, [r0]
+2: TUSER(strb) r2, [r0]
+3: TUSER(strb) ip, [r0, #1]
#else
-2: strbt ip, [r0], #1
-3: strbt r2, [r0]
+2: TUSER(strb) ip, [r0]
+3: TUSER(strb) r2, [r0, #1]
#endif
+#else /* !CONFIG_THUMB2_KERNEL */
+#ifndef __ARMEB__
+2: TUSER(strb) r2, [r0], #1
+3: TUSER(strb) ip, [r0]
+#else
+2: TUSER(strb) ip, [r0], #1
+3: TUSER(strb) r2, [r0]
+#endif
+#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
mov pc, lr
+ENDPROC(__put_user_2)
- .global __put_user_4
-__put_user_4:
-4: strt r2, [r0]
+ENTRY(__put_user_4)
+ check_uaccess r0, 4, r1, ip, __put_user_bad
+4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
+ENDPROC(__put_user_4)
- .global __put_user_8
-__put_user_8:
-5: strt r2, [r0], #4
-6: strt r3, [r0]
+ENTRY(__put_user_8)
+ check_uaccess r0, 8, r1, ip, __put_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(str) r2, [r0]
+6: TUSER(str) r3, [r0, #4]
+#else
+5: TUSER(str) r2, [r0], #4
+6: TUSER(str) r3, [r0]
+#endif
mov r0, #0
mov pc, lr
+ENDPROC(__put_user_8)
__put_user_bad:
mov r0, #-EFAULT
mov pc, lr
+ENDPROC(__put_user_bad)
-.section __ex_table, "a"
+.pushsection __ex_table, "a"
.long 1b, __put_user_bad
.long 2b, __put_user_bad
.long 3b, __put_user_bad
.long 4b, __put_user_bad
.long 5b, __put_user_bad
.long 6b, __put_user_bad
-.previous
+.popsection
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
index 83bc23d5b03..618fedae4b3 100644
--- a/arch/arm/lib/setbit.S
+++ b/arch/arm/lib/setbit.S
@@ -12,11 +12,4 @@
#include "bitops.h"
.text
-/*
- * Purpose : Function to set a bit
- * Prototype: int set_bit(int bit, void *addr)
- */
-ENTRY(_set_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_set_bit_le)
- bitop orr
+bitop _set_bit, orr
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
deleted file mode 100644
index ff6ece487ff..00000000000
--- a/arch/arm/lib/sha1.S
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * linux/arch/arm/lib/sha1.S
- *
- * SHA transform optimized for ARM
- *
- * Copyright: (C) 2005 by Nicolas Pitre <nico@cam.org>
- * Created: September 17, 2005
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * The reference implementation for this code is linux/lib/sha1.c
- */
-
-#include <linux/linkage.h>
-
- .text
-
-
-/*
- * void sha_transform(__u32 *digest, const char *in, __u32 *W)
- *
- * Note: the "in" ptr may be unaligned.
- */
-
-ENTRY(sha_transform)
-
- stmfd sp!, {r4 - r8, lr}
-
- @ for (i = 0; i < 16; i++)
- @ W[i] = be32_to_cpu(in[i]); */
-
-#ifdef __ARMEB__
- mov r4, r0
- mov r0, r2
- mov r2, #64
- bl memcpy
- mov r2, r0
- mov r0, r4
-#else
- mov r3, r2
- mov lr, #16
-1: ldrb r4, [r1], #1
- ldrb r5, [r1], #1
- ldrb r6, [r1], #1
- ldrb r7, [r1], #1
- subs lr, lr, #1
- orr r5, r5, r4, lsl #8
- orr r6, r6, r5, lsl #8
- orr r7, r7, r6, lsl #8
- str r7, [r3], #4
- bne 1b
-#endif
-
- @ for (i = 0; i < 64; i++)
- @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
-
- sub r3, r2, #4
- mov lr, #64
-2: ldr r4, [r3, #4]!
- subs lr, lr, #1
- ldr r5, [r3, #8]
- ldr r6, [r3, #32]
- ldr r7, [r3, #52]
- eor r4, r4, r5
- eor r4, r4, r6
- eor r4, r4, r7
- mov r4, r4, ror #31
- str r4, [r3, #64]
- bne 2b
-
- /*
- * The SHA functions are:
- *
- * f1(B,C,D) = (D ^ (B & (C ^ D)))
- * f2(B,C,D) = (B ^ C ^ D)
- * f3(B,C,D) = ((B & C) | (D & (B | C)))
- *
- * Then the sub-blocks are processed as follows:
- *
- * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
- * B' = A
- * C' = ror(B, 2)
- * D' = C
- * E' = D
- *
- * We therefore unroll each loop 5 times to avoid register shuffling.
- * Also the ror for C (and also D and E which are successivelyderived
- * from it) is applied in place to cut on an additional mov insn for
- * each round.
- */
-
- .macro sha_f1, A, B, C, D, E
- ldr r3, [r2], #4
- eor ip, \C, \D
- add \E, r1, \E, ror #2
- and ip, \B, ip, ror #2
- add \E, \E, \A, ror #27
- eor ip, ip, \D, ror #2
- add \E, \E, r3
- add \E, \E, ip
- .endm
-
- .macro sha_f2, A, B, C, D, E
- ldr r3, [r2], #4
- add \E, r1, \E, ror #2
- eor ip, \B, \C, ror #2
- add \E, \E, \A, ror #27
- eor ip, ip, \D, ror #2
- add \E, \E, r3
- add \E, \E, ip
- .endm
-
- .macro sha_f3, A, B, C, D, E
- ldr r3, [r2], #4
- add \E, r1, \E, ror #2
- orr ip, \B, \C, ror #2
- add \E, \E, \A, ror #27
- and ip, ip, \D, ror #2
- add \E, \E, r3
- and r3, \B, \C, ror #2
- orr ip, ip, r3
- add \E, \E, ip
- .endm
-
- ldmia r0, {r4 - r8}
-
- mov lr, #4
- ldr r1, .L_sha_K + 0
-
- /* adjust initial values */
- mov r6, r6, ror #30
- mov r7, r7, ror #30
- mov r8, r8, ror #30
-
-3: subs lr, lr, #1
- sha_f1 r4, r5, r6, r7, r8
- sha_f1 r8, r4, r5, r6, r7
- sha_f1 r7, r8, r4, r5, r6
- sha_f1 r6, r7, r8, r4, r5
- sha_f1 r5, r6, r7, r8, r4
- bne 3b
-
- ldr r1, .L_sha_K + 4
- mov lr, #4
-
-4: subs lr, lr, #1
- sha_f2 r4, r5, r6, r7, r8
- sha_f2 r8, r4, r5, r6, r7
- sha_f2 r7, r8, r4, r5, r6
- sha_f2 r6, r7, r8, r4, r5
- sha_f2 r5, r6, r7, r8, r4
- bne 4b
-
- ldr r1, .L_sha_K + 8
- mov lr, #4
-
-5: subs lr, lr, #1
- sha_f3 r4, r5, r6, r7, r8
- sha_f3 r8, r4, r5, r6, r7
- sha_f3 r7, r8, r4, r5, r6
- sha_f3 r6, r7, r8, r4, r5
- sha_f3 r5, r6, r7, r8, r4
- bne 5b
-
- ldr r1, .L_sha_K + 12
- mov lr, #4
-
-6: subs lr, lr, #1
- sha_f2 r4, r5, r6, r7, r8
- sha_f2 r8, r4, r5, r6, r7
- sha_f2 r7, r8, r4, r5, r6
- sha_f2 r6, r7, r8, r4, r5
- sha_f2 r5, r6, r7, r8, r4
- bne 6b
-
- ldmia r0, {r1, r2, r3, ip, lr}
- add r4, r1, r4
- add r5, r2, r5
- add r6, r3, r6, ror #2
- add r7, ip, r7, ror #2
- add r8, lr, r8, ror #2
- stmia r0, {r4 - r8}
-
- ldmfd sp!, {r4 - r8, pc}
-
-.L_sha_K:
- .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
-
-
-/*
- * void sha_init(__u32 *buf)
- */
-
-.L_sha_initial_digest:
- .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
-
-ENTRY(sha_init)
-
- str lr, [sp, #-4]!
- adr r1, .L_sha_initial_digest
- ldmia r1, {r1, r2, r3, ip, lr}
- stmia r0, {r1, r2, r3, ip, lr}
- ldr pc, [sp], #4
-
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index 5b9b493733f..d8f2a1c1aea 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -23,4 +23,5 @@ ENTRY(strchr)
teq r2, r1
movne r0, #0
subeq r0, r0, #1
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(strchr)
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
deleted file mode 100644
index 629cc877527..00000000000
--- a/arch/arm/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * linux/arch/arm/lib/strncpy_from_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/*
- * Copy a string from user space to kernel space.
- * r0 = dst, r1 = src, r2 = byte length
- * returns the number of characters copied (strlen of copied string),
- * -EFAULT on exception, or "len" if we fill the whole buffer
- */
-ENTRY(__arch_strncpy_from_user)
- save_lr
- mov ip, r1
-1: subs r2, r2, #1
-USER( ldrplbt r3, [r1], #1)
- bmi 2f
- strb r3, [r0], #1
- teq r3, #0
- bne 1b
- sub r1, r1, #1 @ take NUL character out of count
-2: sub r0, r1, ip
- restore_pc
-
- .section .fixup,"ax"
- .align 0
-9001: mov r3, #0
- strb r3, [r0, #0] @ null terminate
- mov r0, #-EFAULT
- restore_pc
- .previous
-
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
deleted file mode 100644
index 67bcd826812..00000000000
--- a/arch/arm/lib/strnlen_user.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * linux/arch/arm/lib/strnlen_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/* Prototype: unsigned long __arch_strnlen_user(const char *str, long n)
- * Purpose : get length of a string in user memory
- * Params : str - address of string in user memory
- * Returns : length of string *including terminator*
- * or zero on exception, or n + 1 if too long
- */
-ENTRY(__arch_strnlen_user)
- save_lr
- mov r2, r0
-1:
-USER( ldrbt r3, [r0], #1)
- teq r3, #0
- beq 2f
- subs r1, r1, #1
- bne 1b
- add r0, r0, #1
-2: sub r0, r0, r2
- restore_pc
-
- .section .fixup,"ax"
- .align 0
-9001: mov r0, #0
- restore_pc
- .previous
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index fa923f026f1..302f20cd242 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -22,4 +22,5 @@ ENTRY(strrchr)
teq r2, #0
bne 1b
mov r0, r3
- RETINSTR(mov,pc,lr)
+ mov pc, lr
+ENDPROC(strrchr)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index b25dcd2be53..4becdc3a59c 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -12,7 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_change_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_change_bit_le)
- testop eor, strb
+testop _test_and_change_bit, eor, str
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 2dcc4b16b68..918841dcce7 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -12,7 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_clear_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_clear_bit_le)
- testop bicne, strneb
+testop _test_and_clear_bit, bicne, strne
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 9011c969761..8d1b2fe9e48 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -12,7 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_set_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_set_bit_le)
- testop orreq, streqb
+testop _test_and_set_bit, orreq, streq
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 6f1b5b49fe4..e50520904b7 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -14,12 +14,13 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
.text
#define PAGE_SHIFT 12
-/* Prototype: int __arch_copy_to_user(void *to, const char *from, size_t n)
+/* Prototype: int __copy_to_user(void *to, const char *from, size_t n)
* Purpose : copy a block to user memory from kernel memory
* Params : to - user memory
* : from - kernel memory
@@ -27,42 +28,42 @@
* Returns : Number of bytes NOT copied.
*/
-.c2u_dest_not_aligned:
+.Lc2u_dest_not_aligned:
rsb ip, ip, #4
cmp ip, #2
ldrb r3, [r1], #1
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #1
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
sub r2, r2, ip
- b .c2u_dest_aligned
+ b .Lc2u_dest_aligned
-ENTRY(__arch_copy_to_user)
+ENTRY(__copy_to_user)
stmfd sp!, {r2, r4 - r7, lr}
cmp r2, #4
- blt .c2u_not_enough
+ blt .Lc2u_not_enough
ands ip, r0, #3
- bne .c2u_dest_not_aligned
-.c2u_dest_aligned:
+ bne .Lc2u_dest_not_aligned
+.Lc2u_dest_aligned:
ands ip, r1, #3
- bne .c2u_src_not_aligned
+ bne .Lc2u_src_not_aligned
/*
* Seeing as there has to be at least 8 bytes to copy, we can
* copy one word, and force a user-mode page fault...
*/
-.c2u_0fupi: subs r2, r2, #4
+.Lc2u_0fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .c2u_0nowords
+ bmi .Lc2u_0nowords
ldr r3, [r1], #4
-USER( strt r3, [r0], #4) @ May fault
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .c2u_0fupi
+ beq .Lc2u_0fupi
/*
* ip = max no. of bytes to copy before needing another "strt" insn
*/
@@ -70,16 +71,16 @@ USER( strt r3, [r0], #4) @ May fault
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #32
- blt .c2u_0rem8lp
+ blt .Lc2u_0rem8lp
-.c2u_0cpy8lp: ldmia r1!, {r3 - r6}
+.Lc2u_0cpy8lp: ldmia r1!, {r3 - r6}
stmia r0!, {r3 - r6} @ Shouldnt fault
ldmia r1!, {r3 - r6}
subs ip, ip, #32
stmia r0!, {r3 - r6} @ Shouldnt fault
- bpl .c2u_0cpy8lp
+ bpl .Lc2u_0cpy8lp
-.c2u_0rem8lp: cmn ip, #16
+.Lc2u_0rem8lp: cmn ip, #16
ldmgeia r1!, {r3 - r6}
stmgeia r0!, {r3 - r6} @ Shouldnt fault
tst ip, #8
@@ -87,244 +88,246 @@ USER( strt r3, [r0], #4) @ May fault
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
ldrne r3, [r1], #4
- strnet r3, [r0], #4 @ Shouldnt fault
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
- beq .c2u_0fupi
-.c2u_0nowords: teq ip, #0
- beq .c2u_finished
-.c2u_nowords: cmp ip, #2
+ beq .Lc2u_0fupi
+.Lc2u_0nowords: teq ip, #0
+ beq .Lc2u_finished
+.Lc2u_nowords: cmp ip, #2
ldrb r3, [r1], #1
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #1
-USER( strgtbt r3, [r0], #1) @ May fault
- b .c2u_finished
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
-.c2u_not_enough:
+.Lc2u_not_enough:
movs ip, r2
- bne .c2u_nowords
-.c2u_finished: mov r0, #0
- LOADREGS(fd,sp!,{r2, r4 - r7, pc})
+ bne .Lc2u_nowords
+.Lc2u_finished: mov r0, #0
+ ldmfd sp!, {r2, r4 - r7, pc}
-.c2u_src_not_aligned:
+.Lc2u_src_not_aligned:
bic r1, r1, #3
ldr r7, [r1], #4
cmp ip, #2
- bgt .c2u_3fupi
- beq .c2u_2fupi
-.c2u_1fupi: subs r2, r2, #4
+ bgt .Lc2u_3fupi
+ beq .Lc2u_2fupi
+.Lc2u_1fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .c2u_1nowords
- mov r3, r7, pull #8
+ bmi .Lc2u_1nowords
+ mov r3, r7, lspull #8
ldr r7, [r1], #4
- orr r3, r3, r7, push #24
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #24
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .c2u_1fupi
+ beq .Lc2u_1fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .c2u_1rem8lp
+ blt .Lc2u_1rem8lp
-.c2u_1cpy8lp: mov r3, r7, pull #8
+.Lc2u_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6} @ Shouldnt fault
- bpl .c2u_1cpy8lp
+ bpl .Lc2u_1cpy8lp
-.c2u_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+.Lc2u_1rem8lp: tst ip, #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #24
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #24
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
- beq .c2u_1fupi
-.c2u_1nowords: mov r3, r7, get_byte_1
+ beq .Lc2u_1fupi
+.Lc2u_1nowords: mov r3, r7, get_byte_1
teq ip, #0
- beq .c2u_finished
+ beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
movge r3, r7, get_byte_2
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
movgt r3, r7, get_byte_3
-USER( strgtbt r3, [r0], #1) @ May fault
- b .c2u_finished
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
-.c2u_2fupi: subs r2, r2, #4
+.Lc2u_2fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .c2u_2nowords
- mov r3, r7, pull #16
+ bmi .Lc2u_2nowords
+ mov r3, r7, lspull #16
ldr r7, [r1], #4
- orr r3, r3, r7, push #16
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #16
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .c2u_2fupi
+ beq .Lc2u_2fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .c2u_2rem8lp
+ blt .Lc2u_2rem8lp
-.c2u_2cpy8lp: mov r3, r7, pull #16
+.Lc2u_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6} @ Shouldnt fault
- bpl .c2u_2cpy8lp
+ bpl .Lc2u_2cpy8lp
-.c2u_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+.Lc2u_2rem8lp: tst ip, #8
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #16
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #16
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
- beq .c2u_2fupi
-.c2u_2nowords: mov r3, r7, get_byte_2
+ beq .Lc2u_2fupi
+.Lc2u_2nowords: mov r3, r7, get_byte_2
teq ip, #0
- beq .c2u_finished
+ beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
movge r3, r7, get_byte_3
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #0
-USER( strgtbt r3, [r0], #1) @ May fault
- b .c2u_finished
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
-.c2u_3fupi: subs r2, r2, #4
+.Lc2u_3fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .c2u_3nowords
- mov r3, r7, pull #24
+ bmi .Lc2u_3nowords
+ mov r3, r7, lspull #24
ldr r7, [r1], #4
- orr r3, r3, r7, push #8
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #8
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .c2u_3fupi
+ beq .Lc2u_3fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .c2u_3rem8lp
+ blt .Lc2u_3rem8lp
-.c2u_3cpy8lp: mov r3, r7, pull #24
+.Lc2u_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6} @ Shouldnt fault
- bpl .c2u_3cpy8lp
+ bpl .Lc2u_3cpy8lp
-.c2u_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+.Lc2u_3rem8lp: tst ip, #8
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #8
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #8
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
- beq .c2u_3fupi
-.c2u_3nowords: mov r3, r7, get_byte_3
+ beq .Lc2u_3fupi
+.Lc2u_3nowords: mov r3, r7, get_byte_3
teq ip, #0
- beq .c2u_finished
+ beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #0
-USER( strgtbt r3, [r0], #1) @ May fault
- b .c2u_finished
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
+ENDPROC(__copy_to_user)
- .section .fixup,"ax"
+ .pushsection .fixup,"ax"
.align 0
-9001: LOADREGS(fd,sp!, {r0, r4 - r7, pc})
- .previous
+9001: ldmfd sp!, {r0, r4 - r7, pc}
+ .popsection
-/* Prototype: unsigned long __arch_copy_from_user(void *to,const void *from,unsigned long n);
+/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n);
* Purpose : copy a block from user memory to kernel memory
* Params : to - kernel memory
* : from - user memory
* : n - number of bytes to copy
* Returns : Number of bytes NOT copied.
*/
-.cfu_dest_not_aligned:
+.Lcfu_dest_not_aligned:
rsb ip, ip, #4
cmp ip, #2
-USER( ldrbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
sub r2, r2, ip
- b .cfu_dest_aligned
+ b .Lcfu_dest_aligned
-ENTRY(__arch_copy_from_user)
+ENTRY(__copy_from_user)
stmfd sp!, {r0, r2, r4 - r7, lr}
cmp r2, #4
- blt .cfu_not_enough
+ blt .Lcfu_not_enough
ands ip, r0, #3
- bne .cfu_dest_not_aligned
-.cfu_dest_aligned:
+ bne .Lcfu_dest_not_aligned
+.Lcfu_dest_aligned:
ands ip, r1, #3
- bne .cfu_src_not_aligned
+ bne .Lcfu_src_not_aligned
+
/*
* Seeing as there has to be at least 8 bytes to copy, we can
* copy one word, and force a user-mode page fault...
*/
-.cfu_0fupi: subs r2, r2, #4
+.Lcfu_0fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .cfu_0nowords
-USER( ldrt r3, [r1], #4)
+ bmi .Lcfu_0nowords
+USER( TUSER( ldr) r3, [r1], #4)
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .cfu_0fupi
+ beq .Lcfu_0fupi
/*
* ip = max no. of bytes to copy before needing another "strt" insn
*/
@@ -332,216 +335,218 @@ USER( ldrt r3, [r1], #4)
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #32
- blt .cfu_0rem8lp
+ blt .Lcfu_0rem8lp
-.cfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault
+.Lcfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault
stmia r0!, {r3 - r6}
ldmia r1!, {r3 - r6} @ Shouldnt fault
subs ip, ip, #32
stmia r0!, {r3 - r6}
- bpl .cfu_0cpy8lp
+ bpl .Lcfu_0cpy8lp
-.cfu_0rem8lp: cmn ip, #16
+.Lcfu_0rem8lp: cmn ip, #16
ldmgeia r1!, {r3 - r6} @ Shouldnt fault
stmgeia r0!, {r3 - r6}
tst ip, #8
ldmneia r1!, {r3 - r4} @ Shouldnt fault
stmneia r0!, {r3 - r4}
tst ip, #4
- ldrnet r3, [r1], #4 @ Shouldnt fault
+ TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault
strne r3, [r0], #4
ands ip, ip, #3
- beq .cfu_0fupi
-.cfu_0nowords: teq ip, #0
- beq .cfu_finished
-.cfu_nowords: cmp ip, #2
-USER( ldrbt r3, [r1], #1) @ May fault
+ beq .Lcfu_0fupi
+.Lcfu_0nowords: teq ip, #0
+ beq .Lcfu_finished
+.Lcfu_nowords: cmp ip, #2
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
- b .cfu_finished
+ b .Lcfu_finished
-.cfu_not_enough:
+.Lcfu_not_enough:
movs ip, r2
- bne .cfu_nowords
-.cfu_finished: mov r0, #0
+ bne .Lcfu_nowords
+.Lcfu_finished: mov r0, #0
add sp, sp, #8
- LOADREGS(fd,sp!,{r4 - r7, pc})
+ ldmfd sp!, {r4 - r7, pc}
-.cfu_src_not_aligned:
+.Lcfu_src_not_aligned:
bic r1, r1, #3
-USER( ldrt r7, [r1], #4) @ May fault
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
cmp ip, #2
- bgt .cfu_3fupi
- beq .cfu_2fupi
-.cfu_1fupi: subs r2, r2, #4
+ bgt .Lcfu_3fupi
+ beq .Lcfu_2fupi
+.Lcfu_1fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .cfu_1nowords
- mov r3, r7, pull #8
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #24
+ bmi .Lcfu_1nowords
+ mov r3, r7, lspull #8
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #24
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .cfu_1fupi
+ beq .Lcfu_1fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .cfu_1rem8lp
+ blt .Lcfu_1rem8lp
-.cfu_1cpy8lp: mov r3, r7, pull #8
+.Lcfu_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6}
- bpl .cfu_1cpy8lp
+ bpl .Lcfu_1cpy8lp
-.cfu_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+.Lcfu_1rem8lp: tst ip, #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #8
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #24
+ movne r3, r7, lspull #8
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #24
strne r3, [r0], #4
ands ip, ip, #3
- beq .cfu_1fupi
-.cfu_1nowords: mov r3, r7, get_byte_1
+ beq .Lcfu_1fupi
+.Lcfu_1nowords: mov r3, r7, get_byte_1
teq ip, #0
- beq .cfu_finished
+ beq .Lcfu_finished
cmp ip, #2
strb r3, [r0], #1
movge r3, r7, get_byte_2
strgeb r3, [r0], #1
movgt r3, r7, get_byte_3
strgtb r3, [r0], #1
- b .cfu_finished
+ b .Lcfu_finished
-.cfu_2fupi: subs r2, r2, #4
+.Lcfu_2fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .cfu_2nowords
- mov r3, r7, pull #16
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #16
+ bmi .Lcfu_2nowords
+ mov r3, r7, lspull #16
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #16
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .cfu_2fupi
+ beq .Lcfu_2fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .cfu_2rem8lp
+ blt .Lcfu_2rem8lp
+
-.cfu_2cpy8lp: mov r3, r7, pull #16
+.Lcfu_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6}
- bpl .cfu_2cpy8lp
+ bpl .Lcfu_2cpy8lp
-.cfu_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+.Lcfu_2rem8lp: tst ip, #8
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #16
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #16
+ movne r3, r7, lspull #16
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #16
strne r3, [r0], #4
ands ip, ip, #3
- beq .cfu_2fupi
-.cfu_2nowords: mov r3, r7, get_byte_2
+ beq .Lcfu_2fupi
+.Lcfu_2nowords: mov r3, r7, get_byte_2
teq ip, #0
- beq .cfu_finished
+ beq .Lcfu_finished
cmp ip, #2
strb r3, [r0], #1
movge r3, r7, get_byte_3
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #0) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault
strgtb r3, [r0], #1
- b .cfu_finished
+ b .Lcfu_finished
-.cfu_3fupi: subs r2, r2, #4
+.Lcfu_3fupi: subs r2, r2, #4
addmi ip, r2, #4
- bmi .cfu_3nowords
- mov r3, r7, pull #24
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #8
+ bmi .Lcfu_3nowords
+ mov r3, r7, lspull #24
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #8
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
- beq .cfu_3fupi
+ beq .Lcfu_3fupi
cmp r2, ip
movlt ip, r2
sub r2, r2, ip
subs ip, ip, #16
- blt .cfu_3rem8lp
+ blt .Lcfu_3rem8lp
-.cfu_3cpy8lp: mov r3, r7, pull #24
+.Lcfu_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7} @ Shouldnt fault
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6}
subs ip, ip, #16
- bpl .cfu_3cpy8lp
+ bpl .Lcfu_3cpy8lp
-.cfu_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+.Lcfu_3rem8lp: tst ip, #8
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #24
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #8
+ movne r3, r7, lspull #24
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #8
strne r3, [r0], #4
ands ip, ip, #3
- beq .cfu_3fupi
-.cfu_3nowords: mov r3, r7, get_byte_3
+ beq .Lcfu_3fupi
+.Lcfu_3nowords: mov r3, r7, get_byte_3
teq ip, #0
- beq .cfu_finished
+ beq .Lcfu_finished
cmp ip, #2
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
- b .cfu_finished
+ b .Lcfu_finished
+ENDPROC(__copy_from_user)
- .section .fixup,"ax"
+ .pushsection .fixup,"ax"
.align 0
/*
* We took an exception. r0 contains a pointer to
@@ -554,6 +559,6 @@ USER( ldrgtbt r3, [r1], #1) @ May fault
movne r1, r4
blne __memzero
mov r0, r4
- LOADREGS(fd,sp!, {r4 - r7, pc})
- .previous
+ ldmfd sp!, {r4 - r7, pc}
+ .popsection
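
A detail worth keeping in mind when reading the .Lc2u_*fupi/.Lcfu_*fupi blocks above: the lsl #32 - PAGE_SHIFT / rsb / lsr #32 - PAGE_SHIFT sequence leaves in ip the number of bytes remaining before the next page boundary (zero when already on one), so only the first access on each page needs a faulting TUSER() instruction and the rest of that page can be copied with plain ldm/stm. The same computation in C, as a sketch:

    #include <stdint.h>

    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)

    /* Bytes from 'addr' to the next page boundary, 0 if already aligned --
     * what the lsl/rsb/lsr trick leaves in ip on a 32-bit register. */
    static uint32_t bytes_to_page_end(uint32_t addr)
    {
            uint32_t off = addr & (PAGE_SIZE - 1);

            return (PAGE_SIZE - off) & (PAGE_SIZE - 1);
    }

The caller then clamps that value to the bytes still to copy (cmp r2, ip; movlt ip, r2) before entering the unrolled loops.
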
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
new file mode 100644
index 00000000000..3e58d710013
--- /dev/null
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -0,0 +1,270 @@
+/*
+ * linux/arch/arm/lib/uaccess_with_memcpy.c
+ *
+ * Written by: Lennert Buytenhek and Nicolas Pitre
+ * Copyright (C) 2009 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h> /* for in_atomic() */
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <asm/current.h>
+#include <asm/page.h>
+
+static int
+pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
+{
+ unsigned long addr = (unsigned long)_addr;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ pud_t *pud;
+ spinlock_t *ptl;
+
+ pgd = pgd_offset(current->mm, addr);
+ if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
+ return 0;
+
+ pud = pud_offset(pgd, addr);
+ if (unlikely(pud_none(*pud) || pud_bad(*pud)))
+ return 0;
+
+ pmd = pmd_offset(pud, addr);
+ if (unlikely(pmd_none(*pmd)))
+ return 0;
+
+ /*
+ * A pmd can be bad if it refers to a HugeTLB or THP page.
+ *
+ * Both THP and HugeTLB pages have the same pmd layout
+ * and should not be manipulated by the pte functions.
+ *
+ * Lock the page table for the destination and check
+ * to see that it's still huge and whether or not we will
+ * need to fault on write, or if we have a splitting THP.
+ */
+ if (unlikely(pmd_thp_or_huge(*pmd))) {
+ ptl = &current->mm->page_table_lock;
+ spin_lock(ptl);
+ if (unlikely(!pmd_thp_or_huge(*pmd)
+ || pmd_hugewillfault(*pmd)
+ || pmd_trans_splitting(*pmd))) {
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ *ptep = NULL;
+ *ptlp = ptl;
+ return 1;
+ }
+
+ if (unlikely(pmd_bad(*pmd)))
+ return 0;
+
+ pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+ if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
+ !pte_write(*pte) || !pte_dirty(*pte))) {
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+ }
+
+ *ptep = pte;
+ *ptlp = ptl;
+
+ return 1;
+}
+
+static unsigned long noinline
+__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
+{
+ int atomic;
+
+ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+ memcpy((void *)to, from, n);
+ return 0;
+ }
+
+ /* the mmap semaphore is taken only if not in an atomic context */
+ atomic = in_atomic();
+
+ if (!atomic)
+ down_read(&current->mm->mmap_sem);
+ while (n) {
+ pte_t *pte;
+ spinlock_t *ptl;
+ int tocopy;
+
+ while (!pin_page_for_write(to, &pte, &ptl)) {
+ if (!atomic)
+ up_read(&current->mm->mmap_sem);
+ if (__put_user(0, (char __user *)to))
+ goto out;
+ if (!atomic)
+ down_read(&current->mm->mmap_sem);
+ }
+
+ tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
+ if (tocopy > n)
+ tocopy = n;
+
+ memcpy((void *)to, from, tocopy);
+ to += tocopy;
+ from += tocopy;
+ n -= tocopy;
+
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
+ }
+ if (!atomic)
+ up_read(&current->mm->mmap_sem);
+
+out:
+ return n;
+}
+
+unsigned long
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ /*
+	 * This test is kept out of the main function above to keep the
+	 * overhead for small copies low, avoiding a large register dump
+	 * on the stack just to reload those registers right away.
+	 * With the frame pointer disabled, tail call optimization kicks
+	 * in as well, making this test almost invisible.
+ */
+ if (n < 64)
+ return __copy_to_user_std(to, from, n);
+ return __copy_to_user_memcpy(to, from, n);
+}
+
+static unsigned long noinline
+__clear_user_memset(void __user *addr, unsigned long n)
+{
+ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+ memset((void *)addr, 0, n);
+ return 0;
+ }
+
+ down_read(&current->mm->mmap_sem);
+ while (n) {
+ pte_t *pte;
+ spinlock_t *ptl;
+ int tocopy;
+
+ while (!pin_page_for_write(addr, &pte, &ptl)) {
+ up_read(&current->mm->mmap_sem);
+ if (__put_user(0, (char __user *)addr))
+ goto out;
+ down_read(&current->mm->mmap_sem);
+ }
+
+ tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
+ if (tocopy > n)
+ tocopy = n;
+
+ memset((void *)addr, 0, tocopy);
+ addr += tocopy;
+ n -= tocopy;
+
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
+ }
+ up_read(&current->mm->mmap_sem);
+
+out:
+ return n;
+}
+
+unsigned long __clear_user(void __user *addr, unsigned long n)
+{
+	/* See rationale for this in __copy_to_user() above. */
+ if (n < 64)
+ return __clear_user_std(addr, n);
+ return __clear_user_memset(addr, n);
+}
+
+#if 0
+
+/*
+ * This code is disabled by default, but kept around in case the chosen
+ * thresholds need to be revalidated. A runtime-determined variable
+ * threshold would imply some (small but real) overhead, and so far the
+ * measurements on the targets of interest haven't shown a worthwhile
+ * variation.
+ *
+ * Note that a fairly precise sched_clock() implementation is needed
+ * for results to make some sense.
+ */
+
+#include <linux/vmalloc.h>
+
+static int __init test_size_threshold(void)
+{
+ struct page *src_page, *dst_page;
+ void *user_ptr, *kernel_ptr;
+ unsigned long long t0, t1, t2;
+ int size, ret;
+
+ ret = -ENOMEM;
+ src_page = alloc_page(GFP_KERNEL);
+ if (!src_page)
+ goto no_src;
+ dst_page = alloc_page(GFP_KERNEL);
+ if (!dst_page)
+ goto no_dst;
+ kernel_ptr = page_address(src_page);
+ user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
+ if (!user_ptr)
+ goto no_vmap;
+
+ /* warm up the src page dcache */
+ ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
+
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
+ t0 = sched_clock();
+ ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
+ t1 = sched_clock();
+ ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
+ t2 = sched_clock();
+ printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+ }
+
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
+ t0 = sched_clock();
+ ret |= __clear_user_memset(user_ptr, size);
+ t1 = sched_clock();
+ ret |= __clear_user_std(user_ptr, size);
+ t2 = sched_clock();
+ printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+ }
+
+ if (ret)
+ ret = -EFAULT;
+
+ vunmap(user_ptr);
+no_vmap:
+ put_page(dst_page);
+no_dst:
+ put_page(src_page);
+no_src:
+ return ret;
+}
+
+subsys_initcall(test_size_threshold);
+
+#endif
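
In __copy_to_user_memcpy() above, each loop iteration copies at most up to the end of the current destination page, so pin_page_for_write() only ever has to hold one page-table lock at a time. A small user-space sketch of that chunking arithmetic (the 4K page size, address, and length below are made-up values for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Same expression as in the loop above: bytes left until the next page boundary. */
static unsigned long bytes_to_page_end(unsigned long addr)
{
	return (~addr & ~PAGE_MASK) + 1;
}

int main(void)
{
	unsigned long to = 0x1000ff0;	/* hypothetical destination address */
	unsigned long n = 10000;	/* hypothetical copy length */

	while (n) {
		unsigned long tocopy = bytes_to_page_end(to);

		if (tocopy > n)
			tocopy = n;
		printf("copy %4lu bytes at 0x%08lx\n", tocopy, to);
		to += tocopy;
		n -= tocopy;
	}
	return 0;
}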
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index 112630f93e5..f0df6a91db0 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -33,3 +33,20 @@ ENTRY(__ucmpdi2)
movhi r0, #2
mov pc, lr
+ENDPROC(__ucmpdi2)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_ulcmp)
+
+ cmp xh, yh
+ cmpeq xl, yl
+ movlo r0, #-1
+ moveq r0, #0
+ movhi r0, #1
+ mov pc, lr
+
+ENDPROC(__aeabi_ulcmp)
+
+#endif
+
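
__aeabi_ulcmp added above is the EABI helper for comparing two unsigned 64-bit values held in register pairs: cmp on the high words, a conditional cmp on the low words, then -1, 0 or 1 in r0. A rough C equivalent of what that sequence computes (a sketch for illustration, not the in-kernel code):

#include <stdio.h>

/* Returns -1, 0 or 1 for a < b, a == b, a > b, like the assembly above. */
static int aeabi_ulcmp_sketch(unsigned long long a, unsigned long long b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       aeabi_ulcmp_sketch(1, 2),
	       aeabi_ulcmp_sketch(2, 2),
	       aeabi_ulcmp_sketch(3, 2));
	return 0;
}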
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
new file mode 100644
index 00000000000..2c40aeab3ea
--- /dev/null
+++ b/arch/arm/lib/xor-neon.c
@@ -0,0 +1,46 @@
+/*
+ * linux/arch/arm/lib/xor-neon.c
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+
+#ifndef __ARM_NEON__
+#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
+#endif
+
+/*
+ * Pull in the reference implementations while instructing GCC (through
+ * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
+ * NEON instructions.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC optimize "tree-vectorize"
+#else
+/*
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
+ */
+#warning This code requires at least version 4.6 of GCC
+#endif
+
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#include <asm-generic/xor.h>
+
+struct xor_block_template const xor_block_neon_inner = {
+ .name = "__inner_neon__",
+ .do_2 = xor_8regs_2,
+ .do_3 = xor_8regs_3,
+ .do_4 = xor_8regs_4,
+ .do_5 = xor_8regs_5,
+};
+EXPORT_SYMBOL(xor_block_neon_inner);
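
The functions wired into xor_block_neon_inner come from the generic reference implementation in <asm-generic/xor.h>; with the tree-vectorize pragma above and NEON enabled, GCC is expected to turn their plain word-at-a-time XOR loops into NEON loads, XORs and stores. A simplified sketch of the shape of such a loop (the real xor_8regs_* helpers are unrolled several words per iteration; the function name and the two-source case here are just for illustration):

#include <stdio.h>

/* Two-source XOR over "bytes" bytes: the kind of loop the vectorizer targets. */
static void xor_2src_sketch(unsigned long bytes,
			    unsigned long *p1, const unsigned long *p2)
{
	long lines = bytes / sizeof(unsigned long);

	while (lines--)
		*p1++ ^= *p2++;
}

int main(void)
{
	unsigned long a[4] = { 1, 2, 3, 4 };
	unsigned long b[4] = { 4, 3, 2, 1 };

	xor_2src_sketch(sizeof(a), a, b);
	printf("%lu %lu %lu %lu\n", a[0], a[1], a[2], a[3]);
	return 0;
}

In-kernel callers are expected to reach this template through the arch xor glue, which is responsible for enabling the NEON unit around these calls; that wrapper is outside this patch.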