aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/lib')
-rw-r--r--arch/arm/lib/Makefile15
-rw-r--r--arch/arm/lib/backtrace.S26
-rw-r--r--arch/arm/lib/bitops.h84
-rw-r--r--arch/arm/lib/bswapsdi2.S36
-rw-r--r--arch/arm/lib/call_with_stack.S44
-rw-r--r--arch/arm/lib/changebit.S10
-rw-r--r--arch/arm/lib/clearbit.S11
-rw-r--r--arch/arm/lib/copy_template.S36
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S96
-rw-r--r--arch/arm/lib/delay-loop.S (renamed from arch/arm/lib/delay.S)25
-rw-r--r--arch/arm/lib/delay.c93
-rw-r--r--arch/arm/lib/div64.S8
-rw-r--r--arch/arm/lib/ecard.S1
-rw-r--r--arch/arm/lib/findbit.S6
-rw-r--r--arch/arm/lib/getuser.S28
-rw-r--r--arch/arm/lib/io-acorn.S3
-rw-r--r--arch/arm/lib/io-readsl.S12
-rw-r--r--arch/arm/lib/io-readsw-armv3.S1
-rw-r--r--arch/arm/lib/io-shark.c13
-rw-r--r--arch/arm/lib/io-writesl.S12
-rw-r--r--arch/arm/lib/io-writesw-armv3.S1
-rw-r--r--arch/arm/lib/lib1funcs.S25
-rw-r--r--arch/arm/lib/memmove.S36
-rw-r--r--arch/arm/lib/memset.S100
-rw-r--r--arch/arm/lib/putuser.S35
-rw-r--r--arch/arm/lib/setbit.S11
-rw-r--r--arch/arm/lib/sha1.S211
-rw-r--r--arch/arm/lib/strncpy_from_user.S43
-rw-r--r--arch/arm/lib/strnlen_user.S40
-rw-r--r--arch/arm/lib/testchangebit.S7
-rw-r--r--arch/arm/lib/testclearbit.S7
-rw-r--r--arch/arm/lib/testsetbit.S7
-rw-r--r--arch/arm/lib/uaccess.S275
-rw-r--r--arch/arm/lib/uaccess_with_memcpy.c49
-rw-r--r--arch/arm/lib/xor-neon.c46
35 files changed, 720 insertions, 733 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 59ff42ddf0a..0573faab96a 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,14 +6,14 @@
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
- delay.o findbit.o memchr.o memcpy.o \
+ delay.o delay-loop.o findbit.o memchr.o memcpy.o \
memmove.o memset.o memzero.o setbit.o \
- strncpy_from_user.o strnlen_user.o \
strchr.o strrchr.o \
testchangebit.o testclearbit.o testsetbit.o \
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- ucmpdi2.o lib1funcs.o div64.o sha1.o \
- io-readsb.o io-writesb.o io-readsl.o io-writesl.o
+ ucmpdi2.o lib1funcs.o div64.o \
+ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
+ call_with_stack.o bswapsdi2.o
mmu-y := clear_user.o copy_page.o getuser.o putuser.o
@@ -41,7 +41,12 @@ else
endif
lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o
-lib-$(CONFIG_ARCH_SHARK) += io-shark.o
$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
+
+ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
+ NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon
+ CFLAGS_xor-neon.o += $(NEON_FLAGS)
+ obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
+endif
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index a673297b0cf..4102be617fc 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -22,15 +22,10 @@
#define mask r7
#define offset r8
-ENTRY(__backtrace)
- mov r1, #0x10
- mov r0, fp
-
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
mov pc, lr
-ENDPROC(__backtrace)
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...
@@ -85,14 +80,14 @@ for_each_frame: tst frame, mask @ Check for address exceptions
ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
ldr r3, .Ldsi+4
- teq r3, r1, lsr #10
+ teq r3, r1, lsr #11
ldreq r0, [frame, #-8] @ get sp
subeq r0, r0, #4 @ point at the last arg
bleq .Ldumpstm @ dump saved registers
1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
ldr r3, .Ldsi @ instruction exists,
- teq r3, r1, lsr #10
+ teq r3, r1, lsr #11
subeq r0, frame, #16
bleq .Ldumpstm @ dump saved registers
@@ -107,7 +102,6 @@ for_each_frame: tst frame, mask @ Check for address exceptions
mov r1, frame
bl printk
no_frame: ldmfd sp!, {r4 - r8, pc}
-ENDPROC(__backtrace)
ENDPROC(c_backtrace)
.pushsection __ex_table,"a"
@@ -134,11 +128,11 @@ ENDPROC(c_backtrace)
beq 2f
add r7, r7, #1
teq r7, #6
- moveq r7, #1
- moveq r1, #'\n'
- movne r1, #' '
- ldr r3, [stack], #-4
- mov r2, reg
+ moveq r7, #0
+ adr r3, .Lcr
+ addne r3, r3, #1 @ skip newline
+ ldr r2, [stack], #-4
+ mov r1, reg
adr r0, .Lfp
bl printk
2: subs reg, reg, #1
@@ -148,11 +142,11 @@ ENDPROC(c_backtrace)
blne printk
ldmfd sp!, {instr, reg, stack, r7, pc}
-.Lfp: .asciz "%cr%d:%08x"
+.Lfp: .asciz " r%d:%08x%s"
.Lcr: .asciz "\n"
.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n"
.align
-.Ldsi: .word 0xe92dd800 >> 10 @ stmfd sp!, {... fp, ip, lr, pc}
- .word 0xe92d0000 >> 10 @ stmfd sp!, {}
+.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc}
+ .word 0xe92d0000 >> 11 @ stmfd sp!, {}
#endif
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index d42252918bf..9f12ed1eea8 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,46 +1,78 @@
+#include <asm/unwind.h>
-#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K)
- .macro bitop, instr
+#if __LINUX_ARM_ARCH__ >= 6
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
mov r2, #1
- and r3, r0, #7 @ Get bit offset
- add r1, r1, r0, lsr #3 @ Get byte offset
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
mov r3, r2, lsl r3
-1: ldrexb r2, [r1]
+1: ldrex r2, [r1]
\instr r2, r2, r3
- strexb r0, r2, [r1]
+ strex r0, r2, [r1]
cmp r0, #0
bne 1b
- mov pc, lr
+ bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
- .macro testop, instr, store
- and r3, r0, #7 @ Get bit offset
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
mov r2, #1
- add r1, r1, r0, lsr #3 @ Get byte offset
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
mov r3, r2, lsl r3 @ create mask
smp_dmb
-1: ldrexb r2, [r1]
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
+1: ldrex r2, [r1]
ands r0, r2, r3 @ save old value of bit
- \instr r2, r2, r3 @ toggle bit
- strexb ip, r2, [r1]
+ \instr r2, r2, r3 @ toggle bit
+ strex ip, r2, [r1]
cmp ip, #0
bne 1b
smp_dmb
cmp r0, #0
movne r0, #1
-2: mov pc, lr
+2: bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
#else
- .macro bitop, instr
- and r2, r0, #7
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r2, r0, #31
+ mov r0, r0, lsr #5
mov r3, #1
mov r3, r3, lsl r2
save_and_disable_irqs ip
- ldrb r2, [r1, r0, lsr #3]
+ ldr r2, [r1, r0, lsl #2]
\instr r2, r2, r3
- strb r2, [r1, r0, lsr #3]
+ str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
/**
@@ -51,17 +83,23 @@
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
- .macro testop, instr, store
- add r1, r1, r0, lsr #3
- and r3, r0, #7
- mov r0, #1
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r3, r0, #31
+ mov r0, r0, lsr #5
save_and_disable_irqs ip
- ldrb r2, [r1]
+ ldr r2, [r1, r0, lsl #2]!
+ mov r0, #1
tst r2, r0, lsl r3
\instr r2, r2, r0, lsl r3
\store r2, [r1]
moveq r0, #0
restore_irqs ip
mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
.endm
#endif
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
new file mode 100644
index 00000000000..9fcdd154eff
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.S
@@ -0,0 +1,36 @@
+#include <linux/linkage.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ENTRY(__bswapsi2)
+ rev r0, r0
+ bx lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ rev r3, r0
+ rev r0, r1
+ mov r1, r3
+ bx lr
+ENDPROC(__bswapdi2)
+#else
+ENTRY(__bswapsi2)
+ eor r3, r0, r0, ror #16
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ eor r0, r3, r0, ror #8
+ mov pc, lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ mov ip, r1
+ eor r3, ip, ip, ror #16
+ eor r1, r0, r0, ror #16
+ mov r1, r1, lsr #8
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ bic r1, r1, #0xff00
+ eor r1, r1, r0, ror #8
+ eor r0, r3, ip, ror #8
+ mov pc, lr
+ENDPROC(__bswapdi2)
+#endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
new file mode 100644
index 00000000000..916c80f13ae
--- /dev/null
+++ b/arch/arm/lib/call_with_stack.S
@@ -0,0 +1,44 @@
+/*
+ * arch/arm/lib/call_with_stack.S
+ *
+ * Copyright (C) 2011 ARM Ltd.
+ * Written by Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * void call_with_stack(void (*fn)(void *), void *arg, void *sp)
+ *
+ * Change the stack to that pointed at by sp, then invoke fn(arg) with
+ * the new stack.
+ */
+ENTRY(call_with_stack)
+ str sp, [r2, #-4]!
+ str lr, [r2, #-4]!
+
+ mov sp, r2
+ mov r2, r0
+ mov r0, r1
+
+ adr lr, BSYM(1f)
+ mov pc, r2
+
+1: ldr lr, [sp]
+ ldr sp, [sp, #4]
+ mov pc, lr
+ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
index 80f3115cbee..f4027862172 100644
--- a/arch/arm/lib/changebit.S
+++ b/arch/arm/lib/changebit.S
@@ -12,12 +12,4 @@
#include "bitops.h"
.text
-/* Purpose : Function to change a bit
- * Prototype: int change_bit(int bit, void *addr)
- */
-ENTRY(_change_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_change_bit_le)
- bitop eor
-ENDPROC(_change_bit_be)
-ENDPROC(_change_bit_le)
+bitop _change_bit, eor
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
index 1a63e43a1df..f6b75fb64d3 100644
--- a/arch/arm/lib/clearbit.S
+++ b/arch/arm/lib/clearbit.S
@@ -12,13 +12,4 @@
#include "bitops.h"
.text
-/*
- * Purpose : Function to clear a bit
- * Prototype: int clear_bit(int bit, void *addr)
- */
-ENTRY(_clear_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_clear_bit_le)
- bitop bic
-ENDPROC(_clear_bit_be)
-ENDPROC(_clear_bit_le)
+bitop _clear_bit, bic
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb00..3bc8eb811a7 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
12: PLD( pld [r1, #124] )
13: ldr4w r1, r4, r5, r6, r7, abort=19f
- mov r3, lr, pull #\pull
+ mov r3, lr, lspull #\pull
subs r2, r2, #32
ldr4w r1, r8, r9, ip, lr, abort=19f
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, lspush #\push
+ mov r4, r4, lspull #\pull
+ orr r4, r4, r5, lspush #\push
+ mov r5, r5, lspull #\pull
+ orr r5, r5, r6, lspush #\push
+ mov r6, r6, lspull #\pull
+ orr r6, r6, r7, lspush #\push
+ mov r7, r7, lspull #\pull
+ orr r7, r7, r8, lspush #\push
+ mov r8, r8, lspull #\pull
+ orr r8, r8, r9, lspush #\push
+ mov r9, r9, lspull #\pull
+ orr r9, r9, ip, lspush #\push
+ mov ip, ip, lspull #\pull
+ orr ip, ip, lr, lspush #\push
str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
bge 12b
PLD( cmn r2, #96 )
@@ -225,10 +225,10 @@
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, lspull #\pull
ldr1w r1, lr, abort=21f
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, lspush #\push
str1w r0, r3, abort=21f
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a0..d6e742d2400 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
tst len, #2
mov r5, r4, get_byte_0
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
cmp ip, #2
beq .Lsrc2_aligned
bhi .Lsrc3_aligned
- mov r4, r5, pull #8 @ C = 0
+ mov r4, r5, lspull #8 @ C = 0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
- mov r7, r7, pull #8
- orr r7, r7, r8, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
+ mov r7, r7, lspull #8
+ orr r7, r7, r8, lspush #24
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #8
+ mov r4, r8, lspull #8
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -196,50 +196,50 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #8
+ mov r4, r6, lspull #8
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #24
+ orr r4, r4, r5, lspush #24
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #8
+ mov r4, r5, lspull #8
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
mov r5, r4, get_byte_2
b .Lexit
-.Lsrc2_aligned: mov r4, r5, pull #16
+.Lsrc2_aligned: mov r4, r5, lspull #16
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
- mov r7, r7, pull #16
- orr r7, r7, r8, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
+ mov r7, r7, lspull #16
+ orr r7, r7, r8, lspush #16
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #16
+ mov r4, r8, lspull #16
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -248,20 +248,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #16
+ mov r4, r6, lspull #16
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #16
+ orr r4, r4, r5, lspush #16
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #16
+ mov r4, r5, lspull #16
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
load1b r5
b .Lexit
-.Lsrc3_aligned: mov r4, r5, pull #24
+.Lsrc3_aligned: mov r4, r5, lspull #24
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
- mov r7, r7, pull #24
- orr r7, r7, r8, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
+ mov r7, r7, lspull #24
+ orr r7, r7, r8, lspush #8
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #24
+ mov r4, r8, lspull #24
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -302,20 +302,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #24
+ mov r4, r6, lspull #24
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #8
+ orr r4, r4, r5, lspush #8
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #24
+ mov r4, r5, lspull #24
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
load1l r4
mov r5, r4, get_byte_0
strb r5, [dst], #1
- adcs sum, sum, r4, push #24
+ adcs sum, sum, r4, lspush #24
mov r5, r4, get_byte_1
b .Lexit
FN_EXIT
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay-loop.S
index 8d6a8762ab8..bc1033b897b 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay-loop.S
@@ -9,11 +9,11 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <asm/param.h>
+#include <asm/delay.h>
.text
.LC0: .word loops_per_jiffy
-.LC1: .word (2199023*HZ)>>11
+.LC1: .word UDELAY_MULT
/*
* r0 <= 2000
@@ -21,26 +21,29 @@
* HZ <= 1000
*/
-ENTRY(__udelay)
+ENTRY(__loop_udelay)
ldr r2, .LC1
mul r0, r2, r0
-ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
+ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
+ mov r1, #-1
ldr r2, .LC0
ldr r2, [r2] @ max = 0x01ffffff
+ add r0, r0, r1, lsr #32-14
mov r0, r0, lsr #14 @ max = 0x0001ffff
+ add r2, r2, r1, lsr #32-10
mov r2, r2, lsr #10 @ max = 0x00007fff
mul r0, r2, r0 @ max = 2^32-1
+ add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
moveq pc, lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
- *
- * Oh, if only we had a cycle counter...
*/
+ .align 3
@ Delay routine
-ENTRY(__delay)
+ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
movls pc, lr
@@ -58,8 +61,8 @@ ENTRY(__delay)
movls pc, lr
subs r0, r0, #1
#endif
- bhi __delay
+ bhi __loop_delay
mov pc, lr
-ENDPROC(__udelay)
-ENDPROC(__const_udelay)
-ENDPROC(__delay)
+ENDPROC(__loop_udelay)
+ENDPROC(__loop_const_udelay)
+ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
new file mode 100644
index 00000000000..5306de35013
--- /dev/null
+++ b/arch/arm/lib/delay.c
@@ -0,0 +1,93 @@
+/*
+ * Delay loops based on the OpenRISC implementation.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timex.h>
+
+/*
+ * Default to the loop-based delay implementation.
+ */
+struct arm_delay_ops arm_delay_ops = {
+ .delay = __loop_delay,
+ .const_udelay = __loop_const_udelay,
+ .udelay = __loop_udelay,
+};
+
+static const struct delay_timer *delay_timer;
+static bool delay_calibrated;
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (!delay_timer)
+ return -ENXIO;
+
+ *timer_val = delay_timer->read_current_timer();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(read_current_timer);
+
+static void __timer_delay(unsigned long cycles)
+{
+ cycles_t start = get_cycles();
+
+ while ((get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static void __timer_const_udelay(unsigned long xloops)
+{
+ unsigned long long loops = xloops;
+ loops *= arm_delay_ops.ticks_per_jiffy;
+ __timer_delay(loops >> UDELAY_SHIFT);
+}
+
+static void __timer_udelay(unsigned long usecs)
+{
+ __timer_const_udelay(usecs * UDELAY_MULT);
+}
+
+void __init register_current_timer_delay(const struct delay_timer *timer)
+{
+ if (!delay_calibrated) {
+ pr_info("Switching to timer-based delay loop\n");
+ delay_timer = timer;
+ lpj_fine = timer->freq / HZ;
+
+ /* cpufreq may scale loops_per_jiffy, so keep a private copy */
+ arm_delay_ops.ticks_per_jiffy = lpj_fine;
+ arm_delay_ops.delay = __timer_delay;
+ arm_delay_ops.const_udelay = __timer_const_udelay;
+ arm_delay_ops.udelay = __timer_udelay;
+
+ delay_calibrated = true;
+ } else {
+ pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
+ }
+}
+
+unsigned long calibrate_delay_is_known(void)
+{
+ delay_calibrated = true;
+ return lpj_fine;
+}
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index faa7748142d..e55c4842c29 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
+#include <asm/unwind.h>
#ifdef __ARMEB__
#define xh r0
@@ -44,6 +45,7 @@
*/
ENTRY(__do_div64)
+UNWIND(.fnstart)
@ Test for easy paths first.
subs ip, r4, #1
@@ -189,7 +191,12 @@ ENTRY(__do_div64)
moveq yh, xh
moveq xh, #0
moveq pc, lr
+UNWIND(.fnend)
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
@ Division by 0:
str lr, [sp, #-8]!
bl __div0
@@ -200,4 +207,5 @@ ENTRY(__do_div64)
mov xh, #0
ldr pc, [sp], #8
+UNWIND(.fnend)
ENDPROC(__do_div64)
diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S
index 8678eb2b7a6..e6057fa851b 100644
--- a/arch/arm/lib/ecard.S
+++ b/arch/arm/lib/ecard.S
@@ -12,7 +12,6 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <mach/hardware.h>
#define CPSR2SPSR(rt) \
mrs rt, cpsr; \
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 1e4cbd4e7be..64f6bc1a913 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -174,8 +174,8 @@ ENDPROC(_find_next_bit_be)
*/
.L_found:
#if __LINUX_ARM_ARCH__ >= 5
- rsb r1, r3, #0
- and r3, r3, r1
+ rsb r0, r3, #0
+ and r3, r3, r0
clz r3, r3
rsb r3, r3, #31
add r0, r2, r3
@@ -190,5 +190,7 @@ ENDPROC(_find_next_bit_be)
addeq r2, r2, #1
mov r0, r2
#endif
+ cmp r1, r0 @ Clamp to maxbit
+ movlo r0, r1
mov pc, lr
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index b1631a7dbe7..9b06bb41fca 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -16,8 +16,9 @@
* __get_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
- * r2, r3 contains the zero-extended value
+ * r2 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -27,33 +28,40 @@
* Note also that it is intended that __get_user_bad is not global.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
ENTRY(__get_user_1)
-1: ldrbt r2, [r0]
+ check_uaccess r0, 1, r1, r2, __get_user_bad
+1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
-#ifdef CONFIG_THUMB2_KERNEL
-2: ldrbt r2, [r0]
-3: ldrbt r3, [r0, #1]
-#else
+ check_uaccess r0, 2, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb .req ip
2: ldrbt r2, [r0], #1
-3: ldrbt r3, [r0]
+3: ldrbt rb, [r0], #0
+#else
+rb .req r0
+2: ldrb r2, [r0]
+3: ldrb rb, [r0, #1]
#endif
#ifndef __ARMEB__
- orr r2, r2, r3, lsl #8
+ orr r2, r2, rb, lsl #8
#else
- orr r2, r3, r2, lsl #8
+ orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
-4: ldrt r2, [r0]
+ check_uaccess r0, 4, r1, r2, __get_user_bad
+4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__get_user_4)
diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S
index 1b197ea7aab..69719bad674 100644
--- a/arch/arm/lib/io-acorn.S
+++ b/arch/arm/lib/io-acorn.S
@@ -11,13 +11,14 @@
*
*/
#include <linux/linkage.h>
+#include <linux/kern_levels.h>
#include <asm/assembler.h>
.text
.align
.Liosl_warning:
- .ascii "<4>insl/outsl not implemented, called from %08lX\0"
+ .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0"
.align
/*
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4..7a7430950c7 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
strb ip, [r1], #1
4: subs r2, r2, #1
- mov ip, r3, pull #24
+ mov ip, r3, lspull #24
ldrne r3, [r0]
- orrne ip, ip, r3, push #8
+ orrne ip, ip, r3, lspush #8
strne ip, [r1], #4
bne 4b
b 8f
5: subs r2, r2, #1
- mov ip, r3, pull #16
+ mov ip, r3, lspull #16
ldrne r3, [r0]
- orrne ip, ip, r3, push #16
+ orrne ip, ip, r3, lspush #16
strne ip, [r1], #4
bne 5b
b 7f
6: subs r2, r2, #1
- mov ip, r3, pull #8
+ mov ip, r3, lspull #8
ldrne r3, [r0]
- orrne ip, ip, r3, push #24
+ orrne ip, ip, r3, lspush #24
strne ip, [r1], #4
bne 6b
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 9aaf7c72065..88487c8c4f2 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -9,7 +9,6 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <mach/hardware.h>
.Linsw_bad_alignment:
adr r0, .Linsw_bad_align_msg
diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c
deleted file mode 100644
index 824253948f5..00000000000
--- a/arch/arm/lib/io-shark.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * linux/arch/arm/lib/io-shark.c
- *
- * by Alexander Schulz
- *
- * derived from:
- * linux/arch/arm/lib/io-ebsa.S
- * Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725..d0d104a0dd1 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
blt 5f
bgt 6f
-4: mov ip, r3, pull #16
+4: mov ip, r3, lspull #16
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #16
+ orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
-5: mov ip, r3, pull #8
+5: mov ip, r3, lspull #8
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #24
+ orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
-6: mov ip, r3, pull #24
+6: mov ip, r3, lspull #24
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #8
+ orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index cd34503e424..49b800419e3 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -9,7 +9,6 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <mach/hardware.h>
.Loutsw_bad_alignment:
adr r0, .Loutsw_bad_align_msg
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 6dc06487f3c..c562f649734 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
-
+#include <asm/unwind.h>
.macro ARM_DIV_BODY dividend, divisor, result, curbit
@@ -207,6 +207,7 @@ Boston, MA 02111-1307, USA. */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
+UNWIND(.fnstart)
subs r2, r1, #1
moveq pc, lr
@@ -230,10 +231,12 @@ ENTRY(__aeabi_uidiv)
mov r0, r0, lsr r2
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
ENTRY(__umodsi3)
+UNWIND(.fnstart)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
@@ -247,10 +250,12 @@ ENTRY(__umodsi3)
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__umodsi3)
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
+UNWIND(.fnstart)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
@@ -287,10 +292,12 @@ ENTRY(__aeabi_idiv)
rsbmi r0, r0, #0
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
ENTRY(__modsi3)
+UNWIND(.fnstart)
cmp r1, #0
beq Ldiv0
@@ -310,11 +317,14 @@ ENTRY(__modsi3)
rsbmi r0, r0, #0
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__modsi3)
#ifdef CONFIG_AEABI
ENTRY(__aeabi_uidivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_uidiv
@@ -323,10 +333,12 @@ ENTRY(__aeabi_uidivmod)
sub r1, r1, r3
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
ENTRY(__aeabi_idivmod)
-
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
stmfd sp!, {r0, r1, ip, lr}
bl __aeabi_idiv
ldmfd sp!, {r1, r2, ip, lr}
@@ -334,15 +346,18 @@ ENTRY(__aeabi_idivmod)
sub r1, r1, r3
mov pc, lr
+UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
#endif
Ldiv0:
-
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
str lr, [sp, #-8]!
bl __div0
mov r0, #0 @ About as wrong as it could be.
ldr pc, [sp], #8
-
-
+UNWIND(.fnend)
+ENDPROC(Ldiv0)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962..d1fc0c0c342 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, lspush #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, lspull #\pull
+ mov ip, ip, lspush #\push
+ orr ip, ip, r9, lspull #\pull
+ mov r9, r9, lspush #\push
+ orr r9, r9, r8, lspull #\pull
+ mov r8, r8, lspush #\push
+ orr r8, r8, r7, lspull #\pull
+ mov r7, r7, lspush #\push
+ orr r7, r7, r6, lspull #\pull
+ mov r6, r6, lspush #\push
+ orr r6, r6, r5, lspull #\pull
+ mov r5, r5, lspush #\push
+ orr r5, r5, r4, lspull #\pull
+ mov r4, r4, lspush #\push
+ orr r4, r4, r3, lspull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -175,10 +175,10 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, lspush #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, lspull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d5923ab8..94b0650ea98 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -14,27 +14,15 @@
.text
.align 5
- .word 0
-
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [r0], #1 @ 1
- strleb r1, [r0], #1 @ 1
- strb r1, [r0], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
ENTRY(memset)
ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
/*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
*/
- orr r1, r1, r1, lsl #8
+1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
cmp r2, #16
@@ -43,29 +31,28 @@ ENTRY(memset)
#if ! CALGN(1)+0
/*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
*/
- str lr, [sp, #-4]!
- mov ip, r1
+ stmfd sp!, {r8, lr}
+ mov r8, r1
mov lr, r1
2: subs r2, r2, #64
- stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
+ stmgeia ip!, {r1, r3, r8, lr}
bgt 2b
- ldmeqfd sp!, {pc} @ Now <64 bytes to go.
+ ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
- stmneia r0!, {r1, r3, ip, lr}
- stmneia r0!, {r1, r3, ip, lr}
+ stmneia ip!, {r1, r3, r8, lr}
+ stmneia ip!, {r1, r3, r8, lr}
tst r2, #16
- stmneia r0!, {r1, r3, ip, lr}
- ldr lr, [sp], #4
+ stmneia ip!, {r1, r3, r8, lr}
+ ldmfd sp!, {r8, lr}
#else
@@ -74,54 +61,63 @@ ENTRY(memset)
* whole cache lines at once.
*/
- stmfd sp!, {r4-r7, lr}
+ stmfd sp!, {r4-r8, lr}
mov r4, r1
mov r5, r1
mov r6, r1
mov r7, r1
- mov ip, r1
+ mov r8, r1
mov lr, r1
cmp r2, #96
- tstgt r0, #31
+ tstgt ip, #31
ble 3f
- and ip, r0, #31
- rsb ip, ip, #32
- sub r2, r2, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- tst ip, #(1 << 30)
- mov ip, r1
- strne r1, [r0], #4
+ and r8, ip, #31
+ rsb r8, r8, #32
+ sub r2, r2, r8
+ movs r8, r8, lsl #(32 - 4)
+ stmcsia ip!, {r4, r5, r6, r7}
+ stmmiia ip!, {r4, r5}
+ tst r8, #(1 << 30)
+ mov r8, r1
+ strne r1, [ip], #4
3: subs r2, r2, #64
- stmgeia r0!, {r1, r3-r7, ip, lr}
- stmgeia r0!, {r1, r3-r7, ip, lr}
+ stmgeia ip!, {r1, r3-r8, lr}
+ stmgeia ip!, {r1, r3-r8, lr}
bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
+ ldmeqfd sp!, {r4-r8, pc}
tst r2, #32
- stmneia r0!, {r1, r3-r7, ip, lr}
+ stmneia ip!, {r1, r3-r8, lr}
tst r2, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
+ stmneia ip!, {r4-r7}
+ ldmfd sp!, {r4-r8, lr}
#endif
4: tst r2, #8
- stmneia r0!, {r1, r3}
+ stmneia ip!, {r1, r3}
tst r2, #4
- strne r1, [r0], #4
+ strne r1, [ip], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
- strneb r1, [r0], #1
- strneb r1, [r0], #1
+ strneb r1, [ip], #1
+ strneb r1, [ip], #1
tst r2, #1
- strneb r1, [r0], #1
+ strneb r1, [ip], #1
mov pc, lr
+
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5b @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [ip], #1 @ 1
+ strleb r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b
ENDPROC(memset)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 5a01a23c6c0..3d73dcb959b 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -16,6 +16,7 @@
* __put_user_X
*
* Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
* r2, r3 contains the value
* Outputs: r0 is the error code
* lr corrupted
@@ -27,31 +28,35 @@
* Note also that it is intended that __put_user_bad is not global.
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
ENTRY(__put_user_1)
-1: strbt r2, [r0]
+ check_uaccess r0, 1, r1, ip, __put_user_bad
+1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
+ check_uaccess r0, 2, r1, ip, __put_user_bad
mov ip, r2, lsr #8
#ifdef CONFIG_THUMB2_KERNEL
#ifndef __ARMEB__
-2: strbt r2, [r0]
-3: strbt ip, [r0, #1]
+2: TUSER(strb) r2, [r0]
+3: TUSER(strb) ip, [r0, #1]
#else
-2: strbt ip, [r0]
-3: strbt r2, [r0, #1]
+2: TUSER(strb) ip, [r0]
+3: TUSER(strb) r2, [r0, #1]
#endif
#else /* !CONFIG_THUMB2_KERNEL */
#ifndef __ARMEB__
-2: strbt r2, [r0], #1
-3: strbt ip, [r0]
+2: TUSER(strb) r2, [r0], #1
+3: TUSER(strb) ip, [r0]
#else
-2: strbt ip, [r0], #1
-3: strbt r2, [r0]
+2: TUSER(strb) ip, [r0], #1
+3: TUSER(strb) r2, [r0]
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
@@ -59,18 +64,20 @@ ENTRY(__put_user_2)
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
-4: strt r2, [r0]
+ check_uaccess r0, 4, r1, ip, __put_user_bad
+4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
+ check_uaccess r0, 8, r1, ip, __put_user_bad
#ifdef CONFIG_THUMB2_KERNEL
-5: strt r2, [r0]
-6: strt r3, [r0, #4]
+5: TUSER(str) r2, [r0]
+6: TUSER(str) r3, [r0, #4]
#else
-5: strt r2, [r0], #4
-6: strt r3, [r0]
+5: TUSER(str) r2, [r0], #4
+6: TUSER(str) r3, [r0]
#endif
mov r0, #0
mov pc, lr
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
index 1dd7176c4b2..618fedae4b3 100644
--- a/arch/arm/lib/setbit.S
+++ b/arch/arm/lib/setbit.S
@@ -12,13 +12,4 @@
#include "bitops.h"
.text
-/*
- * Purpose : Function to set a bit
- * Prototype: int set_bit(int bit, void *addr)
- */
-ENTRY(_set_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_set_bit_le)
- bitop orr
-ENDPROC(_set_bit_be)
-ENDPROC(_set_bit_le)
+bitop _set_bit, orr
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
deleted file mode 100644
index eb0edb80d7b..00000000000
--- a/arch/arm/lib/sha1.S
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * linux/arch/arm/lib/sha1.S
- *
- * SHA transform optimized for ARM
- *
- * Copyright: (C) 2005 by Nicolas Pitre <nico@fluxnic.net>
- * Created: September 17, 2005
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * The reference implementation for this code is linux/lib/sha1.c
- */
-
-#include <linux/linkage.h>
-
- .text
-
-
-/*
- * void sha_transform(__u32 *digest, const char *in, __u32 *W)
- *
- * Note: the "in" ptr may be unaligned.
- */
-
-ENTRY(sha_transform)
-
- stmfd sp!, {r4 - r8, lr}
-
- @ for (i = 0; i < 16; i++)
- @ W[i] = be32_to_cpu(in[i]);
-
-#ifdef __ARMEB__
- mov r4, r0
- mov r0, r2
- mov r2, #64
- bl memcpy
- mov r2, r0
- mov r0, r4
-#else
- mov r3, r2
- mov lr, #16
-1: ldrb r4, [r1], #1
- ldrb r5, [r1], #1
- ldrb r6, [r1], #1
- ldrb r7, [r1], #1
- subs lr, lr, #1
- orr r5, r5, r4, lsl #8
- orr r6, r6, r5, lsl #8
- orr r7, r7, r6, lsl #8
- str r7, [r3], #4
- bne 1b
-#endif
-
- @ for (i = 0; i < 64; i++)
- @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
-
- sub r3, r2, #4
- mov lr, #64
-2: ldr r4, [r3, #4]!
- subs lr, lr, #1
- ldr r5, [r3, #8]
- ldr r6, [r3, #32]
- ldr r7, [r3, #52]
- eor r4, r4, r5
- eor r4, r4, r6
- eor r4, r4, r7
- mov r4, r4, ror #31
- str r4, [r3, #64]
- bne 2b
-
- /*
- * The SHA functions are:
- *
- * f1(B,C,D) = (D ^ (B & (C ^ D)))
- * f2(B,C,D) = (B ^ C ^ D)
- * f3(B,C,D) = ((B & C) | (D & (B | C)))
- *
- * Then the sub-blocks are processed as follows:
- *
- * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
- * B' = A
- * C' = ror(B, 2)
- * D' = C
- * E' = D
- *
- * We therefore unroll each loop 5 times to avoid register shuffling.
- * Also the ror for C (and also D and E which are successivelyderived
- * from it) is applied in place to cut on an additional mov insn for
- * each round.
- */
-
- .macro sha_f1, A, B, C, D, E
- ldr r3, [r2], #4
- eor ip, \C, \D
- add \E, r1, \E, ror #2
- and ip, \B, ip, ror #2
- add \E, \E, \A, ror #27
- eor ip, ip, \D, ror #2
- add \E, \E, r3
- add \E, \E, ip
- .endm
-
- .macro sha_f2, A, B, C, D, E
- ldr r3, [r2], #4
- add \E, r1, \E, ror #2
- eor ip, \B, \C, ror #2
- add \E, \E, \A, ror #27
- eor ip, ip, \D, ror #2
- add \E, \E, r3
- add \E, \E, ip
- .endm
-
- .macro sha_f3, A, B, C, D, E
- ldr r3, [r2], #4
- add \E, r1, \E, ror #2
- orr ip, \B, \C, ror #2
- add \E, \E, \A, ror #27
- and ip, ip, \D, ror #2
- add \E, \E, r3
- and r3, \B, \C, ror #2
- orr ip, ip, r3
- add \E, \E, ip
- .endm
-
- ldmia r0, {r4 - r8}
-
- mov lr, #4
- ldr r1, .L_sha_K + 0
-
- /* adjust initial values */
- mov r6, r6, ror #30
- mov r7, r7, ror #30
- mov r8, r8, ror #30
-
-3: subs lr, lr, #1
- sha_f1 r4, r5, r6, r7, r8
- sha_f1 r8, r4, r5, r6, r7
- sha_f1 r7, r8, r4, r5, r6
- sha_f1 r6, r7, r8, r4, r5
- sha_f1 r5, r6, r7, r8, r4
- bne 3b
-
- ldr r1, .L_sha_K + 4
- mov lr, #4
-
-4: subs lr, lr, #1
- sha_f2 r4, r5, r6, r7, r8
- sha_f2 r8, r4, r5, r6, r7
- sha_f2 r7, r8, r4, r5, r6
- sha_f2 r6, r7, r8, r4, r5
- sha_f2 r5, r6, r7, r8, r4
- bne 4b
-
- ldr r1, .L_sha_K + 8
- mov lr, #4
-
-5: subs lr, lr, #1
- sha_f3 r4, r5, r6, r7, r8
- sha_f3 r8, r4, r5, r6, r7
- sha_f3 r7, r8, r4, r5, r6
- sha_f3 r6, r7, r8, r4, r5
- sha_f3 r5, r6, r7, r8, r4
- bne 5b
-
- ldr r1, .L_sha_K + 12
- mov lr, #4
-
-6: subs lr, lr, #1
- sha_f2 r4, r5, r6, r7, r8
- sha_f2 r8, r4, r5, r6, r7
- sha_f2 r7, r8, r4, r5, r6
- sha_f2 r6, r7, r8, r4, r5
- sha_f2 r5, r6, r7, r8, r4
- bne 6b
-
- ldmia r0, {r1, r2, r3, ip, lr}
- add r4, r1, r4
- add r5, r2, r5
- add r6, r3, r6, ror #2
- add r7, ip, r7, ror #2
- add r8, lr, r8, ror #2
- stmia r0, {r4 - r8}
-
- ldmfd sp!, {r4 - r8, pc}
-
-ENDPROC(sha_transform)
-
- .align 2
-.L_sha_K:
- .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
-
-
-/*
- * void sha_init(__u32 *buf)
- */
-
- .align 2
-.L_sha_initial_digest:
- .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
-
-ENTRY(sha_init)
-
- str lr, [sp, #-4]!
- adr r1, .L_sha_initial_digest
- ldmia r1, {r1, r2, r3, ip, lr}
- stmia r0, {r1, r2, r3, ip, lr}
- ldr pc, [sp], #4
-
-ENDPROC(sha_init)
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
deleted file mode 100644
index f202d7bd164..00000000000
--- a/arch/arm/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * linux/arch/arm/lib/strncpy_from_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/*
- * Copy a string from user space to kernel space.
- * r0 = dst, r1 = src, r2 = byte length
- * returns the number of characters copied (strlen of copied string),
- * -EFAULT on exception, or "len" if we fill the whole buffer
- */
-ENTRY(__strncpy_from_user)
- mov ip, r1
-1: subs r2, r2, #1
- ldrusr r3, r1, 1, pl
- bmi 2f
- strb r3, [r0], #1
- teq r3, #0
- bne 1b
- sub r1, r1, #1 @ take NUL character out of count
-2: sub r0, r1, ip
- mov pc, lr
-ENDPROC(__strncpy_from_user)
-
- .pushsection .fixup,"ax"
- .align 0
-9001: mov r3, #0
- strb r3, [r0, #0] @ null terminate
- mov r0, #-EFAULT
- mov pc, lr
- .popsection
-
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
deleted file mode 100644
index 0ecbb459c4f..00000000000
--- a/arch/arm/lib/strnlen_user.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * linux/arch/arm/lib/strnlen_user.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
- .text
- .align 5
-
-/* Prototype: unsigned long __strnlen_user(const char *str, long n)
- * Purpose : get length of a string in user memory
- * Params : str - address of string in user memory
- * Returns : length of string *including terminator*
- * or zero on exception, or n + 1 if too long
- */
-ENTRY(__strnlen_user)
- mov r2, r0
-1:
- ldrusr r3, r0, 1
- teq r3, #0
- beq 2f
- subs r1, r1, #1
- bne 1b
- add r0, r0, #1
-2: sub r0, r0, r2
- mov pc, lr
-ENDPROC(__strnlen_user)
-
- .pushsection .fixup,"ax"
- .align 0
-9001: mov r0, #0
- mov pc, lr
- .popsection
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 5c98dc567f0..4becdc3a59c 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -12,9 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_change_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_change_bit_le)
- testop eor, strb
-ENDPROC(_test_and_change_bit_be)
-ENDPROC(_test_and_change_bit_le)
+testop _test_and_change_bit, eor, str
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 543d7094d18..918841dcce7 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -12,9 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_clear_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_clear_bit_le)
- testop bicne, strneb
-ENDPROC(_test_and_clear_bit_be)
-ENDPROC(_test_and_clear_bit_le)
+testop _test_and_clear_bit, bicne, strne
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 0b3f390401c..8d1b2fe9e48 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -12,9 +12,4 @@
#include "bitops.h"
.text
-ENTRY(_test_and_set_bit_be)
- eor r0, r0, #0x18 @ big endian byte ordering
-ENTRY(_test_and_set_bit_le)
- testop orreq, streqb
-ENDPROC(_test_and_set_bit_be)
-ENDPROC(_test_and_set_bit_le)
+testop _test_and_set_bit, orreq, streq
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index fee9f6f88ad..e50520904b7 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -14,6 +14,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
+#include <asm/domain.h>
.text
@@ -31,11 +32,11 @@
rsb ip, ip, #4
cmp ip, #2
ldrb r3, [r1], #1
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #1
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
sub r2, r2, ip
b .Lc2u_dest_aligned
@@ -58,7 +59,7 @@ ENTRY(__copy_to_user)
addmi ip, r2, #4
bmi .Lc2u_0nowords
ldr r3, [r1], #4
-USER( strt r3, [r0], #4) @ May fault
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
@@ -87,18 +88,18 @@ USER( strt r3, [r0], #4) @ May fault
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
ldrne r3, [r1], #4
- strnet r3, [r0], #4 @ Shouldnt fault
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_0fupi
.Lc2u_0nowords: teq ip, #0
beq .Lc2u_finished
.Lc2u_nowords: cmp ip, #2
ldrb r3, [r1], #1
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #1
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
b .Lc2u_finished
.Lc2u_not_enough:
@@ -116,10 +117,10 @@ USER( strgtbt r3, [r0], #1) @ May fault
.Lc2u_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_1nowords
- mov r3, r7, pull #8
+ mov r3, r7, lspull #8
ldr r7, [r1], #4
- orr r3, r3, r7, push #24
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #24
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
@@ -130,51 +131,51 @@ USER( strt r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_1rem8lp
-.Lc2u_1cpy8lp: mov r3, r7, pull #8
+.Lc2u_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_1cpy8lp
.Lc2u_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #24
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #24
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_1fupi
.Lc2u_1nowords: mov r3, r7, get_byte_1
teq ip, #0
beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
movge r3, r7, get_byte_2
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
movgt r3, r7, get_byte_3
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
b .Lc2u_finished
.Lc2u_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_2nowords
- mov r3, r7, pull #16
+ mov r3, r7, lspull #16
ldr r7, [r1], #4
- orr r3, r3, r7, push #16
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #16
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
@@ -185,51 +186,51 @@ USER( strt r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_2rem8lp
-.Lc2u_2cpy8lp: mov r3, r7, pull #16
+.Lc2u_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_2cpy8lp
.Lc2u_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #16
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #16
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_2fupi
.Lc2u_2nowords: mov r3, r7, get_byte_2
teq ip, #0
beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
movge r3, r7, get_byte_3
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #0
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
b .Lc2u_finished
.Lc2u_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_3nowords
- mov r3, r7, pull #24
+ mov r3, r7, lspull #24
ldr r7, [r1], #4
- orr r3, r3, r7, push #8
-USER( strt r3, [r0], #4) @ May fault
+ orr r3, r3, r7, lspush #8
+USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
movs ip, ip, lsr #32 - PAGE_SHIFT
@@ -240,42 +241,42 @@ USER( strt r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_3rem8lp
-.Lc2u_3cpy8lp: mov r3, r7, pull #24
+.Lc2u_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_3cpy8lp
.Lc2u_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #8
- strnet r3, [r0], #4 @ Shouldnt fault
+ orrne r3, r3, r7, lspush #8
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_3fupi
.Lc2u_3nowords: mov r3, r7, get_byte_3
teq ip, #0
beq .Lc2u_finished
cmp ip, #2
-USER( strbt r3, [r0], #1) @ May fault
+USER( TUSER( strb) r3, [r0], #1) @ May fault
ldrgeb r3, [r1], #1
-USER( strgebt r3, [r0], #1) @ May fault
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
ldrgtb r3, [r1], #0
-USER( strgtbt r3, [r0], #1) @ May fault
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
b .Lc2u_finished
ENDPROC(__copy_to_user)
@@ -294,11 +295,11 @@ ENDPROC(__copy_to_user)
.Lcfu_dest_not_aligned:
rsb ip, ip, #4
cmp ip, #2
-USER( ldrbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
sub r2, r2, ip
b .Lcfu_dest_aligned
@@ -321,7 +322,7 @@ ENTRY(__copy_from_user)
.Lcfu_0fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_0nowords
-USER( ldrt r3, [r1], #4)
+USER( TUSER( ldr) r3, [r1], #4)
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction
rsb ip, ip, #0
@@ -350,18 +351,18 @@ USER( ldrt r3, [r1], #4)
ldmneia r1!, {r3 - r4} @ Shouldnt fault
stmneia r0!, {r3 - r4}
tst ip, #4
- ldrnet r3, [r1], #4 @ Shouldnt fault
+ TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_0fupi
.Lcfu_0nowords: teq ip, #0
beq .Lcfu_finished
.Lcfu_nowords: cmp ip, #2
-USER( ldrbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
b .Lcfu_finished
@@ -374,16 +375,16 @@ USER( ldrgtbt r3, [r1], #1) @ May fault
.Lcfu_src_not_aligned:
bic r1, r1, #3
-USER( ldrt r7, [r1], #4) @ May fault
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
cmp ip, #2
bgt .Lcfu_3fupi
beq .Lcfu_2fupi
.Lcfu_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_1nowords
- mov r3, r7, pull #8
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #24
+ mov r3, r7, lspull #8
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #24
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -395,30 +396,30 @@ USER( ldrt r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_1rem8lp
-.Lcfu_1cpy8lp: mov r3, r7, pull #8
+.Lcfu_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6}
bpl .Lcfu_1cpy8lp
.Lcfu_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #8
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #24
+ movne r3, r7, lspull #8
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #24
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_1fupi
@@ -436,9 +437,9 @@ USER( ldrnet r7, [r1], #4) @ May fault
.Lcfu_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_2nowords
- mov r3, r7, pull #16
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #16
+ mov r3, r7, lspull #16
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #16
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -451,30 +452,30 @@ USER( ldrt r7, [r1], #4) @ May fault
blt .Lcfu_2rem8lp
-.Lcfu_2cpy8lp: mov r3, r7, pull #16
+.Lcfu_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6}
bpl .Lcfu_2cpy8lp
.Lcfu_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #16
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #16
+ movne r3, r7, lspull #16
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #16
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_2fupi
@@ -485,16 +486,16 @@ USER( ldrnet r7, [r1], #4) @ May fault
strb r3, [r0], #1
movge r3, r7, get_byte_3
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #0) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault
strgtb r3, [r0], #1
b .Lcfu_finished
.Lcfu_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_3nowords
- mov r3, r7, pull #24
-USER( ldrt r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #8
+ mov r3, r7, lspull #24
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, lspush #8
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -506,30 +507,30 @@ USER( ldrt r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_3rem8lp
-.Lcfu_3cpy8lp: mov r3, r7, pull #24
+.Lcfu_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7} @ Shouldnt fault
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6}
subs ip, ip, #16
bpl .Lcfu_3cpy8lp
.Lcfu_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #24
-USER( ldrnet r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #8
+ movne r3, r7, lspull #24
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, lspush #8
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_3fupi
@@ -538,9 +539,9 @@ USER( ldrnet r7, [r1], #4) @ May fault
beq .Lcfu_finished
cmp ip, #2
strb r3, [r0], #1
-USER( ldrgebt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
strgeb r3, [r0], #1
-USER( ldrgtbt r3, [r1], #1) @ May fault
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
strgtb r3, [r0], #1
b .Lcfu_finished
ENDPROC(__copy_from_user)
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index e2d2f2cd0c4..3e58d710013 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -17,6 +17,8 @@
#include <linux/sched.h>
#include <linux/hardirq.h> /* for in_atomic() */
#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
#include <asm/current.h>
#include <asm/page.h>
@@ -27,14 +29,47 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte;
+ pud_t *pud;
spinlock_t *ptl;
pgd = pgd_offset(current->mm, addr);
if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
return 0;
- pmd = pmd_offset(pgd, addr);
- if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
+ pud = pud_offset(pgd, addr);
+ if (unlikely(pud_none(*pud) || pud_bad(*pud)))
+ return 0;
+
+ pmd = pmd_offset(pud, addr);
+ if (unlikely(pmd_none(*pmd)))
+ return 0;
+
+ /*
+ * A pmd can be bad if it refers to a HugeTLB or THP page.
+ *
+ * Both THP and HugeTLB pages have the same pmd layout
+ * and should not be manipulated by the pte functions.
+ *
+ * Lock the page table for the destination and check
+ * to see that it's still huge and whether or not we will
+ * need to fault on write, or if we have a splitting THP.
+ */
+ if (unlikely(pmd_thp_or_huge(*pmd))) {
+ ptl = &current->mm->page_table_lock;
+ spin_lock(ptl);
+ if (unlikely(!pmd_thp_or_huge(*pmd)
+ || pmd_hugewillfault(*pmd)
+ || pmd_trans_splitting(*pmd))) {
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ *ptep = NULL;
+ *ptlp = ptl;
+ return 1;
+ }
+
+ if (unlikely(pmd_bad(*pmd)))
return 0;
pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
@@ -88,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
from += tocopy;
n -= tocopy;
- pte_unmap_unlock(pte, ptl);
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
}
if (!atomic)
up_read(&current->mm->mmap_sem);
@@ -141,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n)
addr += tocopy;
n -= tocopy;
- pte_unmap_unlock(pte, ptl);
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
}
up_read(&current->mm->mmap_sem);
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
new file mode 100644
index 00000000000..2c40aeab3ea
--- /dev/null
+++ b/arch/arm/lib/xor-neon.c
@@ -0,0 +1,46 @@
+/*
+ * linux/arch/arm/lib/xor-neon.c
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+
+#ifndef __ARM_NEON__
+#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
+#endif
+
+/*
+ * Pull in the reference implementations while instructing GCC (through
+ * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
+ * NEON instructions.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC optimize "tree-vectorize"
+#else
+/*
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
+ */
+#warning This code requires at least version 4.6 of GCC
+#endif
+
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#include <asm-generic/xor.h>
+
+struct xor_block_template const xor_block_neon_inner = {
+ .name = "__inner_neon__",
+ .do_2 = xor_8regs_2,
+ .do_3 = xor_8regs_3,
+ .do_4 = xor_8regs_4,
+ .do_5 = xor_8regs_5,
+};
+EXPORT_SYMBOL(xor_block_neon_inner);