Diffstat (limited to 'arch/arm/lib')
58 files changed, 1763 insertions, 1294 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 391f3ab3ff3..0573faab96a 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,28 +6,34 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ - copy_page.o delay.o findbit.o memchr.o memcpy.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ - strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ testchangebit.o testclearbit.o testsetbit.o \ - getuser.o putuser.o clear_user.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ - ucmpdi2.o lib1funcs.o div64.o sha1.o \ - io-readsb.o io-writesb.o io-readsl.o io-writesl.o + ucmpdi2.o lib1funcs.o div64.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ + call_with_stack.o bswapsdi2.o + +mmu-y := clear_user.o copy_page.o getuser.o putuser.o # the code in uaccess.S is not preemption safe and # probably faster on ARMv3 only -ifeq ($CONFIG_PREEMPT,y) - lib-y += copy_from_user.o copy_to_user.o +ifeq ($(CONFIG_PREEMPT),y) + mmu-y += copy_from_user.o copy_to_user.o else ifneq ($(CONFIG_CPU_32v3),y) - lib-y += copy_from_user.o copy_to_user.o + mmu-y += copy_from_user.o copy_to_user.o else - lib-y += uaccess.o + mmu-y += uaccess.o endif endif +# using lib_ here won't override already available weak symbols +obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o + +lib-$(CONFIG_MMU) += $(mmu-y) + ifeq ($(CONFIG_CPU_32v3),y) lib-y += io-readsw-armv3.o io-writesw-armv3.o else @@ -35,9 +41,12 @@ else endif lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o -lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o -lib-$(CONFIG_ARCH_L7200) += io-acorn.o -lib-$(CONFIG_ARCH_SHARK) += io-shark.o $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S + +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) + NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon + CFLAGS_xor-neon.o += $(NEON_FLAGS) + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o +endif diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index 561e20717b3..638deb13da1 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. */ #endif ENTRY(__ashldi3) +ENTRY(__aeabi_llsl) subs r3, r2, #32 rsb ip, r2, #32 movmi ah, ah, lsl r2 movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 mov pc, lr +ENDPROC(__ashldi3) +ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index 86fb2a90c30..015e8aa5a1d 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. 
*/ #endif ENTRY(__ashrdi3) +ENTRY(__aeabi_lasr) subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 mov pc, lr +ENDPROC(__ashrdi3) +ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index 68a21c0f3f5..4102be617fc 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -10,7 +10,6 @@ * 27/03/03 Ian Molton Clean up CONFIG_CPU * */ -#include <linux/config.h> #include <linux/linkage.h> #include <asm/assembler.h> .text @@ -18,105 +17,100 @@ @ fp is 0 or stack frame #define frame r4 -#define next r5 -#define save r6 +#define sv_fp r5 +#define sv_pc r6 #define mask r7 #define offset r8 -ENTRY(__backtrace) - mov r1, #0x10 - mov r0, fp - ENTRY(c_backtrace) -#ifndef CONFIG_FRAME_POINTER +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) mov pc, lr +ENDPROC(c_backtrace) #else - stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... - tst r1, #0x10 @ 26 or 32-bit? - moveq mask, #0xfc000003 - movne mask, #0 - tst mask, r0 - movne r0, #0 - movs frame, r0 -1: moveq r0, #-2 - LOADREGS(eqfd, sp!, {r4 - r8, pc}) - -2: stmfd sp!, {pc} @ calculate offset of PC in STMIA instruction - ldr r0, [sp], #4 - adr r1, 2b - 4 + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + + tst r1, #0x10 @ 26 or 32-bit mode? + ARM( moveq mask, #0xfc000003 ) + THUMB( moveq mask, #0xfc000000 ) + THUMB( orreq mask, #0x03 ) + movne mask, #0 @ mask for 32-bit + +1: stmfd sp!, {pc} @ calculate offset of PC stored + ldr r0, [sp], #4 @ by stmfd for this CPU + adr r1, 1b sub offset, r0, r1 -3: tst frame, mask @ Check for address exceptions... 
- bne 1b +/* + * Stack frame layout: + * optionally saved caller registers (r4 - r10) + * saved fp + * saved sp + * saved lr + * frame => saved pc + * optionally saved arguments (r0 - r3) + * saved sp => <next word> + * + * Functions start with the following code sequence: + * mov ip, sp + * stmfd sp!, {r0 - r3} (optional) + * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} + */ +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +1001: ldr sv_pc, [frame, #0] @ get saved pc +1002: ldr sv_fp, [frame, #-12] @ get saved fp -1001: ldr next, [frame, #-12] @ get fp -1002: ldr r2, [frame, #-4] @ get lr -1003: ldr r3, [frame, #0] @ get pc - sub save, r3, offset @ Correct PC for prefetching - bic save, save, mask -1004: ldr r1, [save, #0] @ get instruction at function - mov r1, r1, lsr #10 - ldr r3, .Ldsi+4 - teq r1, r3 - subeq save, save, #4 - mov r0, save - bic r1, r2, mask + sub sv_pc, sv_pc, offset @ Correct PC for prefetching + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 @ adjust saved 'pc' back one + teq r3, r2, lsr #10 @ instruction + subne r0, sv_pc, #4 @ allow for mov + subeq r0, sv_pc, #8 @ allow for mov + stmia + + ldr r1, [frame, #-4] @ get saved lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode bl dump_backtrace_entry - ldr r0, [frame, #-8] @ get sp - sub r0, r0, #4 -1005: ldr r1, [save, #4] @ get instruction at function+4 - mov r3, r1, lsr #10 - ldr r2, .Ldsi+4 - teq r3, r2 @ Check for stmia sp!, {args} - addeq save, save, #4 @ next instruction - bleq .Ldumpstm - - sub r0, frame, #16 -1006: ldr r1, [save, #4] @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction - mov r3, r1, lsr #10 - ldr r2, .Ldsi - teq r3, r2 - bleq .Ldumpstm - - /* - * A zero next framepointer means we're done. - */ - teq next, #0 - LOADREGS(eqfd, sp!, {r4 - r8, pc}) - - /* - * The next framepointer must be above the - * current framepointer. 
- */ - cmp next, frame - mov frame, next - bhi 3b - b 1007f + ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 + teq r3, r1, lsr #11 + ldreq r0, [frame, #-8] @ get sp + subeq r0, r0, #4 @ point at the last arg + bleq .Ldumpstm @ dump saved registers -/* - * Fixup for LDMDB - */ - .section .fixup,"ax" - .align 0 -1007: ldr r0, =.Lbad +1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} + ldr r3, .Ldsi @ instruction exists, + teq r3, r1, lsr #11 + subeq r0, frame, #16 + bleq .Ldumpstm @ dump saved registers + + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame + bhi for_each_frame + +1006: adr r0, .Lbad mov r1, frame bl printk - LOADREGS(fd, sp!, {r4 - r8, pc}) - .ltorg - .previous +no_frame: ldmfd sp!, {r4 - r8, pc} +ENDPROC(c_backtrace) - .section __ex_table,"a" + .pushsection __ex_table,"a" .align 3 - .long 1001b, 1007b - .long 1002b, 1007b - .long 1003b, 1007b - .long 1004b, 1007b - .long 1005b, 1007b - .long 1006b, 1007b - .previous + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .popsection #define instr r4 #define reg r5 @@ -125,16 +119,18 @@ ENTRY(c_backtrace) .Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} mov stack, r0 mov instr, r1 - mov reg, #9 + mov reg, #10 mov r7, #0 1: mov r3, #1 - tst instr, r3, lsl reg + ARM( tst instr, r3, lsl reg ) + THUMB( lsl r3, reg ) + THUMB( tst instr, r3 ) beq 2f add r7, r7, #1 - teq r7, #4 + teq r7, #6 moveq r7, #0 - moveq r3, #'\n' - movne r3, #' ' + adr r3, .Lcr + addne r3, r3, #1 @ skip newline ldr r2, [stack], #-4 mov r1, reg adr r0, .Lfp @@ -144,14 +140,13 @@ ENTRY(c_backtrace) teq r7, #0 adrne r0, .Lcr blne printk - mov r0, stack - LOADREGS(fd, sp!, {instr, reg, stack, r7, pc}) + ldmfd sp!, {instr, reg, stack, r7, pc} -.Lfp: .asciz " r%d = %08X%c" +.Lfp: .asciz " r%d:%08x%s" .Lcr: .asciz "\n" .Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" .align -.Ldsi: .word 0x00e92dd8 >> 2 - .word 0x00e92d00 >> 2 +.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... 
fp, ip, lr, pc} + .word 0xe92d0000 >> 11 @ stmfd sp!, {} #endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index f35d91fbe11..9f12ed1eea8 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,45 +1,78 @@ -#include <linux/config.h> +#include <asm/unwind.h> -#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K) - .macro bitop, instr +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned mov r2, #1 - and r3, r0, #7 @ Get bit offset - add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif mov r3, r2, lsl r3 -1: ldrexb r2, [r1] +1: ldrex r2, [r1] \instr r2, r2, r3 - strexb r0, r2, [r1] + strex r0, r2, [r1] cmp r0, #0 bne 1b - mov pc, lr + bx lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm - .macro testop, instr, store - and r3, r0, #7 @ Get bit offset + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned mov r2, #1 - add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset mov r3, r2, lsl r3 @ create mask -1: ldrexb r2, [r1] + smp_dmb +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif +1: ldrex r2, [r1] ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit - strexb ip, r2, [r1] + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] cmp ip, #0 bne 1b + smp_dmb cmp r0, #0 movne r0, #1 -2: mov pc, lr +2: bx lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm #else - .macro bitop, instr - and r2, r0, #7 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 mov r3, #1 mov r3, r3, lsl r2 - save_and_disable_irqs ip, r2 - ldrb r2, [r1, r0, lsr #3] + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] \instr r2, r2, r3 - strb r2, [r1, r0, lsr #3] + str r2, [r1, r0, lsl #2] restore_irqs ip mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm /** @@ -48,19 +81,25 @@ * @store: store instruction * * Note: we can trivially conditionalise the store instruction - * to avoid dirting the data cache. + * to avoid dirtying the data cache. */ - .macro testop, instr, store - add r1, r1, r0, lsr #3 - and r3, r0, #7 + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! 
mov r0, #1 - save_and_disable_irqs ip, r2 - ldrb r2, [r1] tst r2, r0, lsl r3 \instr r2, r2, r0, lsl r3 \store r2, [r1] - restore_irqs ip moveq r0, #0 + restore_irqs ip mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm #endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S new file mode 100644 index 00000000000..9fcdd154eff --- /dev/null +++ b/arch/arm/lib/bswapsdi2.S @@ -0,0 +1,36 @@ +#include <linux/linkage.h> + +#if __LINUX_ARM_ARCH__ >= 6 +ENTRY(__bswapsi2) + rev r0, r0 + bx lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + rev r3, r0 + rev r0, r1 + mov r1, r3 + bx lr +ENDPROC(__bswapdi2) +#else +ENTRY(__bswapsi2) + eor r3, r0, r0, ror #16 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + eor r0, r3, r0, ror #8 + mov pc, lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + mov ip, r1 + eor r3, ip, ip, ror #16 + eor r1, r0, r0, ror #16 + mov r1, r1, lsr #8 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + bic r1, r1, #0xff00 + eor r1, r1, r0, ror #8 + eor r0, r3, ip, ror #8 + mov pc, lr +ENDPROC(__bswapdi2) +#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S new file mode 100644 index 00000000000..916c80f13ae --- /dev/null +++ b/arch/arm/lib/call_with_stack.S @@ -0,0 +1,44 @@ +/* + * arch/arm/lib/call_with_stack.S + * + * Copyright (C) 2011 ARM Ltd. + * Written by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * void call_with_stack(void (*fn)(void *), void *arg, void *sp) + * + * Change the stack to that pointed at by sp, then invoke fn(arg) with + * the new stack. + */ +ENTRY(call_with_stack) + str sp, [r2, #-4]! + str lr, [r2, #-4]! 
+ + mov sp, r2 + mov r2, r0 + mov r0, r1 + + adr lr, BSYM(1f) + mov pc, r2 + +1: ldr lr, [sp] + ldr sp, [sp, #4] + mov pc, lr +ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S index 389567c2409..f4027862172 100644 --- a/arch/arm/lib/changebit.S +++ b/arch/arm/lib/changebit.S @@ -12,10 +12,4 @@ #include "bitops.h" .text -/* Purpose : Function to change a bit - * Prototype: int change_bit(int bit, void *addr) - */ -ENTRY(_change_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_change_bit_le) - bitop eor +bitop _change_bit, eor diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 7ff9f831b3f..14a0d988c82 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -12,13 +12,14 @@ .text -/* Prototype: int __arch_clear_user(void *addr, size_t sz) +/* Prototype: int __clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear * Returns : number of bytes NOT cleared */ -ENTRY(__arch_clear_user) +ENTRY(__clear_user_std) +WEAK(__clear_user) stmfd sp!, {r1, lr} mov r2, #0 cmp r1, #4 @@ -26,27 +27,28 @@ ENTRY(__arch_clear_user) ands ip, r0, #3 beq 1f cmp ip, #2 -USER( strbt r2, [r0], #1) -USER( strlebt r2, [r0], #1) -USER( strltbt r2, [r0], #1) + strusr r2, r0, 1 + strusr r2, r0, 1, le + strusr r2, r0, 1, lt rsb ip, ip, #4 sub r1, r1, ip @ 7 6 5 4 3 2 1 1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 -USER( strplt r2, [r0], #4) -USER( strplt r2, [r0], #4) + strusr r2, r0, 4, pl, rept=2 bpl 1b adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 -USER( strplt r2, [r0], #4) + strusr r2, r0, 4, pl 2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x -USER( strnebt r2, [r0], #1) -USER( strnebt r2, [r0], #1) + strusr r2, r0, 1, ne, rept=2 tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 -USER( strnebt r2, [r0], #1) + it ne @ explicit IT needed for the label +USER( strnebt r2, [r0]) mov r0, #0 - LOADREGS(fd,sp!, {r1, pc}) + ldmfd sp!, {r1, pc} +ENDPROC(__clear_user) +ENDPROC(__clear_user_std) - .section .fixup,"ax" + .pushsection .fixup,"ax" .align 0 -9001: LOADREGS(fd,sp!, {r0, pc}) - .previous +9001: ldmfd sp!, {r0, pc} + .popsection diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S index 34751653302..f6b75fb64d3 100644 --- a/arch/arm/lib/clearbit.S +++ b/arch/arm/lib/clearbit.S @@ -12,11 +12,4 @@ #include "bitops.h" .text -/* - * Purpose : Function to clear a bit - * Prototype: int clear_bit(int bit, void *addr) - */ -ENTRY(_clear_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_clear_bit_le) - bitop bic +bitop _clear_bit, bic diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7497393a0e8..66a477a3e3c 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __arch_copy_from_user(void *to, const void *from, size_t n) + * size_t __copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -33,11 +33,15 @@ * Number of bytes NOT copied. 
*/ +#ifndef CONFIG_THUMB2_KERNEL +#define LDR1W_SHIFT 0 +#else +#define LDR1W_SHIFT 1 +#endif +#define STR1W_SHIFT 0 + .macro ldr1w ptr reg abort -100: ldrt \reg, [\ptr], #4 - .section __ex_table, "a" - .long 100b, \abort - .previous + ldrusr \reg, \ptr, 4, abort=\abort .endm .macro ldr4w ptr reg1 reg2 reg3 reg4 abort @@ -53,14 +57,11 @@ .endm .macro ldr1b ptr reg cond=al abort -100: ldr\cond\()bt \reg, [\ptr], #1 - .section __ex_table, "a" - .long 100b, \abort - .previous + ldrusr \reg, \ptr, 1, \cond, abort=\abort .endm .macro str1w ptr reg abort - str \reg, [\ptr], #4 + W(str) \reg, [\ptr], #4 .endm .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort @@ -83,11 +84,13 @@ .text -ENTRY(__arch_copy_from_user) +ENTRY(__copy_from_user) #include "copy_template.S" - .section .fixup,"ax" +ENDPROC(__copy_from_user) + + .pushsection .fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2} @@ -97,5 +100,5 @@ ENTRY(__arch_copy_from_user) bl __memzero ldr r0, [sp], #4 copy_abort_end - .previous + .popsection diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 68117968482..6ee2f6706f8 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -12,8 +12,9 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> -#define COPY_COUNT (PAGE_SZ/64 PLD( -1 )) +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) .text .align 5 @@ -26,21 +27,21 @@ ENTRY(copy_page) stmfd sp!, {r4, lr} @ 2 PLD( pld [r1, #0] ) - PLD( pld [r1, #32] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) mov r2, #COPY_COUNT @ 1 ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: PLD( pld [r1, #64] ) - PLD( pld [r1, #96] ) -2: stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) stmia r0!, {r3, r4, ip, lr} @ 4 ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr subs r2, r2, #1 @ 1 stmia r0!, {r3, r4, ip, lr} @ 4 ldmgtia r1!, {r3, r4, ip, lr} @ 4 bgt 1b @ 1 PLD( ldmeqia r1!, {r3, r4, ip, lr} ) PLD( beq 2b ) - LOADREGS(fd, sp!, {r4, pc}) @ 3 + ldmfd sp!, {r4, pc} @ 3 +ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 838e435e492..3bc8eb811a7 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -13,14 +13,6 @@ */ /* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - -/* * Theory of operation * ------------------- * @@ -65,6 +57,13 @@ * * Restore registers with the values previously saved with the * 'preserv' macro. Called upon code termination. 
+ * + * LDR1W_SHIFT + * STR1W_SHIFT + * + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) */ @@ -82,7 +81,7 @@ stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb r3, ip, #32 ) CALGN( sbcnes r4, r3, r2 ) @ C is always set here CALGN( bcs 2f ) @@ -107,9 +106,15 @@ 5: ands ip, r2, #28 rsb ip, ip, #32 +#if LDR1W_SHIFT > 0 + lsl ip, ip, #LDR1W_SHIFT +#endif addne pc, pc, ip @ C is always clear here b 7f -6: nop +6: + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr ldr1w r1, r3, abort=20f ldr1w r1, r4, abort=20f ldr1w r1, r5, abort=20f @@ -118,9 +123,16 @@ ldr1w r1, r8, abort=20f ldr1w r1, lr, abort=20f +#if LDR1W_SHIFT < STR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT +#elif LDR1W_SHIFT > STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT +#endif add pc, pc, ip nop - nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr str1w r0, r3, abort=20f str1w r0, r4, abort=20f str1w r0, r5, abort=20f @@ -168,7 +180,7 @@ subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb ip, ip, #32 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) @@ -185,24 +197,24 @@ 12: PLD( pld [r1, #124] ) 13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull + mov r3, lr, lspull #\pull subs r2, r2, #32 ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f bge 12b PLD( cmn r2, #96 ) @@ -213,10 +225,10 @@ 14: ands ip, r2, #28 beq 16f -15: mov r3, lr, pull #\pull +15: mov r3, lr, lspull #\pull ldr1w r1, lr, abort=21f subs ip, ip, #4 - orr r3, r3, lr, push #\push + orr r3, r3, lr, lspush #\push str1w r0, r3, abort=21f bgt 15b CALGN( cmp r2, #0 ) @@ -236,7 +248,7 @@ /* - * Abort preanble and completion macros. + * Abort preamble and completion macros. * If a fixup handler is required then those macros must surround it. * It is assumed that the fixup code will handle the private part of * the exit macro. diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 4a6d8ea1402..d066df686e1 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __arch_copy_to_user(void *to, const void *from, size_t n) + * size_t __copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -33,8 +33,15 @@ * Number of bytes NOT copied. 
*/ +#define LDR1W_SHIFT 0 +#ifndef CONFIG_THUMB2_KERNEL +#define STR1W_SHIFT 0 +#else +#define STR1W_SHIFT 1 +#endif + .macro ldr1w ptr reg abort - ldr \reg, [\ptr], #4 + W(ldr) \reg, [\ptr], #4 .endm .macro ldr4w ptr reg1 reg2 reg3 reg4 abort @@ -50,10 +57,7 @@ .endm .macro str1w ptr reg abort -100: strt \reg, [\ptr], #4 - .section __ex_table, "a" - .long 100b, \abort - .previous + strusr \reg, \ptr, 4, abort=\abort .endm .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort @@ -68,10 +72,7 @@ .endm .macro str1b ptr reg cond=al abort -100: str\cond\()bt \reg, [\ptr], #1 - .section __ex_table, "a" - .long 100b, \abort - .previous + strusr \reg, \ptr, 1, \cond, abort=\abort .endm .macro enter reg1 reg2 @@ -86,16 +87,20 @@ .text -ENTRY(__arch_copy_to_user) +ENTRY(__copy_to_user_std) +WEAK(__copy_to_user) #include "copy_template.S" - .section .fixup,"ax" +ENDPROC(__copy_to_user) +ENDPROC(__copy_to_user_std) + + .pushsection .fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} sub r0, r0, r1 rsb r0, r0, r2 copy_abort_end - .previous + .popsection diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S index 7065a20ee8a..3ac6ef01bc4 100644 --- a/arch/arm/lib/csumipv6.S +++ b/arch/arm/lib/csumipv6.S @@ -28,5 +28,6 @@ ENTRY(__csum_ipv6_magic) adcs r0, r0, r3 adcs r0, r0, r2 adcs r0, r0, #0 - LOADREGS(fd, sp!, {pc}) + ldmfd sp!, {pc} +ENDPROC(__csum_ipv6_magic) diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index cb5e3708f11..31d3cb34740 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -26,7 +26,7 @@ td1 .req r4 @ save before use td2 .req r5 @ save before use td3 .req lr -.zero: mov r0, sum +.Lzero: mov r0, sum add sp, sp, #4 ldr pc, [sp], #4 @@ -34,21 +34,22 @@ td3 .req lr * Handle 0 to 7 bytes, with any alignment of source and * destination pointers. Note that when we get here, C = 0 */ -.less8: teq len, #0 @ check for zero count - beq .zero +.Lless8: teq len, #0 @ check for zero count + beq .Lzero /* we must have at least one byte. */ tst buf, #1 @ odd address? + movne sum, sum, ror #8 ldrneb td0, [buf], #1 subne len, len, #1 adcnes sum, sum, td0, put_byte_1 -.less4: tst len, #6 - beq .less8_byte +.Lless4: tst len, #6 + beq .Lless8_byte /* we are now half-word aligned */ -.less8_wordlp: +.Lless8_wordlp: #if __LINUX_ARM_ARCH__ >= 4 ldrh td0, [buf], #2 sub len, len, #2 @@ -64,19 +65,19 @@ td3 .req lr #endif adcs sum, sum, td0 tst len, #6 - bne .less8_wordlp + bne .Lless8_wordlp -.less8_byte: tst len, #1 @ odd number of bytes +.Lless8_byte: tst len, #1 @ odd number of bytes ldrneb td0, [buf], #1 @ include last byte adcnes sum, sum, td0, put_byte_0 @ update checksum -.done: adc r0, sum, #0 @ collect up the last carry +.Ldone: adc r0, sum, #0 @ collect up the last carry ldr td0, [sp], #4 tst td0, #1 @ check buffer alignment movne r0, r0, ror #8 @ rotate checksum by 8 bits ldr pc, [sp], #4 @ return -.not_aligned: tst buf, #1 @ odd address +.Lnot_aligned: tst buf, #1 @ odd address ldrneb td0, [buf], #1 @ make even subne len, len, #1 adcnes sum, sum, td0, put_byte_1 @ update checksum @@ -101,11 +102,14 @@ td3 .req lr ENTRY(csum_partial) stmfd sp!, {buf, lr} cmp len, #8 @ Ensure that we have at least - blo .less8 @ 8 bytes to copy. + blo .Lless8 @ 8 bytes to copy. 
+ + tst buf, #1 + movne sum, sum, ror #8 adds sum, sum, #0 @ C = 0 tst buf, #3 @ Test destination alignment - blne .not_aligned @ aligh destination, return here + blne .Lnot_aligned @ align destination, return here 1: bics ip, len, #31 beq 3f @@ -127,11 +131,12 @@ ENTRY(csum_partial) ldmfd sp!, {r4 - r5} 3: tst len, #0x1c @ should not change C - beq .less4 + beq .Lless4 4: ldr td0, [buf], #4 sub len, len, #4 adcs sum, sum, td0 tst len, #0x1c bne 4b - b .less4 + b .Lless4 +ENDPROC(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S index 990ee63b246..d03fc71fc88 100644 --- a/arch/arm/lib/csumpartialcopy.S +++ b/arch/arm/lib/csumpartialcopy.S @@ -18,11 +18,11 @@ */ .macro save_regs - stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc} + stmfd sp!, {r1, r4 - r8, lr} .endm - .macro load_regs,flags - LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc}) + .macro load_regs + ldmfd sp!, {r1, r4 - r8, pc} .endm .macro load1b, reg1 @@ -48,5 +48,6 @@ .endm #define FN_ENTRY ENTRY(csum_partial_copy_nocheck) +#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) #include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d3a2f4667db..d6e742d2400 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -22,8 +22,8 @@ dst .req r1 len .req r2 sum .req r3 -.zero: mov r0, sum - load_regs ea +.Lzero: mov r0, sum + load_regs /* * Align an unaligned destination pointer. We know that @@ -31,8 +31,9 @@ sum .req r3 * the length. Note that the source pointer hasn't been * aligned yet. */ -.dst_unaligned: tst dst, #1 - beq .dst_16bit +.Ldst_unaligned: + tst dst, #1 + beq .Ldst_16bit load1b ip sub len, len, #1 @@ -41,7 +42,7 @@ sum .req r3 tst dst, #2 moveq pc, lr @ dst is now 32bit aligned -.dst_16bit: load2b r8, ip +.Ldst_16bit: load2b r8, ip sub len, len, #2 adcs sum, sum, r8, put_byte_0 strb r8, [dst], #1 @@ -53,12 +54,12 @@ sum .req r3 * Handle 0 to 7 bytes, with any alignment of source and * destination pointers. Note that when we get here, C = 0 */ -.less8: teq len, #0 @ check for zero count - beq .zero +.Lless8: teq len, #0 @ check for zero count + beq .Lzero /* we must have at least one byte. */ tst dst, #1 @ dst 16-bit aligned - beq .less8_aligned + beq .Lless8_aligned /* Align dst */ load1b ip @@ -66,7 +67,7 @@ sum .req r3 adcs sum, sum, ip, put_byte_1 @ update checksum strb ip, [dst], #1 tst len, #6 - beq .less8_byteonly + beq .Lless8_byteonly 1: load2b r8, ip sub len, len, #2 @@ -74,27 +75,26 @@ sum .req r3 strb r8, [dst], #1 adcs sum, sum, ip, put_byte_1 strb ip, [dst], #1 -.less8_aligned: tst len, #6 +.Lless8_aligned: + tst len, #6 bne 1b -.less8_byteonly: +.Lless8_byteonly: tst len, #1 - beq .done + beq .Ldone load1b r8 adcs sum, sum, r8, put_byte_0 @ update checksum strb r8, [dst], #1 - b .done + b .Ldone FN_ENTRY - mov ip, sp save_regs - sub fp, ip, #4 cmp len, #8 @ Ensure that we have at least - blo .less8 @ 8 bytes to copy. + blo .Lless8 @ 8 bytes to copy. 
adds sum, sum, #0 @ C = 0 tst dst, #3 @ Test destination alignment - blne .dst_unaligned @ align destination, return here + blne .Ldst_unaligned @ align destination, return here /* * Ok, the dst pointer is now 32bit aligned, and we know @@ -103,7 +103,7 @@ FN_ENTRY */ tst src, #3 @ Test source alignment - bne .src_not_aligned + bne .Lsrc_not_aligned /* Routine for src & dst aligned */ @@ -136,17 +136,17 @@ FN_ENTRY adcs sum, sum, r4 4: ands len, len, #3 - beq .done + beq .Ldone load1l r4 tst len, #2 mov r5, r4, get_byte_0 - beq .exit - adcs sum, sum, r4, push #16 + beq .Lexit + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 -.exit: tst len, #1 +.Lexit: tst len, #1 strneb r5, [dst], #1 andne r5, r5, #255 adcnes sum, sum, r5, put_byte_0 @@ -157,37 +157,37 @@ FN_ENTRY * the inefficient byte manipulations in the * architecture independent code. */ -.done: adc r0, sum, #0 +.Ldone: adc r0, sum, #0 ldr sum, [sp, #0] @ dst tst sum, #1 movne r0, r0, ror #8 - load_regs ea + load_regs -.src_not_aligned: +.Lsrc_not_aligned: adc sum, sum, #0 @ include C from dst alignment and ip, src, #3 bic src, src, #3 load1l r5 cmp ip, #2 - beq .src2_aligned - bhi .src3_aligned - mov r4, r5, pull #8 @ C = 0 + beq .Lsrc2_aligned + bhi .Lsrc3_aligned + mov r4, r5, lspull #8 @ C = 0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - mov r7, r7, pull #8 - orr r7, r7, r8, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #8 + mov r4, r8, lspull #8 sub ip, ip, #16 teq ip, #0 bne 1b @@ -196,50 +196,50 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #8 + mov r4, r6, lspull #8 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #24 + orr r4, r4, r5, lspush #24 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #8 + mov r4, r5, lspull #8 4: ands len, len, #3 - beq .done + beq .Ldone mov r5, r4, get_byte_0 tst len, #2 - beq .exit - adcs sum, sum, r4, push #16 + beq .Lexit + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 - b .exit + b .Lexit -.src2_aligned: mov r4, r5, pull #16 +.Lsrc2_aligned: mov r4, r5, lspull #16 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - mov r7, r7, pull #16 - orr r7, r7, r8, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #16 + mov r4, r8, lspull #16 sub ip, ip, #16 teq ip, #0 bne 1b @@ -248,52 +248,52 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 + orr r4, r4, r5, 
lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #16 + mov r4, r6, lspull #16 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #16 + orr r4, r4, r5, lspush #16 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #16 + mov r4, r5, lspull #16 4: ands len, len, #3 - beq .done + beq .Ldone mov r5, r4, get_byte_0 tst len, #2 - beq .exit + beq .Lexit adcs sum, sum, r4 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 tst len, #1 - beq .done + beq .Ldone load1b r5 - b .exit + b .Lexit -.src3_aligned: mov r4, r5, pull #24 +.Lsrc3_aligned: mov r4, r5, lspull #24 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - mov r7, r7, pull #24 - orr r7, r7, r8, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #24 + mov r4, r8, lspull #24 sub ip, ip, #16 teq ip, #0 bne 1b @@ -302,30 +302,31 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #24 + mov r4, r6, lspull #24 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #8 + orr r4, r4, r5, lspush #8 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #24 + mov r4, r5, lspull #24 4: ands len, len, #3 - beq .done + beq .Ldone mov r5, r4, get_byte_0 tst len, #2 - beq .exit + beq .Lexit strb r5, [dst], #1 adcs sum, sum, r4 load1l r4 mov r5, r4, get_byte_0 strb r5, [dst], #1 - adcs sum, sum, r4, push #24 + adcs sum, sum, r4, lspush #24 mov r5, r4, get_byte_1 - b .exit + b .Lexit +FN_EXIT diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 333bca292de..7d08b43d2c0 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -18,58 +18,36 @@ .text .macro save_regs - stmfd sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc} + stmfd sp!, {r1, r2, r4 - r8, lr} .endm - .macro load_regs,flags - ldm\flags fp, {r1, r2, r4-r8, fp, sp, pc} + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, pc} .endm .macro load1b, reg1 -9999: ldrbt \reg1, [r0], $1 - .section __ex_table, "a" - .align 3 - .long 9999b, 6001f - .previous + ldrusr \reg1, r0, 1 .endm .macro load2b, reg1, reg2 -9999: ldrbt \reg1, [r0], $1 -9998: ldrbt \reg2, [r0], $1 - .section __ex_table, "a" - .long 9999b, 6001f - .long 9998b, 6001f - .previous + ldrusr \reg1, r0, 1 + ldrusr \reg2, r0, 1 .endm .macro load1l, reg1 -9999: ldrt \reg1, [r0], $4 - .section __ex_table, "a" - .align 3 - .long 9999b, 6001f - .previous + ldrusr \reg1, r0, 4 .endm .macro load2l, reg1, reg2 -9999: ldrt \reg1, [r0], $4 -9998: ldrt \reg2, [r0], $4 - .section __ex_table, "a" - .long 9999b, 6001f - .long 9998b, 6001f - .previous + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 .endm .macro load4l, reg1, reg2, reg3, reg4 -9999: ldrt \reg1, [r0], $4 -9998: ldrt \reg2, [r0], $4 -9997: ldrt \reg3, [r0], $4 -9996: ldrt \reg4, [r0], $4 - .section __ex_table, "a" - .long 9999b, 6001f - .long 9998b, 6001f - .long 9997b, 6001f - .long 9996b, 6001f - .previous + 
ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + ldrusr \reg3, r0, 4 + ldrusr \reg4, r0, 4 .endm /* @@ -80,6 +58,7 @@ */ #define FN_ENTRY ENTRY(csum_partial_copy_from_user) +#define FN_EXIT ENDPROC(csum_partial_copy_from_user) #include "csumpartialcopygeneric.S" @@ -89,16 +68,16 @@ * so properly, we would have to add in whatever registers were loaded before * the fault, which, with the current asm above is not predictable. */ - .section .fixup,"ax" + .pushsection .fixup,"ax" .align 4 -6001: mov r4, #-EFAULT - ldr r5, [fp, #4] @ *err_ptr +9001: mov r4, #-EFAULT + ldr r5, [sp, #8*4] @ *err_ptr str r4, [r5] ldmia sp, {r1, r2} @ retrieve dst, len add r2, r2, r1 mov r0, #0 @ zero the buffer -6002: teq r2, r1 +9002: teq r2, r1 strneb r0, [r1], #1 - bne 6002b - load_regs ea - .previous + bne 9002b + load_regs + .popsection diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S new file mode 100644 index 00000000000..bc1033b897b --- /dev/null +++ b/arch/arm/lib/delay-loop.S @@ -0,0 +1,68 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/delay.h> + .text + +.LC0: .word loops_per_jiffy +.LC1: .word UDELAY_MULT + +/* + * r0 <= 2000 + * lpj <= 0x01ffffff (max. 3355 bogomips) + * HZ <= 1000 + */ + +ENTRY(__loop_udelay) + ldr r2, .LC1 + mul r0, r2, r0 +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 + mov r1, #-1 + ldr r2, .LC0 + ldr r2, [r2] @ max = 0x01ffffff + add r0, r0, r1, lsr #32-14 + mov r0, r0, lsr #14 @ max = 0x0001ffff + add r2, r2, r1, lsr #32-10 + mov r2, r2, lsr #10 @ max = 0x00007fff + mul r0, r2, r0 @ max = 2^32-1 + add r0, r0, r1, lsr #32-6 + movs r0, r0, lsr #6 + moveq pc, lr + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + */ + .align 3 + +@ Delay routine +ENTRY(__loop_delay) + subs r0, r0, #1 +#if 0 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 +#endif + bhi __loop_delay + mov pc, lr +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S deleted file mode 100644 index 3c7f7e675dd..00000000000 --- a/arch/arm/lib/delay.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <asm/assembler.h> - .text - -LC0: .word loops_per_jiffy - -/* - * 0 <= r0 <= 2000 - */ -ENTRY(__udelay) - mov r2, #0x6800 - orr r2, r2, #0x00db - mul r0, r2, r0 -ENTRY(__const_udelay) @ 0 <= r0 <= 0x01ffffff - ldr r2, LC0 - ldr r2, [r2] @ max = 0x0fffffff - mov r0, r0, lsr #11 @ max = 0x00003fff - mov r2, r2, lsr #11 @ max = 0x0003ffff - mul r0, r2, r0 @ max = 2^32-1 - movs r0, r0, lsr #6 - RETINSTR(moveq,pc,lr) - -/* - * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32 - * - * Oh, if only we had a cycle counter... 
- */ - -@ Delay routine -ENTRY(__delay) - subs r0, r0, #1 -#if 0 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 - RETINSTR(movls,pc,lr) - subs r0, r0, #1 -#endif - bhi __delay - RETINSTR(mov,pc,lr) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 00000000000..5306de35013 --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,93 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/timex.h> + +/* + * Default to the loop-based delay implementation. + */ +struct arm_delay_ops arm_delay_ops = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +static const struct delay_timer *delay_timer; +static bool delay_calibrated; + +int read_current_timer(unsigned long *timer_val) +{ + if (!delay_timer) + return -ENXIO; + + *timer_val = delay_timer->read_current_timer(); + return 0; +} +EXPORT_SYMBOL_GPL(read_current_timer); + +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= arm_delay_ops.ticks_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init register_current_timer_delay(const struct delay_timer *timer) +{ + if (!delay_calibrated) { + pr_info("Switching to timer-based delay loop\n"); + delay_timer = timer; + lpj_fine = timer->freq / HZ; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; + + delay_calibrated = true; + } else { + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); + } +} + +unsigned long calibrate_delay_is_known(void) +{ + delay_calibrated = true; + return lpj_fine; +} diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index ec9a1cd6176..e55c4842c29 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/unwind.h> #ifdef __ARMEB__ #define xh r0 @@ -44,6 +45,7 @@ */ ENTRY(__do_div64) +UNWIND(.fnstart) @ Test for easy paths first. 
subs ip, r4, #1 @@ -177,7 +179,9 @@ ENTRY(__do_div64) mov yh, xh, lsr ip mov yl, xl, lsr ip rsb ip, ip, #32 - orr yl, yl, xh, lsl ip + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) mov xh, xl, lsl ip mov xh, xh, lsr ip mov pc, lr @@ -187,14 +191,21 @@ ENTRY(__do_div64) moveq yh, xh moveq xh, #0 moveq pc, lr +UNWIND(.fnend) +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: @ Division by 0: - str lr, [sp, #-4]! + str lr, [sp, #-8]! bl __div0 @ as wrong as it could be... mov yl, #0 mov yh, #0 mov xh, #0 - ldr pc, [sp], #4 + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(__do_div64) diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S index fb7b602a6f7..e6057fa851b 100644 --- a/arch/arm/lib/ecard.S +++ b/arch/arm/lib/ecard.S @@ -12,7 +12,6 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <asm/hardware.h> #define CPSR2SPSR(rt) \ mrs rt, cpsr; \ @@ -29,7 +28,7 @@ ENTRY(ecard_loader_read) CPSR2SPSR(r0) mov lr, pc mov pc, r2 - LOADREGS(fd, sp!, {r4 - r12, pc}) + ldmfd sp!, {r4 - r12, pc} @ Purpose: call an expansion card loader to reset the card @ Proto : void read_loader(int card_base, char *loader); @@ -41,5 +40,5 @@ ENTRY(ecard_loader_reset) CPSR2SPSR(r0) mov lr, pc add pc, r1, #8 - LOADREGS(fd, sp!, {r4 - r12, pc}) + ldmfd sp!, {r4 - r12, pc} diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index f055d56ea68..64f6bc1a913 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -25,14 +25,18 @@ ENTRY(_find_first_zero_bit_le) teq r1, #0 beq 3f mov r2, #0 -1: ldrb r3, [r0, r2, lsr #3] +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) eors r3, r3, #0xff @ invert bits - bne .found @ any now set - found zero bit + bne .L_found @ any now set - found zero bit add r2, r2, #8 @ next bit pointer 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(_find_first_zero_bit_le) /* * Purpose : Find next 'zero' bit @@ -43,13 +47,16 @@ ENTRY(_find_next_zero_bit_le) beq 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine - ldrb r3, [r0, r2, lsr #3] + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) eor r3, r3, #0xff @ now looking for a 1 bit movs r3, r3, lsr ip @ shift off unused bits - bne .found + bne .L_found orr r2, r2, #7 @ if zero, then no bits here add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_le) /* * Purpose : Find a 'one' bit @@ -59,14 +66,18 @@ ENTRY(_find_first_bit_le) teq r1, #0 beq 3f mov r2, #0 -1: ldrb r3, [r0, r2, lsr #3] +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) movs r3, r3 - bne .found @ any now set - found zero bit + bne .L_found @ any now set - found zero bit add r2, r2, #8 @ next bit pointer 2: cmp r2, r1 @ any more? 
blo 1b 3: mov r0, r1 @ no free bits - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(_find_first_bit_le) /* * Purpose : Find next 'one' bit @@ -77,12 +88,15 @@ ENTRY(_find_next_bit_le) beq 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine - ldrb r3, [r0, r2, lsr #3] + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) movs r3, r3, lsr ip @ shift off unused bits - bne .found + bne .L_found orr r2, r2, #7 @ if zero, then no bits here add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit +ENDPROC(_find_next_bit_le) #ifdef __ARMEB__ @@ -91,14 +105,17 @@ ENTRY(_find_first_zero_bit_be) beq 3f mov r2, #0 1: eor r3, r2, #0x18 @ big endian byte ordering - ldrb r3, [r0, r3, lsr #3] + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) eors r3, r3, #0xff @ invert bits - bne .found @ any now set - found zero bit + bne .L_found @ any now set - found zero bit add r2, r2, #8 @ next bit pointer 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) teq r1, #0 @@ -106,27 +123,33 @@ ENTRY(_find_next_zero_bit_be) ands ip, r2, #7 beq 1b @ If new byte, goto old routine eor r3, r2, #0x18 @ big endian byte ordering - ldrb r3, [r0, r3, lsr #3] + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) eor r3, r3, #0xff @ now looking for a 1 bit movs r3, r3, lsr ip @ shift off unused bits - bne .found + bne .L_found orr r2, r2, #7 @ if zero, then no bits here add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_be) ENTRY(_find_first_bit_be) teq r1, #0 beq 3f mov r2, #0 1: eor r3, r2, #0x18 @ big endian byte ordering - ldrb r3, [r0, r3, lsr #3] + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) movs r3, r3 - bne .found @ any now set - found zero bit + bne .L_found @ any now set - found zero bit add r2, r2, #8 @ next bit pointer 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) teq r1, #0 @@ -134,22 +157,25 @@ ENTRY(_find_next_bit_be) ands ip, r2, #7 beq 1b @ If new byte, goto old routine eor r3, r2, #0x18 @ big endian byte ordering - ldrb r3, [r0, r3, lsr #3] + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) movs r3, r3, lsr ip @ shift off unused bits - bne .found + bne .L_found orr r2, r2, #7 @ if zero, then no bits here add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit +ENDPROC(_find_next_bit_be) #endif /* * One or more bits in the LSB of r3 are assumed to be set. */ -.found: +.L_found: #if __LINUX_ARM_ARCH__ >= 5 - rsb r1, r3, #0 - and r3, r3, r1 + rsb r0, r3, #0 + and r3, r3, r0 clz r3, r3 rsb r3, r3, #31 add r0, r2, r3 @@ -164,5 +190,7 @@ ENTRY(_find_next_bit_be) addeq r2, r2, #1 mov r0, r2 #endif - RETINSTR(mov,pc,lr) + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + mov pc, lr diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index d204018070a..9b06bb41fca 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,63 +16,65 @@ * __get_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value + * r2 contains the zero-extended value * lr corrupted * - * No other registers must be altered. 
(see include/asm-arm/uaccess.h + * No other registers must be altered. (see <asm/uaccess.h> * for specific ASM register usage). * * Note that ADDR_LIMIT is either 0 or 0xc0000000. * Note also that it is intended that __get_user_bad is not global. */ -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> +#include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> +#include <asm/domain.h> - .global __get_user_1 -__get_user_1: -1: ldrbt r2, [r0] +ENTRY(__get_user_1) + check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr +ENDPROC(__get_user_1) - .global __get_user_2 -__get_user_2: +ENTRY(__get_user_2) + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip 2: ldrbt r2, [r0], #1 -3: ldrbt r3, [r0] +3: ldrbt rb, [r0], #0 +#else +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] +#endif #ifndef __ARMEB__ - orr r2, r2, r3, lsl #8 + orr r2, r2, rb, lsl #8 #else - orr r2, r3, r2, lsl #8 + orr r2, rb, r2, lsl #8 #endif mov r0, #0 mov pc, lr +ENDPROC(__get_user_2) - .global __get_user_4 -__get_user_4: -4: ldrt r2, [r0] - mov r0, #0 - mov pc, lr - - .global __get_user_8 -__get_user_8: -5: ldrt r2, [r0], #4 -6: ldrt r3, [r0] +ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr +ENDPROC(__get_user_4) -__get_user_bad_8: - mov r3, #0 __get_user_bad: mov r2, #0 mov r0, #-EFAULT mov pc, lr +ENDPROC(__get_user_bad) -.section __ex_table, "a" +.pushsection __ex_table, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad .long 3b, __get_user_bad .long 4b, __get_user_bad - .long 5b, __get_user_bad_8 - .long 6b, __get_user_bad_8 -.previous +.popsection diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S index 3aacd01d40e..69719bad674 100644 --- a/arch/arm/lib/io-acorn.S +++ b/arch/arm/lib/io-acorn.S @@ -11,14 +11,14 @@ * */ #include <linux/linkage.h> +#include <linux/kern_levels.h> #include <asm/assembler.h> -#include <asm/hardware.h> .text .align -.iosl_warning: - .ascii "<4>insl/outsl not implemented, called from %08lX\0" +.Liosl_warning: + .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0" .align /* @@ -27,6 +27,6 @@ */ ENTRY(insl) ENTRY(outsl) - adr r0, .iosl_warning + adr r0, .Liosl_warning mov r1, lr b printk diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index 081ef749298..9f4238987fe 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -10,7 +10,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> -.insb_align: rsb ip, ip, #4 +.Linsb_align: rsb ip, ip, #4 cmp ip, r2 movgt ip, r2 cmp ip, #2 @@ -21,20 +21,20 @@ ldrgtb r3, [r0] strgtb r3, [r1], #1 subs r2, r2, ip - bne .insb_aligned + bne .Linsb_aligned ENTRY(__raw_readsb) teq r2, #0 @ do we have to check for the zero len? 
moveq pc, lr ands ip, r1, #3 - bne .insb_align + bne .Linsb_align -.insb_aligned: stmfd sp!, {r4 - r6, lr} +.Linsb_aligned: stmfd sp!, {r4 - r6, lr} subs r2, r2, #16 - bmi .insb_no_16 + bmi .Linsb_no_16 -.insb_16_lp: ldrb r3, [r0] +.Linsb_16_lp: ldrb r3, [r0] ldrb r4, [r0] ldrb r5, [r0] mov r3, r3, put_byte_0 @@ -69,13 +69,13 @@ ENTRY(__raw_readsb) stmia r1!, {r3 - r6} subs r2, r2, #16 - bpl .insb_16_lp + bpl .Linsb_16_lp tst r2, #15 - LOADREGS(eqfd, sp!, {r4 - r6, pc}) + ldmeqfd sp!, {r4 - r6, pc} -.insb_no_16: tst r2, #8 - beq .insb_no_8 +.Linsb_no_16: tst r2, #8 + beq .Linsb_no_8 ldrb r3, [r0] ldrb r4, [r0] @@ -95,8 +95,8 @@ ENTRY(__raw_readsb) orr r4, r4, ip, put_byte_3 stmia r1!, {r3, r4} -.insb_no_8: tst r2, #4 - beq .insb_no_4 +.Linsb_no_8: tst r2, #4 + beq .Linsb_no_4 ldrb r3, [r0] ldrb r4, [r0] @@ -108,8 +108,8 @@ ENTRY(__raw_readsb) orr r3, r3, r6, put_byte_3 str r3, [r1], #4 -.insb_no_4: ands r2, r2, #3 - LOADREGS(eqfd, sp!, {r4 - r6, pc}) +.Linsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4 - r6, pc} cmp r2, #2 ldrb r3, [r0] @@ -119,4 +119,5 @@ ENTRY(__raw_readsb) ldrgtb r3, [r0] strgtb r3, [r1] - LOADREGS(fd, sp!, {r4 - r6, pc}) + ldmfd sp!, {r4 - r6, pc} +ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 75a9121cb23..7a7430950c7 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl) strb ip, [r1], #1 4: subs r2, r2, #1 - mov ip, r3, pull #24 + mov ip, r3, lspull #24 ldrne r3, [r0] - orrne ip, ip, r3, push #8 + orrne ip, ip, r3, lspush #8 strne ip, [r1], #4 bne 4b b 8f 5: subs r2, r2, #1 - mov ip, r3, pull #16 + mov ip, r3, lspull #16 ldrne r3, [r0] - orrne ip, ip, r3, push #16 + orrne ip, ip, r3, lspush #16 strne ip, [r1], #4 bne 5b b 7f 6: subs r2, r2, #1 - mov ip, r3, pull #8 + mov ip, r3, lspull #8 ldrne r3, [r0] - orrne ip, ip, r3, push #24 + orrne ip, ip, r3, lspush #24 strne ip, [r1], #4 bne 6b @@ -76,3 +76,4 @@ ENTRY(__raw_readsl) 8: mov r3, ip, get_byte_0 strb r3, [r1, #0] mov pc, lr +ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 476cf7f8a63..88487c8c4f2 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -9,18 +9,17 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <asm/hardware.h> -.insw_bad_alignment: - adr r0, .insw_bad_align_msg +.Linsw_bad_alignment: + adr r0, .Linsw_bad_align_msg mov r2, lr b panic -.insw_bad_align_msg: +.Linsw_bad_align_msg: .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" .align -.insw_align: tst r1, #1 - bne .insw_bad_alignment +.Linsw_align: tst r1, #1 + bne .Linsw_bad_alignment ldr r3, [r0] strb r3, [r1], #1 @@ -28,22 +27,22 @@ strb r3, [r1], #1 subs r2, r2, #1 - RETINSTR(moveq, pc, lr) + moveq pc, lr ENTRY(__raw_readsw) teq r2, #0 @ do we have to check for the zero len? 
moveq pc, lr tst r1, #3 - bne .insw_align + bne .Linsw_align -.insw_aligned: mov ip, #0xff +.Linsw_aligned: mov ip, #0xff orr ip, ip, ip, lsl #8 stmfd sp!, {r4, r5, r6, lr} subs r2, r2, #8 - bmi .no_insw_8 + bmi .Lno_insw_8 -.insw_8_lp: ldr r3, [r0] +.Linsw_8_lp: ldr r3, [r0] and r3, r3, ip ldr r4, [r0] orr r3, r3, r4, lsl #16 @@ -66,13 +65,13 @@ ENTRY(__raw_readsw) stmia r1!, {r3 - r6} subs r2, r2, #8 - bpl .insw_8_lp + bpl .Linsw_8_lp tst r2, #7 - LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + ldmeqfd sp!, {r4, r5, r6, pc} -.no_insw_8: tst r2, #4 - beq .no_insw_4 +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 ldr r3, [r0] and r3, r3, ip @@ -86,8 +85,8 @@ ENTRY(__raw_readsw) stmia r1!, {r3, r4} -.no_insw_4: tst r2, #2 - beq .no_insw_2 +.Lno_insw_4: tst r2, #2 + beq .Lno_insw_2 ldr r3, [r0] and r3, r3, ip @@ -96,12 +95,12 @@ ENTRY(__raw_readsw) str r3, [r1], #4 -.no_insw_2: tst r2, #1 +.Lno_insw_2: tst r2, #1 ldrne r3, [r0] strneb r3, [r1], #1 movne r3, r3, lsr #8 strneb r3, [r1] - LOADREGS(fd, sp!, {r4, r5, r6, pc}) + ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index c92b66ecbe8..1f393d42593 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -18,8 +18,8 @@ #endif .endm -.insw_align: movs ip, r1, lsl #31 - bne .insw_noalign +.Linsw_align: movs ip, r1, lsl #31 + bne .Linsw_noalign ldrh ip, [r0] sub r2, r2, #1 strh ip, [r1], #2 @@ -28,14 +28,14 @@ ENTRY(__raw_readsw) teq r2, #0 moveq pc, lr tst r1, #3 - bne .insw_align + bne .Linsw_align stmfd sp!, {r4, r5, lr} subs r2, r2, #8 - bmi .no_insw_8 + bmi .Lno_insw_8 -.insw_8_lp: ldrh r3, [r0] +.Linsw_8_lp: ldrh r3, [r0] ldrh r4, [r0] pack r3, r3, r4 @@ -53,10 +53,10 @@ ENTRY(__raw_readsw) subs r2, r2, #8 stmia r1!, {r3 - r5, ip} - bpl .insw_8_lp + bpl .Linsw_8_lp -.no_insw_8: tst r2, #4 - beq .no_insw_4 +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 ldrh r3, [r0] ldrh r4, [r0] @@ -68,15 +68,15 @@ ENTRY(__raw_readsw) stmia r1!, {r3, r4} -.no_insw_4: movs r2, r2, lsl #31 - bcc .no_insw_2 +.Lno_insw_4: movs r2, r2, lsl #31 + bcc .Lno_insw_2 ldrh r3, [r0] ldrh ip, [r0] pack r3, r3, ip str r3, [r1], #4 -.no_insw_2: ldrneh r3, [r0] +.Lno_insw_2: ldrneh r3, [r0] strneh r3, [r1] ldmfd sp!, {r4, r5, pc} @@ -93,7 +93,7 @@ ENTRY(__raw_readsw) #define pull_hbyte1 lsr #8 #endif -.insw_noalign: stmfd sp!, {r4, lr} +.Linsw_noalign: stmfd sp!, {r4, lr} ldrccb ip, [r1, #-1]! bcc 1f @@ -128,3 +128,4 @@ ENTRY(__raw_readsw) _BE_ONLY_( movne ip, ip, lsr #24 ) strneb ip, [r1] ldmfd sp!, {r4, pc} +ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c deleted file mode 100644 index 824253948f5..00000000000 --- a/arch/arm/lib/io-shark.c +++ /dev/null @@ -1,13 +0,0 @@ -/* - * linux/arch/arm/lib/io-shark.c - * - * by Alexander Schulz - * - * derived from: - * linux/arch/arm/lib/io-ebsa.S - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index 70b2561bdb0..68b92f4acae 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -30,7 +30,7 @@ #endif .endm -.outsb_align: rsb ip, ip, #4 +.Loutsb_align: rsb ip, ip, #4 cmp ip, r2 movgt ip, r2 cmp ip, #2 @@ -41,45 +41,46 @@ ldrgtb r3, [r1], #1 strgtb r3, [r0] subs r2, r2, ip - bne .outsb_aligned + bne .Loutsb_aligned ENTRY(__raw_writesb) teq r2, #0 @ do we have to check for the zero len? moveq pc, lr ands ip, r1, #3 - bne .outsb_align + bne .Loutsb_align -.outsb_aligned: stmfd sp!, {r4, r5, lr} +.Loutsb_aligned: + stmfd sp!, {r4, r5, lr} subs r2, r2, #16 - bmi .outsb_no_16 + bmi .Loutsb_no_16 -.outsb_16_lp: ldmia r1!, {r3, r4, r5, ip} +.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} outword r3 outword r4 outword r5 outword ip subs r2, r2, #16 - bpl .outsb_16_lp + bpl .Loutsb_16_lp tst r2, #15 - LOADREGS(eqfd, sp!, {r4, r5, pc}) + ldmeqfd sp!, {r4, r5, pc} -.outsb_no_16: tst r2, #8 - beq .outsb_no_8 +.Loutsb_no_16: tst r2, #8 + beq .Loutsb_no_8 ldmia r1!, {r3, r4} outword r3 outword r4 -.outsb_no_8: tst r2, #4 - beq .outsb_no_4 +.Loutsb_no_8: tst r2, #4 + beq .Loutsb_no_4 ldr r3, [r1], #4 outword r3 -.outsb_no_4: ands r2, r2, #3 - LOADREGS(eqfd, sp!, {r4, r5, pc}) +.Loutsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4, r5, pc} cmp r2, #2 ldrb r3, [r1], #1 @@ -89,4 +90,5 @@ ENTRY(__raw_writesb) ldrgtb r3, [r1] strgtb r3, [r0] - LOADREGS(fd, sp!, {r4, r5, pc}) + ldmfd sp!, {r4, r5, pc} +ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index f8f14dd227c..d0d104a0dd1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,27 @@ ENTRY(__raw_writesl) blt 5f bgt 6f -4: mov ip, r3, pull #16 +4: mov ip, r3, lspull #16 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #16 + orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b mov pc, lr -5: mov ip, r3, pull #8 +5: mov ip, r3, lspull #8 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #24 + orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b mov pc, lr -6: mov ip, r3, pull #24 +6: mov ip, r3, lspull #24 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #8 + orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b mov pc, lr +ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index 950e7e310f1..49b800419e3 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -9,18 +9,17 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <asm/hardware.h> -.outsw_bad_alignment: - adr r0, .outsw_bad_align_msg +.Loutsw_bad_alignment: + adr r0, .Loutsw_bad_align_msg mov r2, lr b panic -.outsw_bad_align_msg: +.Loutsw_bad_align_msg: .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" .align -.outsw_align: tst r1, #1 - bne .outsw_bad_alignment +.Loutsw_align: tst r1, #1 + bne .Loutsw_bad_alignment add r1, r1, #2 @@ -29,20 +28,20 @@ orr r3, r3, r3, lsl #16 str r3, [r0] subs r2, r2, #1 - RETINSTR(moveq, pc, lr) + moveq pc, lr ENTRY(__raw_writesw) teq r2, #0 @ do we have to check for the zero len? 
moveq pc, lr tst r1, #3 - bne .outsw_align + bne .Loutsw_align -.outsw_aligned: stmfd sp!, {r4, r5, r6, lr} + stmfd sp!, {r4, r5, r6, lr} subs r2, r2, #8 - bmi .no_outsw_8 + bmi .Lno_outsw_8 -.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6} +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} mov ip, r3, lsl #16 orr ip, ip, ip, lsr #16 @@ -77,13 +76,13 @@ ENTRY(__raw_writesw) str ip, [r0] subs r2, r2, #8 - bpl .outsw_8_lp + bpl .Loutsw_8_lp tst r2, #7 - LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + ldmeqfd sp!, {r4, r5, r6, pc} -.no_outsw_8: tst r2, #4 - beq .no_outsw_4 +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 ldmia r1!, {r3, r4} @@ -103,8 +102,8 @@ ENTRY(__raw_writesw) orr ip, ip, ip, lsl #16 str ip, [r0] -.no_outsw_4: tst r2, #2 - beq .no_outsw_2 +.Lno_outsw_4: tst r2, #2 + beq .Lno_outsw_2 ldr r3, [r1], #4 @@ -116,7 +115,7 @@ ENTRY(__raw_writesw) orr ip, ip, ip, lsl #16 str ip, [r0] -.no_outsw_2: tst r2, #1 +.Lno_outsw_2: tst r2, #1 ldrne r3, [r1] @@ -124,4 +123,4 @@ ENTRY(__raw_writesw) orrne ip, ip, ip, lsr #16 strne ip, [r0] - LOADREGS(fd, sp!, {r4, r5, r6, pc}) + ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index 5e240e452af..ff4f71b579e 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -22,8 +22,8 @@ #endif .endm -.outsw_align: movs ip, r1, lsl #31 - bne .outsw_noalign +.Loutsw_align: movs ip, r1, lsl #31 + bne .Loutsw_noalign ldrh r3, [r1], #2 sub r2, r2, #1 @@ -33,35 +33,35 @@ ENTRY(__raw_writesw) teq r2, #0 moveq pc, lr ands r3, r1, #3 - bne .outsw_align + bne .Loutsw_align stmfd sp!, {r4, r5, lr} subs r2, r2, #8 - bmi .no_outsw_8 + bmi .Lno_outsw_8 -.outsw_8_lp: ldmia r1!, {r3, r4, r5, ip} +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} subs r2, r2, #8 outword r3 outword r4 outword r5 outword ip - bpl .outsw_8_lp + bpl .Loutsw_8_lp -.no_outsw_8: tst r2, #4 - beq .no_outsw_4 +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 ldmia r1!, {r3, ip} outword r3 outword ip -.no_outsw_4: movs r2, r2, lsl #31 - bcc .no_outsw_2 +.Lno_outsw_4: movs r2, r2, lsl #31 + bcc .Lno_outsw_2 ldr r3, [r1], #4 outword r3 -.no_outsw_2: ldrneh r3, [r1] +.Lno_outsw_2: ldrneh r3, [r1] strneh r3, [r0] ldmfd sp!, {r4, r5, pc} @@ -74,7 +74,11 @@ ENTRY(__raw_writesw) #define push_hbyte1 lsl #8 #endif -.outsw_noalign: ldr r3, [r1, -r3]! +.Loutsw_noalign: + ARM( ldr r3, [r1, -r3]! ) + THUMB( rsb r3, r3, #0 ) + THUMB( ldr r3, [r1, r3] ) + THUMB( sub r1, r3 ) subcs r2, r2, #1 bcs 2f subs r2, r2, #2 @@ -93,3 +97,4 @@ ENTRY(__raw_writesw) 3: movne ip, r3, lsr #8 strneh ip, [r0] mov pc, lr +ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 59026029d01..c562f649734 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -1,7 +1,7 @@ /* * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines * - * Author: Nicolas Pitre <nico@cam.org> + * Author: Nicolas Pitre <nico@fluxnic.net> * - contributed to gcc-3.4 on Sep 30, 2003 * - adapted for the Linux kernel on Oct 2, 2003 */ @@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA. */ #include <linux/linkage.h> #include <asm/assembler.h> - +#include <asm/unwind.h> .macro ARM_DIV_BODY dividend, divisor, result, curbit @@ -206,6 +206,8 @@ Boston, MA 02111-1307, USA. 
*/ ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) subs r2, r1, #1 moveq pc, lr @@ -229,8 +231,12 @@ ENTRY(__udivsi3) mov r0, r0, lsr r2 mov pc, lr +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) ENTRY(__umodsi3) +UNWIND(.fnstart) subs r2, r1, #1 @ compare divisor with 1 bcc Ldiv0 @@ -244,8 +250,12 @@ ENTRY(__umodsi3) mov pc, lr +UNWIND(.fnend) +ENDPROC(__umodsi3) ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) cmp r1, #0 eor ip, r0, r1 @ save the sign of the result. @@ -282,8 +292,12 @@ ENTRY(__divsi3) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) ENTRY(__modsi3) +UNWIND(.fnstart) cmp r1, #0 beq Ldiv0 @@ -303,12 +317,47 @@ ENTRY(__modsi3) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) +ENDPROC(__modsi3) -Ldiv0: +#ifdef CONFIG_AEABI - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index 46c2ed19ec9..f83d449141f 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -37,12 +37,17 @@ Boston, MA 02110-1301, USA. */ #endif ENTRY(__lshrdi3) +ENTRY(__aeabi_llsr) subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 mov pc, lr +ENDPROC(__lshrdi3) +ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S index ac34fe55d21..1da86991d70 100644 --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -22,4 +22,5 @@ ENTRY(memchr) bne 1b sub r0, r0, #1 2: movne r0, #0 - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7e71d6708a8..a9b9e2287a0 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -13,8 +13,11 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 + .macro ldr1w ptr reg abort - ldr \reg, [\ptr], #4 + W(ldr) \reg, [\ptr], #4 .endm .macro ldr4w ptr reg1 reg2 reg3 reg4 abort @@ -30,7 +33,7 @@ .endm .macro str1w ptr reg abort - str \reg, [\ptr], #4 + W(str) \reg, [\ptr], #4 .endm .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort @@ -57,3 +60,4 @@ ENTRY(memcpy) #include "copy_template.S" +ENDPROC(memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index ef7fddc14ac..d1fc0c0c342 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -13,14 +13,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) 
code -#define CALGN(code...) - .text /* @@ -55,11 +47,12 @@ ENTRY(memmove) stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( bcs 2f ) CALGN( adr r4, 6f ) CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) CALGN( add pc, r4, ip ) PLD( pld [r1, #-4] ) @@ -81,25 +74,25 @@ ENTRY(memmove) rsb ip, ip, #32 addne pc, pc, ip @ C is always clear here b 7f -6: nop - ldr r3, [r1, #-4]! - ldr r4, [r1, #-4]! - ldr r5, [r1, #-4]! - ldr r6, [r1, #-4]! - ldr r7, [r1, #-4]! - ldr r8, [r1, #-4]! - ldr lr, [r1, #-4]! +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! add pc, pc, ip nop - nop - str r3, [r0, #-4]! - str r4, [r0, #-4]! - str r5, [r0, #-4]! - str r6, [r0, #-4]! - str r7, [r0, #-4]! - str r8, [r0, #-4]! - str lr, [r0, #-4]! + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! CALGN( bcs 2b ) @@ -138,8 +131,7 @@ ENTRY(memmove) subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) - CALGN( rsb ip, ip, #32 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) CALGN( bcc 15f ) @@ -155,24 +147,24 @@ ENTRY(memmove) 12: PLD( pld [r1, #-128] ) 13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push + mov lr, r3, lspush #\push subs r2, r2, #32 ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull stmdb r0!, {r4 - r9, ip, lr} bge 12b PLD( cmn r2, #96 ) @@ -183,10 +175,10 @@ ENTRY(memmove) 14: ands ip, r2, #28 beq 16f -15: mov lr, r3, push #\push +15: mov lr, r3, lspush #\push ldr r3, [r1, #-4]! subs ip, ip, #4 - orr lr, lr, r3, pull #\pull + orr lr, lr, r3, lspull #\pull str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) @@ -204,3 +196,4 @@ ENTRY(memmove) 18: backward_copy_shift push=24 pull=8 +ENDPROC(memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index a1795f59993..94b0650ea98 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -14,67 +14,110 @@ .text .align 5 - .word 0 - -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memzero again. - */ ENTRY(memset) ands r3, r0, #3 @ 1 unaligned? 
- bne 1b @ 1 + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 blt 4f + +#if ! CALGN(1)+0 + /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - LOADREGS(eqfd, sp!, {pc}) @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r8, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov r8, r1 + mov lr, r1 + + cmp r2, #96 + tstgt ip, #31 + ble 3f + + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 + +3: subs r2, r2, #64 + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} + bgt 3b + ldmeqfd sp!, {r4-r8, pc} + + tst r2, #32 + stmneia ip!, {r1, r3-r8, lr} + tst r2, #16 + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} + +#endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 - RETINSTR(mov,pc,lr) + strneb r1, [ip], #1 + mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b +ENDPROC(memset) diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index 51ccc60160f..3fbdef5f802 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -39,6 +39,9 @@ ENTRY(__memzero) */ cmp r1, #16 @ 1 we can skip this chunk if we blt 4f @ 1 have < 16 bytes + +#if ! CALGN(1)+0 + /* * We need an extra register for this loop - save the return address and * use the LR @@ -53,7 +56,7 @@ ENTRY(__memzero) stmgeia r0!, {r2, r3, ip, lr} @ 4 stmgeia r0!, {r2, r3, ip, lr} @ 4 bgt 3b @ 1 - LOADREGS(eqfd, sp!, {pc}) @ 1/2 quick exit + ldmeqfd sp!, {pc} @ 1/2 quick exit /* * No need to correct the count; we're only testing bits from now on */ @@ -64,6 +67,47 @@ ENTRY(__memzero) stmneia r0!, {r2, r3, ip, lr} @ 4 ldr lr, [sp], #4 @ 1 +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. 
+ */ + + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 + + cmp r1, #96 + andgts ip, r0, #31 + ble 3f + + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 + +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + 4: tst r1, #8 @ 1 8 bytes or more? stmneia r0!, {r2, r3} @ 2 tst r1, #4 @ 1 4 bytes or more? @@ -77,4 +121,5 @@ ENTRY(__memzero) strneb r2, [r0], #1 @ 1 tst r1, #1 @ 1 a byte left over strneb r2, [r0], #1 @ 1 - RETINSTR(mov,pc,lr) @ 1 + mov pc, lr @ 1 +ENDPROC(__memzero) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S index c7fbdf00531..36c91b4957e 100644 --- a/arch/arm/lib/muldi3.S +++ b/arch/arm/lib/muldi3.S @@ -25,11 +25,12 @@ #endif ENTRY(__muldi3) +ENTRY(__aeabi_lmul) mul xh, yl, xh mla xh, xl, yh, xh - mov ip, xl, asr #16 - mov yh, yl, asr #16 + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 bic xl, xl, ip, lsl #16 bic yl, yl, yh, lsl #16 mla xh, yh, ip, xh @@ -42,3 +43,5 @@ ENTRY(__muldi3) adc xh, xh, ip, lsr #16 mov pc, lr +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 4593e9c07f0..3d73dcb959b 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,61 +16,83 @@ * __put_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * r2, r3 contains the value * Outputs: r0 is the error code * lr corrupted * - * No other registers must be altered. (see include/asm-arm/uaccess.h + * No other registers must be altered. (see <asm/uaccess.h> * for specific ASM register usage). * * Note that ADDR_LIMIT is either 0 or 0xc0000000 * Note also that it is intended that __put_user_bad is not global. 
*/ -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> +#include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> +#include <asm/domain.h> - .global __put_user_1 -__put_user_1: -1: strbt r2, [r0] +ENTRY(__put_user_1) + check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr +ENDPROC(__put_user_1) - .global __put_user_2 -__put_user_2: +ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad mov ip, r2, lsr #8 +#ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: strbt r2, [r0], #1 -3: strbt ip, [r0] +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] #else -2: strbt ip, [r0], #1 -3: strbt r2, [r0] +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] #endif +#else /* !CONFIG_THUMB2_KERNEL */ +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] +#else +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] +#endif +#endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 mov pc, lr +ENDPROC(__put_user_2) - .global __put_user_4 -__put_user_4: -4: strt r2, [r0] +ENTRY(__put_user_4) + check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr +ENDPROC(__put_user_4) - .global __put_user_8 -__put_user_8: -5: strt r2, [r0], #4 -6: strt r3, [r0] +ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] +#else +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] +#endif mov r0, #0 mov pc, lr +ENDPROC(__put_user_8) __put_user_bad: mov r0, #-EFAULT mov pc, lr +ENDPROC(__put_user_bad) -.section __ex_table, "a" +.pushsection __ex_table, "a" .long 1b, __put_user_bad .long 2b, __put_user_bad .long 3b, __put_user_bad .long 4b, __put_user_bad .long 5b, __put_user_bad .long 6b, __put_user_bad -.previous +.popsection diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S index 83bc23d5b03..618fedae4b3 100644 --- a/arch/arm/lib/setbit.S +++ b/arch/arm/lib/setbit.S @@ -12,11 +12,4 @@ #include "bitops.h" .text -/* - * Purpose : Function to set a bit - * Prototype: int set_bit(int bit, void *addr) - */ -ENTRY(_set_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_set_bit_le) - bitop orr +bitop _set_bit, orr diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S deleted file mode 100644 index ff6ece487ff..00000000000 --- a/arch/arm/lib/sha1.S +++ /dev/null @@ -1,206 +0,0 @@ -/* - * linux/arch/arm/lib/sha1.S - * - * SHA transform optimized for ARM - * - * Copyright: (C) 2005 by Nicolas Pitre <nico@cam.org> - * Created: September 17, 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * The reference implementation for this code is linux/lib/sha1.c - */ - -#include <linux/linkage.h> - - .text - - -/* - * void sha_transform(__u32 *digest, const char *in, __u32 *W) - * - * Note: the "in" ptr may be unaligned. 
- */ - -ENTRY(sha_transform) - - stmfd sp!, {r4 - r8, lr} - - @ for (i = 0; i < 16; i++) - @ W[i] = be32_to_cpu(in[i]); */ - -#ifdef __ARMEB__ - mov r4, r0 - mov r0, r2 - mov r2, #64 - bl memcpy - mov r2, r0 - mov r0, r4 -#else - mov r3, r2 - mov lr, #16 -1: ldrb r4, [r1], #1 - ldrb r5, [r1], #1 - ldrb r6, [r1], #1 - ldrb r7, [r1], #1 - subs lr, lr, #1 - orr r5, r5, r4, lsl #8 - orr r6, r6, r5, lsl #8 - orr r7, r7, r6, lsl #8 - str r7, [r3], #4 - bne 1b -#endif - - @ for (i = 0; i < 64; i++) - @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); - - sub r3, r2, #4 - mov lr, #64 -2: ldr r4, [r3, #4]! - subs lr, lr, #1 - ldr r5, [r3, #8] - ldr r6, [r3, #32] - ldr r7, [r3, #52] - eor r4, r4, r5 - eor r4, r4, r6 - eor r4, r4, r7 - mov r4, r4, ror #31 - str r4, [r3, #64] - bne 2b - - /* - * The SHA functions are: - * - * f1(B,C,D) = (D ^ (B & (C ^ D))) - * f2(B,C,D) = (B ^ C ^ D) - * f3(B,C,D) = ((B & C) | (D & (B | C))) - * - * Then the sub-blocks are processed as follows: - * - * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ - * B' = A - * C' = ror(B, 2) - * D' = C - * E' = D - * - * We therefore unroll each loop 5 times to avoid register shuffling. - * Also the ror for C (and also D and E which are successivelyderived - * from it) is applied in place to cut on an additional mov insn for - * each round. - */ - - .macro sha_f1, A, B, C, D, E - ldr r3, [r2], #4 - eor ip, \C, \D - add \E, r1, \E, ror #2 - and ip, \B, ip, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f2, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - eor ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f3, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - orr ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - and ip, ip, \D, ror #2 - add \E, \E, r3 - and r3, \B, \C, ror #2 - orr ip, ip, r3 - add \E, \E, ip - .endm - - ldmia r0, {r4 - r8} - - mov lr, #4 - ldr r1, .L_sha_K + 0 - - /* adjust initial values */ - mov r6, r6, ror #30 - mov r7, r7, ror #30 - mov r8, r8, ror #30 - -3: subs lr, lr, #1 - sha_f1 r4, r5, r6, r7, r8 - sha_f1 r8, r4, r5, r6, r7 - sha_f1 r7, r8, r4, r5, r6 - sha_f1 r6, r7, r8, r4, r5 - sha_f1 r5, r6, r7, r8, r4 - bne 3b - - ldr r1, .L_sha_K + 4 - mov lr, #4 - -4: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 4b - - ldr r1, .L_sha_K + 8 - mov lr, #4 - -5: subs lr, lr, #1 - sha_f3 r4, r5, r6, r7, r8 - sha_f3 r8, r4, r5, r6, r7 - sha_f3 r7, r8, r4, r5, r6 - sha_f3 r6, r7, r8, r4, r5 - sha_f3 r5, r6, r7, r8, r4 - bne 5b - - ldr r1, .L_sha_K + 12 - mov lr, #4 - -6: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 6b - - ldmia r0, {r1, r2, r3, ip, lr} - add r4, r1, r4 - add r5, r2, r5 - add r6, r3, r6, ror #2 - add r7, ip, r7, ror #2 - add r8, lr, r8, ror #2 - stmia r0, {r4 - r8} - - ldmfd sp!, {r4 - r8, pc} - -.L_sha_K: - .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 - - -/* - * void sha_init(__u32 *buf) - */ - -.L_sha_initial_digest: - .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 - -ENTRY(sha_init) - - str lr, [sp, #-4]! 
- adr r1, .L_sha_initial_digest - ldmia r1, {r1, r2, r3, ip, lr} - stmia r0, {r1, r2, r3, ip, lr} - ldr pc, [sp], #4 - diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S index 5b9b493733f..d8f2a1c1aea 100644 --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -23,4 +23,5 @@ ENTRY(strchr) teq r2, r1 movne r0, #0 subeq r0, r0, #1 - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(strchr) diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S deleted file mode 100644 index 629cc877527..00000000000 --- a/arch/arm/lib/strncpy_from_user.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * r0 = dst, r1 = src, r2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__arch_strncpy_from_user) - save_lr - mov ip, r1 -1: subs r2, r2, #1 -USER( ldrplbt r3, [r1], #1) - bmi 2f - strb r3, [r0], #1 - teq r3, #0 - bne 1b - sub r1, r1, #1 @ take NUL character out of count -2: sub r0, r1, ip - restore_pc - - .section .fixup,"ax" - .align 0 -9001: mov r3, #0 - strb r3, [r0, #0] @ null terminate - mov r0, #-EFAULT - restore_pc - .previous - diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S deleted file mode 100644 index 67bcd826812..00000000000 --- a/arch/arm/lib/strnlen_user.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * linux/arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* Prototype: unsigned long __arch_strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n + 1 if too long - */ -ENTRY(__arch_strnlen_user) - save_lr - mov r2, r0 -1: -USER( ldrbt r3, [r0], #1) - teq r3, #0 - beq 2f - subs r1, r1, #1 - bne 1b - add r0, r0, #1 -2: sub r0, r0, r2 - restore_pc - - .section .fixup,"ax" - .align 0 -9001: mov r0, #0 - restore_pc - .previous diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S index fa923f026f1..302f20cd242 100644 --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -22,4 +22,5 @@ ENTRY(strrchr) teq r2, #0 bne 1b mov r0, r3 - RETINSTR(mov,pc,lr) + mov pc, lr +ENDPROC(strrchr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S index b25dcd2be53..4becdc3a59c 100644 --- a/arch/arm/lib/testchangebit.S +++ b/arch/arm/lib/testchangebit.S @@ -12,7 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_change_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_change_bit_le) - testop eor, strb +testop _test_and_change_bit, eor, str diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S index 2dcc4b16b68..918841dcce7 100644 --- a/arch/arm/lib/testclearbit.S +++ b/arch/arm/lib/testclearbit.S @@ -12,7 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_clear_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_clear_bit_le) - testop bicne, strneb +testop _test_and_clear_bit, bicne, strne diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S index 9011c969761..8d1b2fe9e48 100644 --- a/arch/arm/lib/testsetbit.S +++ b/arch/arm/lib/testsetbit.S @@ -12,7 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_set_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_set_bit_le) - testop orreq, streqb +testop _test_and_set_bit, orreq, streq diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index 6f1b5b49fe4..e50520904b7 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -14,12 +14,13 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/errno.h> +#include <asm/domain.h> .text #define PAGE_SHIFT 12 -/* Prototype: int __arch_copy_to_user(void *to, const char *from, size_t n) +/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) * Purpose : copy a block to user memory from kernel memory * Params : to - user memory * : from - kernel memory @@ -27,42 +28,42 @@ * Returns : Number of bytes NOT copied. 
*/ -.c2u_dest_not_aligned: +.Lc2u_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( strbt r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( strgtbt r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip - b .c2u_dest_aligned + b .Lc2u_dest_aligned -ENTRY(__arch_copy_to_user) +ENTRY(__copy_to_user) stmfd sp!, {r2, r4 - r7, lr} cmp r2, #4 - blt .c2u_not_enough + blt .Lc2u_not_enough ands ip, r0, #3 - bne .c2u_dest_not_aligned -.c2u_dest_aligned: + bne .Lc2u_dest_not_aligned +.Lc2u_dest_aligned: ands ip, r1, #3 - bne .c2u_src_not_aligned + bne .Lc2u_src_not_aligned /* * Seeing as there has to be at least 8 bytes to copy, we can * copy one word, and force a user-mode page fault... */ -.c2u_0fupi: subs r2, r2, #4 +.Lc2u_0fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .c2u_0nowords + bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( strt r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .c2u_0fupi + beq .Lc2u_0fupi /* * ip = max no. of bytes to copy before needing another "strt" insn */ @@ -70,16 +71,16 @@ USER( strt r3, [r0], #4) @ May fault movlt ip, r2 sub r2, r2, ip subs ip, ip, #32 - blt .c2u_0rem8lp + blt .Lc2u_0rem8lp -.c2u_0cpy8lp: ldmia r1!, {r3 - r6} +.Lc2u_0cpy8lp: ldmia r1!, {r3 - r6} stmia r0!, {r3 - r6} @ Shouldnt fault ldmia r1!, {r3 - r6} subs ip, ip, #32 stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .c2u_0cpy8lp + bpl .Lc2u_0cpy8lp -.c2u_0rem8lp: cmn ip, #16 +.Lc2u_0rem8lp: cmn ip, #16 ldmgeia r1!, {r3 - r6} stmgeia r0!, {r3 - r6} @ Shouldnt fault tst ip, #8 @@ -87,244 +88,246 @@ USER( strt r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - strnet r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 - beq .c2u_0fupi -.c2u_0nowords: teq ip, #0 - beq .c2u_finished -.c2u_nowords: cmp ip, #2 + beq .Lc2u_0fupi +.Lc2u_0nowords: teq ip, #0 + beq .Lc2u_finished +.Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( strbt r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( strgtbt r3, [r0], #1) @ May fault - b .c2u_finished +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished -.c2u_not_enough: +.Lc2u_not_enough: movs ip, r2 - bne .c2u_nowords -.c2u_finished: mov r0, #0 - LOADREGS(fd,sp!,{r2, r4 - r7, pc}) + bne .Lc2u_nowords +.Lc2u_finished: mov r0, #0 + ldmfd sp!, {r2, r4 - r7, pc} -.c2u_src_not_aligned: +.Lc2u_src_not_aligned: bic r1, r1, #3 ldr r7, [r1], #4 cmp ip, #2 - bgt .c2u_3fupi - beq .c2u_2fupi -.c2u_1fupi: subs r2, r2, #4 + bgt .Lc2u_3fupi + beq .Lc2u_2fupi +.Lc2u_1fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .c2u_1nowords - mov r3, r7, pull #8 + bmi .Lc2u_1nowords + mov r3, r7, lspull #8 ldr r7, [r1], #4 - orr r3, r3, r7, push #24 -USER( strt r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #24 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .c2u_1fupi + beq .Lc2u_1fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .c2u_1rem8lp + blt .Lc2u_1rem8lp -.c2u_1cpy8lp: mov r3, r7, pull #8 
+.Lc2u_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .c2u_1cpy8lp + bpl .Lc2u_1cpy8lp -.c2u_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 +.Lc2u_1rem8lp: tst ip, #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #24 - strnet r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #24 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 - beq .c2u_1fupi -.c2u_1nowords: mov r3, r7, get_byte_1 + beq .Lc2u_1fupi +.Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 - beq .c2u_finished + beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( strgebt r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( strgtbt r3, [r0], #1) @ May fault - b .c2u_finished +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished -.c2u_2fupi: subs r2, r2, #4 +.Lc2u_2fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .c2u_2nowords - mov r3, r7, pull #16 + bmi .Lc2u_2nowords + mov r3, r7, lspull #16 ldr r7, [r1], #4 - orr r3, r3, r7, push #16 -USER( strt r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #16 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .c2u_2fupi + beq .Lc2u_2fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .c2u_2rem8lp + blt .Lc2u_2rem8lp -.c2u_2cpy8lp: mov r3, r7, pull #16 +.Lc2u_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .c2u_2cpy8lp + bpl .Lc2u_2cpy8lp -.c2u_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 +.Lc2u_2rem8lp: tst ip, #8 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #16 - strnet r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #16 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 - beq .c2u_2fupi -.c2u_2nowords: mov r3, r7, get_byte_2 + beq .Lc2u_2fupi +.Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 - beq .c2u_finished + beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault 
movge r3, r7, get_byte_3 -USER( strgebt r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( strgtbt r3, [r0], #1) @ May fault - b .c2u_finished +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished -.c2u_3fupi: subs r2, r2, #4 +.Lc2u_3fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .c2u_3nowords - mov r3, r7, pull #24 + bmi .Lc2u_3nowords + mov r3, r7, lspull #24 ldr r7, [r1], #4 - orr r3, r3, r7, push #8 -USER( strt r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #8 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .c2u_3fupi + beq .Lc2u_3fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .c2u_3rem8lp + blt .Lc2u_3rem8lp -.c2u_3cpy8lp: mov r3, r7, pull #24 +.Lc2u_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .c2u_3cpy8lp + bpl .Lc2u_3cpy8lp -.c2u_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 +.Lc2u_3rem8lp: tst ip, #8 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #8 - strnet r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #8 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 - beq .c2u_3fupi -.c2u_3nowords: mov r3, r7, get_byte_3 + beq .Lc2u_3fupi +.Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 - beq .c2u_finished + beq .Lc2u_finished cmp ip, #2 -USER( strbt r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( strgebt r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( strgtbt r3, [r0], #1) @ May fault - b .c2u_finished +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished +ENDPROC(__copy_to_user) - .section .fixup,"ax" + .pushsection .fixup,"ax" .align 0 -9001: LOADREGS(fd,sp!, {r0, r4 - r7, pc}) - .previous +9001: ldmfd sp!, {r0, r4 - r7, pc} + .popsection -/* Prototype: unsigned long __arch_copy_from_user(void *to,const void *from,unsigned long n); +/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n); * Purpose : copy a block from user memory to kernel memory * Params : to - kernel memory * : from - user memory * : n - number of bytes to copy * Returns : Number of bytes NOT copied. 
*/ -.cfu_dest_not_aligned: +.Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( ldrbt r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip - b .cfu_dest_aligned + b .Lcfu_dest_aligned -ENTRY(__arch_copy_from_user) +ENTRY(__copy_from_user) stmfd sp!, {r0, r2, r4 - r7, lr} cmp r2, #4 - blt .cfu_not_enough + blt .Lcfu_not_enough ands ip, r0, #3 - bne .cfu_dest_not_aligned -.cfu_dest_aligned: + bne .Lcfu_dest_not_aligned +.Lcfu_dest_aligned: ands ip, r1, #3 - bne .cfu_src_not_aligned + bne .Lcfu_src_not_aligned + /* * Seeing as there has to be at least 8 bytes to copy, we can * copy one word, and force a user-mode page fault... */ -.cfu_0fupi: subs r2, r2, #4 +.Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .cfu_0nowords -USER( ldrt r3, [r1], #4) + bmi .Lcfu_0nowords +USER( TUSER( ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .cfu_0fupi + beq .Lcfu_0fupi /* * ip = max no. of bytes to copy before needing another "strt" insn */ @@ -332,216 +335,218 @@ USER( ldrt r3, [r1], #4) movlt ip, r2 sub r2, r2, ip subs ip, ip, #32 - blt .cfu_0rem8lp + blt .Lcfu_0rem8lp -.cfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault +.Lcfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault stmia r0!, {r3 - r6} ldmia r1!, {r3 - r6} @ Shouldnt fault subs ip, ip, #32 stmia r0!, {r3 - r6} - bpl .cfu_0cpy8lp + bpl .Lcfu_0cpy8lp -.cfu_0rem8lp: cmn ip, #16 +.Lcfu_0rem8lp: cmn ip, #16 ldmgeia r1!, {r3 - r6} @ Shouldnt fault stmgeia r0!, {r3 - r6} tst ip, #8 ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - ldrnet r3, [r1], #4 @ Shouldnt fault + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 - beq .cfu_0fupi -.cfu_0nowords: teq ip, #0 - beq .cfu_finished -.cfu_nowords: cmp ip, #2 -USER( ldrbt r3, [r1], #1) @ May fault + beq .Lcfu_0fupi +.Lcfu_0nowords: teq ip, #0 + beq .Lcfu_finished +.Lcfu_nowords: cmp ip, #2 +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 - b .cfu_finished + b .Lcfu_finished -.cfu_not_enough: +.Lcfu_not_enough: movs ip, r2 - bne .cfu_nowords -.cfu_finished: mov r0, #0 + bne .Lcfu_nowords +.Lcfu_finished: mov r0, #0 add sp, sp, #8 - LOADREGS(fd,sp!,{r4 - r7, pc}) + ldmfd sp!, {r4 - r7, pc} -.cfu_src_not_aligned: +.Lcfu_src_not_aligned: bic r1, r1, #3 -USER( ldrt r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault cmp ip, #2 - bgt .cfu_3fupi - beq .cfu_2fupi -.cfu_1fupi: subs r2, r2, #4 + bgt .Lcfu_3fupi + beq .Lcfu_2fupi +.Lcfu_1fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .cfu_1nowords - mov r3, r7, pull #8 -USER( ldrt r7, [r1], #4) @ May fault - orr r3, r3, r7, push #24 + bmi .Lcfu_1nowords + mov r3, r7, lspull #8 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .cfu_1fupi + beq .Lcfu_1fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .cfu_1rem8lp + blt .Lcfu_1rem8lp 
-.cfu_1cpy8lp: mov r3, r7, pull #8 +.Lcfu_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} - bpl .cfu_1cpy8lp + bpl .Lcfu_1cpy8lp -.cfu_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 +.Lcfu_1rem8lp: tst ip, #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #8 -USER( ldrnet r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #24 + movne r3, r7, lspull #8 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, lspush #24 strne r3, [r0], #4 ands ip, ip, #3 - beq .cfu_1fupi -.cfu_1nowords: mov r3, r7, get_byte_1 + beq .Lcfu_1fupi +.Lcfu_1nowords: mov r3, r7, get_byte_1 teq ip, #0 - beq .cfu_finished + beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 movge r3, r7, get_byte_2 strgeb r3, [r0], #1 movgt r3, r7, get_byte_3 strgtb r3, [r0], #1 - b .cfu_finished + b .Lcfu_finished -.cfu_2fupi: subs r2, r2, #4 +.Lcfu_2fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .cfu_2nowords - mov r3, r7, pull #16 -USER( ldrt r7, [r1], #4) @ May fault - orr r3, r3, r7, push #16 + bmi .Lcfu_2nowords + mov r3, r7, lspull #16 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .cfu_2fupi + beq .Lcfu_2fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .cfu_2rem8lp + blt .Lcfu_2rem8lp + -.cfu_2cpy8lp: mov r3, r7, pull #16 +.Lcfu_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} - bpl .cfu_2cpy8lp + bpl .Lcfu_2cpy8lp -.cfu_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 +.Lcfu_2rem8lp: tst ip, #8 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #16 -USER( ldrnet r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #16 + movne r3, r7, lspull #16 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, lspush #16 strne r3, [r0], #4 ands ip, ip, #3 - beq .cfu_2fupi -.cfu_2nowords: mov r3, r7, get_byte_2 + beq .Lcfu_2fupi +.Lcfu_2nowords: mov r3, r7, get_byte_2 teq ip, #0 - beq .cfu_finished + beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #0) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 - b .cfu_finished + b .Lcfu_finished -.cfu_3fupi: subs r2, r2, #4 
+.Lcfu_3fupi: subs r2, r2, #4 addmi ip, r2, #4 - bmi .cfu_3nowords - mov r3, r7, pull #24 -USER( ldrt r7, [r1], #4) @ May fault - orr r3, r3, r7, push #8 + bmi .Lcfu_3nowords + mov r3, r7, lspull #24 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT - beq .cfu_3fupi + beq .Lcfu_3fupi cmp r2, ip movlt ip, r2 sub r2, r2, ip subs ip, ip, #16 - blt .cfu_3rem8lp + blt .Lcfu_3rem8lp -.cfu_3cpy8lp: mov r3, r7, pull #24 +.Lcfu_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} subs ip, ip, #16 - bpl .cfu_3cpy8lp + bpl .Lcfu_3cpy8lp -.cfu_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 +.Lcfu_3rem8lp: tst ip, #8 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #24 -USER( ldrnet r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #8 + movne r3, r7, lspull #24 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, lspush #8 strne r3, [r0], #4 ands ip, ip, #3 - beq .cfu_3fupi -.cfu_3nowords: mov r3, r7, get_byte_3 + beq .Lcfu_3fupi +.Lcfu_3nowords: mov r3, r7, get_byte_3 teq ip, #0 - beq .cfu_finished + beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( ldrgebt r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( ldrgtbt r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 - b .cfu_finished + b .Lcfu_finished +ENDPROC(__copy_from_user) - .section .fixup,"ax" + .pushsection .fixup,"ax" .align 0 /* * We took an exception. r0 contains a pointer to @@ -554,6 +559,6 @@ USER( ldrgtbt r3, [r1], #1) @ May fault movne r1, r4 blne __memzero mov r0, r4 - LOADREGS(fd,sp!, {r4 - r7, pc}) - .previous + ldmfd sp!, {r4 - r7, pc} + .popsection diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c new file mode 100644 index 00000000000..3e58d710013 --- /dev/null +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -0,0 +1,270 @@ +/* + * linux/arch/arm/lib/uaccess_with_memcpy.c + * + * Written by: Lennert Buytenhek and Nicolas Pitre + * Copyright (C) 2009 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/uaccess.h> +#include <linux/rwsem.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/hardirq.h> /* for in_atomic() */ +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> +#include <asm/current.h> +#include <asm/page.h> + +static int +pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) +{ + unsigned long addr = (unsigned long)_addr; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pud_t *pud; + spinlock_t *ptl; + + pgd = pgd_offset(current->mm, addr); + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) + return 0; + + pud = pud_offset(pgd, addr); + if (unlikely(pud_none(*pud) || pud_bad(*pud))) + return 0; + + pmd = pmd_offset(pud, addr); + if (unlikely(pmd_none(*pmd))) + return 0; + + /* + * A pmd can be bad if it refers to a HugeTLB or THP page. + * + * Both THP and HugeTLB pages have the same pmd layout + * and should not be manipulated by the pte functions. + * + * Lock the page table for the destination and check + * to see that it's still huge and whether or not we will + * need to fault on write, or if we have a splitting THP. + */ + if (unlikely(pmd_thp_or_huge(*pmd))) { + ptl = ¤t->mm->page_table_lock; + spin_lock(ptl); + if (unlikely(!pmd_thp_or_huge(*pmd) + || pmd_hugewillfault(*pmd) + || pmd_trans_splitting(*pmd))) { + spin_unlock(ptl); + return 0; + } + + *ptep = NULL; + *ptlp = ptl; + return 1; + } + + if (unlikely(pmd_bad(*pmd))) + return 0; + + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); + if (unlikely(!pte_present(*pte) || !pte_young(*pte) || + !pte_write(*pte) || !pte_dirty(*pte))) { + pte_unmap_unlock(pte, ptl); + return 0; + } + + *ptep = pte; + *ptlp = ptl; + + return 1; +} + +static unsigned long noinline +__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) +{ + int atomic; + + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { + memcpy((void *)to, from, n); + return 0; + } + + /* the mmap semaphore is taken only if not in an atomic context */ + atomic = in_atomic(); + + if (!atomic) + down_read(¤t->mm->mmap_sem); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(to, &pte, &ptl)) { + if (!atomic) + up_read(¤t->mm->mmap_sem); + if (__put_user(0, (char __user *)to)) + goto out; + if (!atomic) + down_read(¤t->mm->mmap_sem); + } + + tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + memcpy((void *)to, from, tocopy); + to += tocopy; + from += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + if (!atomic) + up_read(¤t->mm->mmap_sem); + +out: + return n; +} + +unsigned long +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + /* + * This test is stubbed out of the main function above to keep + * the overhead for small copies low by avoiding a large + * register dump on the stack just to reload them right away. + * With frame pointer disabled, tail call optimization kicks in + * as well making this test almost invisible. 
+ */ + if (n < 64) + return __copy_to_user_std(to, from, n); + return __copy_to_user_memcpy(to, from, n); +} + +static unsigned long noinline +__clear_user_memset(void __user *addr, unsigned long n) +{ + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { + memset((void *)addr, 0, n); + return 0; + } + + down_read(¤t->mm->mmap_sem); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(addr, &pte, &ptl)) { + up_read(¤t->mm->mmap_sem); + if (__put_user(0, (char __user *)addr)) + goto out; + down_read(¤t->mm->mmap_sem); + } + + tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + memset((void *)addr, 0, tocopy); + addr += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + up_read(¤t->mm->mmap_sem); + +out: + return n; +} + +unsigned long __clear_user(void __user *addr, unsigned long n) +{ + /* See rational for this in __copy_to_user() above. */ + if (n < 64) + return __clear_user_std(addr, n); + return __clear_user_memset(addr, n); +} + +#if 0 + +/* + * This code is disabled by default, but kept around in case the chosen + * thresholds need to be revalidated. Some overhead (small but still) + * would be implied by a runtime determined variable threshold, and + * so far the measurement on concerned targets didn't show a worthwhile + * variation. + * + * Note that a fairly precise sched_clock() implementation is needed + * for results to make some sense. + */ + +#include <linux/vmalloc.h> + +static int __init test_size_treshold(void) +{ + struct page *src_page, *dst_page; + void *user_ptr, *kernel_ptr; + unsigned long long t0, t1, t2; + int size, ret; + + ret = -ENOMEM; + src_page = alloc_page(GFP_KERNEL); + if (!src_page) + goto no_src; + dst_page = alloc_page(GFP_KERNEL); + if (!dst_page) + goto no_dst; + kernel_ptr = page_address(src_page); + user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010)); + if (!user_ptr) + goto no_vmap; + + /* warm up the src page dcache */ + ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE); + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size); + t1 = sched_clock(); + ret |= __copy_to_user_std(user_ptr, kernel_ptr, size); + t2 = sched_clock(); + printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __clear_user_memset(user_ptr, size); + t1 = sched_clock(); + ret |= __clear_user_std(user_ptr, size); + t2 = sched_clock(); + printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + if (ret) + ret = -EFAULT; + + vunmap(user_ptr); +no_vmap: + put_page(dst_page); +no_dst: + put_page(src_page); +no_src: + return ret; +} + +subsys_initcall(test_size_treshold); + +#endif diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S index 112630f93e5..f0df6a91db0 100644 --- a/arch/arm/lib/ucmpdi2.S +++ b/arch/arm/lib/ucmpdi2.S @@ -33,3 +33,20 @@ ENTRY(__ucmpdi2) movhi r0, #2 mov pc, lr +ENDPROC(__ucmpdi2) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_ulcmp) + + cmp xh, yh + cmpeq xl, yl + movlo r0, #-1 + moveq r0, #0 + movhi r0, #1 + mov pc, lr + +ENDPROC(__aeabi_ulcmp) + +#endif + diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c new file mode 100644 index 00000000000..2c40aeab3ea --- /dev/null +++ b/arch/arm/lib/xor-neon.c @@ -0,0 +1,46 @@ +/* + * linux/arch/arm/lib/xor-neon.c + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * 
This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+
+#ifndef __ARM_NEON__
+#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
+#endif
+
+/*
+ * Pull in the reference implementations while instructing GCC (through
+ * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
+ * NEON instructions.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC optimize "tree-vectorize"
+#else
+/*
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
+ */
+#warning This code requires at least version 4.6 of GCC
+#endif
+
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#include <asm-generic/xor.h>
+
+struct xor_block_template const xor_block_neon_inner = {
+	.name	= "__inner_neon__",
+	.do_2	= xor_8regs_2,
+	.do_3	= xor_8regs_3,
+	.do_4	= xor_8regs_4,
+	.do_5	= xor_8regs_5,
+};
+EXPORT_SYMBOL(xor_block_neon_inner);
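
The xor-neon.c addition relies entirely on the compiler for the NEON code: it recompiles the generic xor_8regs_* helpers from <asm-generic/xor.h> with the tree-vectorizer enabled and the '-mfloat-abi=softfp -mfpu=neon' flags named in its #error check, and exports the result as xor_block_neon_inner. A rough standalone sketch of the kind of loop the vectorizer is expected to turn into NEON loads/stores is below; the function name and the fixed 4096-byte block size are illustrative assumptions, not the kernel interface (the real helpers take the byte count as their first argument).

#include <stddef.h>

#define BLOCK_BYTES 4096	/* assumption: one fixed-size block */

/* Independent word-wise XOR iterations: with
 *   -O2 -ftree-vectorize -mfloat-abi=softfp -mfpu=neon
 * GCC can turn this loop into NEON loads, veor and stores. */
void xor_words_2(unsigned long *p1, const unsigned long *p2)
{
	size_t i;

	for (i = 0; i < BLOCK_BYTES / sizeof(unsigned long); i++)
		p1[i] ^= p2[i];
}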

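Most of the assembler hunks in this series (io-readsl.S, io-writesl.S, memmove.S, uaccess.S) only rename the pull/push shifter macros to lspull/lspush; the underlying misaligned-copy trick is unchanged: each loop keeps the partially consumed aligned word and splices it with the next aligned load. A little-endian C model of that splice is sketched below; the helper names and the fixed one-byte misalignment are illustrative assumptions, not kernel code (on big-endian the two shift directions swap).

#include <stdint.h>
#include <stddef.h>

/* Models the pair used in the copy loops, e.g. in __copy_to_user:
 *     mov r3, r7, lspull #8
 *     orr r3, r3, r4, lspush #24
 * i.e. take the top three bytes of the previous aligned word and
 * the low byte of the next one (little-endian). */
static inline uint32_t splice8(uint32_t prev, uint32_t next)
{
	return (prev >> 8) | (next << 24);
}

/* Copy 'nwords' 32-bit words that start one byte into the aligned
 * word at src_aligned, using only aligned loads. */
void copy_from_offset1(uint32_t *dst, const uint32_t *src_aligned,
		       size_t nwords)
{
	uint32_t prev = *src_aligned++;

	while (nwords--) {
		uint32_t next = *src_aligned++;
		*dst++ = splice8(prev, next);
		prev = next;
	}
}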