diff options
Diffstat (limited to 'arch/arm/lib')
34 files changed, 709 insertions, 731 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 59ff42ddf0a..0573faab96a 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,14 +6,14 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ - delay.o findbit.o memchr.o memcpy.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ - strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ testchangebit.o testclearbit.o testsetbit.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ - ucmpdi2.o lib1funcs.o div64.o sha1.o \ - io-readsb.o io-writesb.o io-readsl.o io-writesl.o + ucmpdi2.o lib1funcs.o div64.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ + call_with_stack.o bswapsdi2.o mmu-y := clear_user.o copy_page.o getuser.o putuser.o @@ -41,7 +41,12 @@ else endif lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o -lib-$(CONFIG_ARCH_SHARK) += io-shark.o $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S + +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) + NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon + CFLAGS_xor-neon.o += $(NEON_FLAGS) + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o +endif diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index a673297b0cf..4102be617fc 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -22,15 +22,10 @@ #define mask r7 #define offset r8 -ENTRY(__backtrace) - mov r1, #0x10 - mov r0, fp - ENTRY(c_backtrace) #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) mov pc, lr -ENDPROC(__backtrace) ENDPROC(c_backtrace) #else stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... 
@@ -85,14 +80,14 @@ for_each_frame: tst frame, mask @ Check for address exceptions ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, ldr r3, .Ldsi+4 - teq r3, r1, lsr #10 + teq r3, r1, lsr #11 ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg bleq .Ldumpstm @ dump saved registers 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, - teq r3, r1, lsr #10 + teq r3, r1, lsr #11 subeq r0, frame, #16 bleq .Ldumpstm @ dump saved registers @@ -107,7 +102,6 @@ for_each_frame: tst frame, mask @ Check for address exceptions mov r1, frame bl printk no_frame: ldmfd sp!, {r4 - r8, pc} -ENDPROC(__backtrace) ENDPROC(c_backtrace) .pushsection __ex_table,"a" @@ -134,11 +128,11 @@ ENDPROC(c_backtrace) beq 2f add r7, r7, #1 teq r7, #6 - moveq r7, #1 - moveq r1, #'\n' - movne r1, #' ' - ldr r3, [stack], #-4 - mov r2, reg + moveq r7, #0 + adr r3, .Lcr + addne r3, r3, #1 @ skip newline + ldr r2, [stack], #-4 + mov r1, reg adr r0, .Lfp bl printk 2: subs reg, reg, #1 @@ -148,11 +142,11 @@ ENDPROC(c_backtrace) blne printk ldmfd sp!, {instr, reg, stack, r7, pc} -.Lfp: .asciz "%cr%d:%08x" +.Lfp: .asciz " r%d:%08x%s" .Lcr: .asciz "\n" .Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" .align -.Ldsi: .word 0xe92dd800 >> 10 @ stmfd sp!, {... fp, ip, lr, pc} - .word 0xe92d0000 >> 10 @ stmfd sp!, {} +.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... 
fp, ip, lr, pc} + .word 0xe92d0000 >> 11 @ stmfd sp!, {} #endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index d42252918bf..9f12ed1eea8 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,46 +1,78 @@ +#include <asm/unwind.h> -#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K) - .macro bitop, instr +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned mov r2, #1 - and r3, r0, #7 @ Get bit offset - add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif mov r3, r2, lsl r3 -1: ldrexb r2, [r1] +1: ldrex r2, [r1] \instr r2, r2, r3 - strexb r0, r2, [r1] + strex r0, r2, [r1] cmp r0, #0 bne 1b - mov pc, lr + bx lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm - .macro testop, instr, store - and r3, r0, #7 @ Get bit offset + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned mov r2, #1 - add r1, r1, r0, lsr #3 @ Get byte offset + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset mov r3, r2, lsl r3 @ create mask smp_dmb -1: ldrexb r2, [r1] +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif +1: ldrex r2, [r1] ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit - strexb ip, r2, [r1] + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] cmp ip, #0 bne 1b smp_dmb cmp r0, #0 movne r0, #1 -2: mov pc, lr +2: bx lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm #else - .macro bitop, instr - and r2, r0, #7 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + 
mov r0, r0, lsr #5 mov r3, #1 mov r3, r3, lsl r2 save_and_disable_irqs ip - ldrb r2, [r1, r0, lsr #3] + ldr r2, [r1, r0, lsl #2] \instr r2, r2, r3 - strb r2, [r1, r0, lsr #3] + str r2, [r1, r0, lsl #2] restore_irqs ip mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm /** @@ -51,17 +83,23 @@ * Note: we can trivially conditionalise the store instruction * to avoid dirtying the data cache. */ - .macro testop, instr, store - add r1, r1, r0, lsr #3 - and r3, r0, #7 - mov r0, #1 + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 save_and_disable_irqs ip - ldrb r2, [r1] + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 tst r2, r0, lsl r3 \instr r2, r2, r0, lsl r3 \store r2, [r1] moveq r0, #0 restore_irqs ip mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) .endm #endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S new file mode 100644 index 00000000000..9fcdd154eff --- /dev/null +++ b/arch/arm/lib/bswapsdi2.S @@ -0,0 +1,36 @@ +#include <linux/linkage.h> + +#if __LINUX_ARM_ARCH__ >= 6 +ENTRY(__bswapsi2) + rev r0, r0 + bx lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + rev r3, r0 + rev r0, r1 + mov r1, r3 + bx lr +ENDPROC(__bswapdi2) +#else +ENTRY(__bswapsi2) + eor r3, r0, r0, ror #16 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + eor r0, r3, r0, ror #8 + mov pc, lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + mov ip, r1 + eor r3, ip, ip, ror #16 + eor r1, r0, r0, ror #16 + mov r1, r1, lsr #8 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + bic r1, r1, #0xff00 + eor r1, r1, r0, ror #8 + eor r0, r3, ip, ror #8 + mov pc, lr +ENDPROC(__bswapdi2) +#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S new file mode 100644 index 00000000000..916c80f13ae --- /dev/null +++ b/arch/arm/lib/call_with_stack.S @@ -0,0 +1,44 @@ +/* + * arch/arm/lib/call_with_stack.S + * + * Copyright (C) 2011 ARM Ltd. 
+ * Written by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * void call_with_stack(void (*fn)(void *), void *arg, void *sp) + * + * Change the stack to that pointed at by sp, then invoke fn(arg) with + * the new stack. + */ +ENTRY(call_with_stack) + str sp, [r2, #-4]! + str lr, [r2, #-4]! 
+ + mov sp, r2 + mov r2, r0 + mov r0, r1 + + adr lr, BSYM(1f) + mov pc, r2 + +1: ldr lr, [sp] + ldr sp, [sp, #4] + mov pc, lr +ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S index 80f3115cbee..f4027862172 100644 --- a/arch/arm/lib/changebit.S +++ b/arch/arm/lib/changebit.S @@ -12,12 +12,4 @@ #include "bitops.h" .text -/* Purpose : Function to change a bit - * Prototype: int change_bit(int bit, void *addr) - */ -ENTRY(_change_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_change_bit_le) - bitop eor -ENDPROC(_change_bit_be) -ENDPROC(_change_bit_le) +bitop _change_bit, eor diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S index 1a63e43a1df..f6b75fb64d3 100644 --- a/arch/arm/lib/clearbit.S +++ b/arch/arm/lib/clearbit.S @@ -12,13 +12,4 @@ #include "bitops.h" .text -/* - * Purpose : Function to clear a bit - * Prototype: int clear_bit(int bit, void *addr) - */ -ENTRY(_clear_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_clear_bit_le) - bitop bic -ENDPROC(_clear_bit_be) -ENDPROC(_clear_bit_le) +bitop _clear_bit, bic diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb00..3bc8eb811a7 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -197,24 +197,24 @@ 12: PLD( pld [r1, #124] ) 13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull + mov r3, lr, lspull #\pull subs r2, r2, #32 ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush 
#\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f bge 12b PLD( cmn r2, #96 ) @@ -225,10 +225,10 @@ 14: ands ip, r2, #28 beq 16f -15: mov r3, lr, pull #\pull +15: mov r3, lr, lspull #\pull ldr1w r1, lr, abort=21f subs ip, ip, #4 - orr r3, r3, lr, push #\push + orr r3, r3, lr, lspush #\push str1w r0, r3, abort=21f bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a0..d6e742d2400 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -141,7 +141,7 @@ FN_ENTRY tst len, #2 mov r5, r4, get_byte_0 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 @@ -171,23 +171,23 @@ FN_ENTRY cmp ip, #2 beq .Lsrc2_aligned bhi .Lsrc3_aligned - mov r4, r5, pull #8 @ C = 0 + mov r4, r5, lspull #8 @ C = 0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 - mov r7, r7, pull #8 - orr r7, r7, r8, push #24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #8 + mov r4, r8, lspull #8 sub ip, ip, #16 teq ip, #0 bne 1b @@ -196,50 +196,50 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push 
#24 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #8 + mov r4, r6, lspull #8 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #24 + orr r4, r4, r5, lspush #24 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #8 + mov r4, r5, lspull #8 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 tst len, #2 beq .Lexit - adcs sum, sum, r4, push #16 + adcs sum, sum, r4, lspush #16 strb r5, [dst], #1 mov r5, r4, get_byte_1 strb r5, [dst], #1 mov r5, r4, get_byte_2 b .Lexit -.Lsrc2_aligned: mov r4, r5, pull #16 +.Lsrc2_aligned: mov r4, r5, lspull #16 adds sum, sum, #0 bics ip, len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 - mov r7, r7, pull #16 - orr r7, r7, r8, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #16 + mov r4, r8, lspull #16 sub ip, ip, #16 teq ip, #0 bne 1b @@ -248,20 +248,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #16 + mov r4, r6, lspull #16 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #16 + orr r4, r4, r5, lspush #16 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #16 + mov r4, r5, lspull #16 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -276,24 +276,24 @@ FN_ENTRY load1b r5 b .Lexit -.Lsrc3_aligned: mov r4, r5, pull #24 +.Lsrc3_aligned: mov r4, r5, lspull #24 adds sum, sum, #0 bics ip, 
len, #15 beq 2f 1: load4l r5, r6, r7, r8 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 - mov r7, r7, pull #24 - orr r7, r7, r8, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 stmia dst!, {r4, r5, r6, r7} adcs sum, sum, r4 adcs sum, sum, r5 adcs sum, sum, r6 adcs sum, sum, r7 - mov r4, r8, pull #24 + mov r4, r8, lspull #24 sub ip, ip, #16 teq ip, #0 bne 1b @@ -302,20 +302,20 @@ FN_ENTRY tst ip, #8 beq 3f load2l r5, r6 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 stmia dst!, {r4, r5} adcs sum, sum, r4 adcs sum, sum, r5 - mov r4, r6, pull #24 + mov r4, r6, lspull #24 tst ip, #4 beq 4f 3: load1l r5 - orr r4, r4, r5, push #8 + orr r4, r4, r5, lspush #8 str r4, [dst], #4 adcs sum, sum, r4 - mov r4, r5, pull #24 + mov r4, r5, lspull #24 4: ands len, len, #3 beq .Ldone mov r5, r4, get_byte_0 @@ -326,7 +326,7 @@ FN_ENTRY load1l r4 mov r5, r4, get_byte_0 strb r5, [dst], #1 - adcs sum, sum, r4, push #24 + adcs sum, sum, r4, lspush #24 mov r5, r4, get_byte_1 b .Lexit FN_EXIT diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay-loop.S index 3c9a05c8d20..bc1033b897b 100644 --- a/arch/arm/lib/delay.S +++ b/arch/arm/lib/delay-loop.S @@ -9,11 +9,11 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <asm/param.h> +#include <asm/delay.h> .text .LC0: .word loops_per_jiffy -.LC1: .word (2199023*HZ)>>11 +.LC1: .word UDELAY_MULT /* * r0 <= 2000 @@ -21,10 +21,10 @@ * HZ <= 1000 */ -ENTRY(__udelay) +ENTRY(__loop_udelay) ldr r2, .LC1 mul r0, r2, r0 -ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 mov r1, #-1 ldr r2, .LC0 ldr r2, [r2] @ max = 0x01ffffff @@ -39,12 +39,11 @@ 
ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 /* * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * Oh, if only we had a cycle counter... */ + .align 3 @ Delay routine -ENTRY(__delay) +ENTRY(__loop_delay) subs r0, r0, #1 #if 0 movls pc, lr @@ -62,8 +61,8 @@ ENTRY(__delay) movls pc, lr subs r0, r0, #1 #endif - bhi __delay + bhi __loop_delay mov pc, lr -ENDPROC(__udelay) -ENDPROC(__const_udelay) -ENDPROC(__delay) +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 00000000000..5306de35013 --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,93 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/timex.h> + +/* + * Default to the loop-based delay implementation. 
+ */ +struct arm_delay_ops arm_delay_ops = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +static const struct delay_timer *delay_timer; +static bool delay_calibrated; + +int read_current_timer(unsigned long *timer_val) +{ + if (!delay_timer) + return -ENXIO; + + *timer_val = delay_timer->read_current_timer(); + return 0; +} +EXPORT_SYMBOL_GPL(read_current_timer); + +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= arm_delay_ops.ticks_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init register_current_timer_delay(const struct delay_timer *timer) +{ + if (!delay_calibrated) { + pr_info("Switching to timer-based delay loop\n"); + delay_timer = timer; + lpj_fine = timer->freq / HZ; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; + + delay_calibrated = true; + } else { + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); + } +} + +unsigned long calibrate_delay_is_known(void) +{ + delay_calibrated = true; + return lpj_fine; +} diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index faa7748142d..e55c4842c29 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/unwind.h> #ifdef __ARMEB__ #define xh r0 @@ -44,6 +45,7 @@ */ ENTRY(__do_div64) +UNWIND(.fnstart) @ Test for easy paths first. 
subs ip, r4, #1 @@ -189,7 +191,12 @@ ENTRY(__do_div64) moveq yh, xh moveq xh, #0 moveq pc, lr +UNWIND(.fnend) +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: @ Division by 0: str lr, [sp, #-8]! bl __div0 @@ -200,4 +207,5 @@ ENTRY(__do_div64) mov xh, #0 ldr pc, [sp], #8 +UNWIND(.fnend) ENDPROC(__do_div64) diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S index 8678eb2b7a6..e6057fa851b 100644 --- a/arch/arm/lib/ecard.S +++ b/arch/arm/lib/ecard.S @@ -12,7 +12,6 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <mach/hardware.h> #define CPSR2SPSR(rt) \ mrs rt, cpsr; \ diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 1b049cd7a49..9b06bb41fca 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,8 +16,9 @@ * __get_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value + * r2 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see <asm/uaccess.h> @@ -27,34 +28,40 @@ * Note also that it is intended that __get_user_bad is not global. 
*/ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> #include <asm/domain.h> ENTRY(__get_user_1) -1: T(ldrb) r2, [r0] + check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) -#ifdef CONFIG_THUMB2_KERNEL -2: T(ldrb) r2, [r0] -3: T(ldrb) r3, [r0, #1] + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 #else -2: T(ldrb) r2, [r0], #1 -3: T(ldrb) r3, [r0] +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] #endif #ifndef __ARMEB__ - orr r2, r2, r3, lsl #8 + orr r2, r2, rb, lsl #8 #else - orr r2, r3, r2, lsl #8 + orr r2, rb, r2, lsl #8 #endif mov r0, #0 mov pc, lr ENDPROC(__get_user_2) ENTRY(__get_user_4) -4: T(ldr) r2, [r0] + check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_4) diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S index 1b197ea7aab..69719bad674 100644 --- a/arch/arm/lib/io-acorn.S +++ b/arch/arm/lib/io-acorn.S @@ -11,13 +11,14 @@ * */ #include <linux/linkage.h> +#include <linux/kern_levels.h> #include <asm/assembler.h> .text .align .Liosl_warning: - .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0" .align /* diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4..7a7430950c7 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl) strb ip, [r1], #1 4: subs r2, r2, #1 - mov ip, r3, pull #24 + mov ip, r3, lspull #24 ldrne r3, [r0] - orrne ip, ip, r3, push #8 + orrne ip, ip, r3, lspush #8 strne ip, [r1], #4 bne 4b b 8f 5: subs r2, r2, #1 - mov ip, r3, pull #16 + mov ip, r3, lspull #16 ldrne r3, [r0] - orrne ip, ip, r3, push #16 + orrne ip, ip, r3, lspush #16 strne ip, [r1], #4 bne 5b b 7f 6: subs r2, r2, #1 - mov ip, r3, pull #8 + mov ip, r3, 
lspull #8 ldrne r3, [r0] - orrne ip, ip, r3, push #24 + orrne ip, ip, r3, lspush #24 strne ip, [r1], #4 bne 6b diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 9aaf7c72065..88487c8c4f2 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -9,7 +9,6 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> -#include <mach/hardware.h> .Linsw_bad_alignment: adr r0, .Linsw_bad_align_msg diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c deleted file mode 100644 index 824253948f5..00000000000 --- a/arch/arm/lib/io-shark.c +++ /dev/null @@ -1,13 +0,0 @@ -/* - * linux/arch/arm/lib/io-shark.c - * - * by Alexander Schulz - * - * derived from: - * linux/arch/arm/lib/io-ebsa.S - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725..d0d104a0dd1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,26 @@ ENTRY(__raw_writesl) blt 5f bgt 6f -4: mov ip, r3, pull #16 +4: mov ip, r3, lspull #16 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #16 + orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b mov pc, lr -5: mov ip, r3, pull #8 +5: mov ip, r3, lspull #8 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #24 + orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b mov pc, lr -6: mov ip, r3, pull #24 +6: mov ip, r3, lspull #24 ldr r3, [r1], #4 subs r2, r2, #1 - orr ip, ip, r3, push #8 + orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b mov pc, lr diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index cd34503e424..49b800419e3 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -9,7 +9,6 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> 
-#include <mach/hardware.h> .Loutsw_bad_alignment: adr r0, .Loutsw_bad_align_msg diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 6dc06487f3c..c562f649734 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA. */ #include <linux/linkage.h> #include <asm/assembler.h> - +#include <asm/unwind.h> .macro ARM_DIV_BODY dividend, divisor, result, curbit @@ -207,6 +207,7 @@ Boston, MA 02111-1307, USA. */ ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) subs r2, r1, #1 moveq pc, lr @@ -230,10 +231,12 @@ ENTRY(__aeabi_uidiv) mov r0, r0, lsr r2 mov pc, lr +UNWIND(.fnend) ENDPROC(__udivsi3) ENDPROC(__aeabi_uidiv) ENTRY(__umodsi3) +UNWIND(.fnstart) subs r2, r1, #1 @ compare divisor with 1 bcc Ldiv0 @@ -247,10 +250,12 @@ ENTRY(__umodsi3) mov pc, lr +UNWIND(.fnend) ENDPROC(__umodsi3) ENTRY(__divsi3) ENTRY(__aeabi_idiv) +UNWIND(.fnstart) cmp r1, #0 eor ip, r0, r1 @ save the sign of the result. @@ -287,10 +292,12 @@ ENTRY(__aeabi_idiv) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) ENDPROC(__divsi3) ENDPROC(__aeabi_idiv) ENTRY(__modsi3) +UNWIND(.fnstart) cmp r1, #0 beq Ldiv0 @@ -310,11 +317,14 @@ ENTRY(__modsi3) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) ENDPROC(__modsi3) #ifdef CONFIG_AEABI ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) stmfd sp!, {r0, r1, ip, lr} bl __aeabi_uidiv @@ -323,10 +333,12 @@ ENTRY(__aeabi_uidivmod) sub r1, r1, r3 mov pc, lr +UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) ENTRY(__aeabi_idivmod) - +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) stmfd sp!, {r0, r1, ip, lr} bl __aeabi_idiv ldmfd sp!, {r1, r2, ip, lr} @@ -334,15 +346,18 @@ ENTRY(__aeabi_idivmod) sub r1, r1, r3 mov pc, lr +UNWIND(.fnend) ENDPROC(__aeabi_idivmod) #endif Ldiv0: - +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) str lr, [sp, #-8]! bl __div0 mov r0, #0 @ About as wrong as it could be. 
ldr pc, [sp], #8 - - +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 938fc14f962..d1fc0c0c342 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -147,24 +147,24 @@ ENTRY(memmove) 12: PLD( pld [r1, #-128] ) 13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push + mov lr, r3, lspush #\push subs r2, r2, #32 ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull stmdb r0!, {r4 - r9, ip, lr} bge 12b PLD( cmn r2, #96 ) @@ -175,10 +175,10 @@ ENTRY(memmove) 14: ands ip, r2, #28 beq 16f -15: mov lr, r3, push #\push +15: mov lr, r3, lspush #\push ldr r3, [r1, #-4]! subs ip, ip, #4 - orr lr, lr, r3, pull #\pull + orr lr, lr, r3, lspull #\pull str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 650d5923ab8..94b0650ea98 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -14,27 +14,15 @@ .text .align 5 - .word 0 - -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? 
- cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. - */ ENTRY(memset) ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 @@ -43,29 +31,28 @@ ENTRY(memset) #if ! CALGN(1)+0 /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} #else @@ -74,54 +61,63 @@ ENTRY(memset) * whole cache lines at once. 
*/ - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} mov r4, r1 mov r5, r1 mov r6, r1 mov r7, r1 - mov ip, r1 + mov r8, r1 mov lr, r1 cmp r2, #96 - tstgt r0, #31 + tstgt ip, #31 ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} + stmneia ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} #endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? 
+ cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b ENDPROC(memset) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index c023fc11e86..3d73dcb959b 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,6 +16,7 @@ * __put_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * r2, r3 contains the value * Outputs: r0 is the error code * lr corrupted @@ -27,32 +28,35 @@ * Note also that it is intended that __put_user_bad is not global. */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> #include <asm/domain.h> ENTRY(__put_user_1) -1: T(strb) r2, [r0] + check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: T(strb) r2, [r0] -3: T(strb) ip, [r0, #1] +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] #else -2: T(strb) ip, [r0] -3: T(strb) r2, [r0, #1] +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] #endif #else /* !CONFIG_THUMB2_KERNEL */ #ifndef __ARMEB__ -2: T(strb) r2, [r0], #1 -3: T(strb) ip, [r0] +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] #else -2: T(strb) ip, [r0], #1 -3: T(strb) r2, [r0] +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 @@ -60,18 +64,20 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) -4: T(str) r2, [r0] + check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad #ifdef CONFIG_THUMB2_KERNEL -5: T(str) r2, [r0] -6: T(str) r3, [r0, #4] +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] #else -5: T(str) r2, [r0], #4 -6: T(str) r3, [r0] +5: TUSER(str) r2, 
[r0], #4 +6: TUSER(str) r3, [r0] #endif mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S index 1dd7176c4b2..618fedae4b3 100644 --- a/arch/arm/lib/setbit.S +++ b/arch/arm/lib/setbit.S @@ -12,13 +12,4 @@ #include "bitops.h" .text -/* - * Purpose : Function to set a bit - * Prototype: int set_bit(int bit, void *addr) - */ -ENTRY(_set_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_set_bit_le) - bitop orr -ENDPROC(_set_bit_be) -ENDPROC(_set_bit_le) +bitop _set_bit, orr diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S deleted file mode 100644 index eb0edb80d7b..00000000000 --- a/arch/arm/lib/sha1.S +++ /dev/null @@ -1,211 +0,0 @@ -/* - * linux/arch/arm/lib/sha1.S - * - * SHA transform optimized for ARM - * - * Copyright: (C) 2005 by Nicolas Pitre <nico@fluxnic.net> - * Created: September 17, 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * The reference implementation for this code is linux/lib/sha1.c - */ - -#include <linux/linkage.h> - - .text - - -/* - * void sha_transform(__u32 *digest, const char *in, __u32 *W) - * - * Note: the "in" ptr may be unaligned. - */ - -ENTRY(sha_transform) - - stmfd sp!, {r4 - r8, lr} - - @ for (i = 0; i < 16; i++) - @ W[i] = be32_to_cpu(in[i]); - -#ifdef __ARMEB__ - mov r4, r0 - mov r0, r2 - mov r2, #64 - bl memcpy - mov r2, r0 - mov r0, r4 -#else - mov r3, r2 - mov lr, #16 -1: ldrb r4, [r1], #1 - ldrb r5, [r1], #1 - ldrb r6, [r1], #1 - ldrb r7, [r1], #1 - subs lr, lr, #1 - orr r5, r5, r4, lsl #8 - orr r6, r6, r5, lsl #8 - orr r7, r7, r6, lsl #8 - str r7, [r3], #4 - bne 1b -#endif - - @ for (i = 0; i < 64; i++) - @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); - - sub r3, r2, #4 - mov lr, #64 -2: ldr r4, [r3, #4]! 
- subs lr, lr, #1 - ldr r5, [r3, #8] - ldr r6, [r3, #32] - ldr r7, [r3, #52] - eor r4, r4, r5 - eor r4, r4, r6 - eor r4, r4, r7 - mov r4, r4, ror #31 - str r4, [r3, #64] - bne 2b - - /* - * The SHA functions are: - * - * f1(B,C,D) = (D ^ (B & (C ^ D))) - * f2(B,C,D) = (B ^ C ^ D) - * f3(B,C,D) = ((B & C) | (D & (B | C))) - * - * Then the sub-blocks are processed as follows: - * - * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ - * B' = A - * C' = ror(B, 2) - * D' = C - * E' = D - * - * We therefore unroll each loop 5 times to avoid register shuffling. - * Also the ror for C (and also D and E which are successivelyderived - * from it) is applied in place to cut on an additional mov insn for - * each round. - */ - - .macro sha_f1, A, B, C, D, E - ldr r3, [r2], #4 - eor ip, \C, \D - add \E, r1, \E, ror #2 - and ip, \B, ip, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f2, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - eor ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f3, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - orr ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - and ip, ip, \D, ror #2 - add \E, \E, r3 - and r3, \B, \C, ror #2 - orr ip, ip, r3 - add \E, \E, ip - .endm - - ldmia r0, {r4 - r8} - - mov lr, #4 - ldr r1, .L_sha_K + 0 - - /* adjust initial values */ - mov r6, r6, ror #30 - mov r7, r7, ror #30 - mov r8, r8, ror #30 - -3: subs lr, lr, #1 - sha_f1 r4, r5, r6, r7, r8 - sha_f1 r8, r4, r5, r6, r7 - sha_f1 r7, r8, r4, r5, r6 - sha_f1 r6, r7, r8, r4, r5 - sha_f1 r5, r6, r7, r8, r4 - bne 3b - - ldr r1, .L_sha_K + 4 - mov lr, #4 - -4: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 4b - - ldr r1, .L_sha_K + 8 - mov lr, #4 - -5: subs lr, lr, #1 - sha_f3 r4, r5, r6, r7, r8 - sha_f3 
r8, r4, r5, r6, r7 - sha_f3 r7, r8, r4, r5, r6 - sha_f3 r6, r7, r8, r4, r5 - sha_f3 r5, r6, r7, r8, r4 - bne 5b - - ldr r1, .L_sha_K + 12 - mov lr, #4 - -6: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 6b - - ldmia r0, {r1, r2, r3, ip, lr} - add r4, r1, r4 - add r5, r2, r5 - add r6, r3, r6, ror #2 - add r7, ip, r7, ror #2 - add r8, lr, r8, ror #2 - stmia r0, {r4 - r8} - - ldmfd sp!, {r4 - r8, pc} - -ENDPROC(sha_transform) - - .align 2 -.L_sha_K: - .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 - - -/* - * void sha_init(__u32 *buf) - */ - - .align 2 -.L_sha_initial_digest: - .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 - -ENTRY(sha_init) - - str lr, [sp, #-4]! - adr r1, .L_sha_initial_digest - ldmia r1, {r1, r2, r3, ip, lr} - stmia r0, {r1, r2, r3, ip, lr} - ldr pc, [sp], #4 - -ENDPROC(sha_init) diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S deleted file mode 100644 index f202d7bd164..00000000000 --- a/arch/arm/lib/strncpy_from_user.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. 
- * r0 = dst, r1 = src, r2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov ip, r1 -1: subs r2, r2, #1 - ldrusr r3, r1, 1, pl - bmi 2f - strb r3, [r0], #1 - teq r3, #0 - bne 1b - sub r1, r1, #1 @ take NUL character out of count -2: sub r0, r1, ip - mov pc, lr -ENDPROC(__strncpy_from_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r3, #0 - strb r3, [r0, #0] @ null terminate - mov r0, #-EFAULT - mov pc, lr - .popsection - diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S deleted file mode 100644 index 0ecbb459c4f..00000000000 --- a/arch/arm/lib/strnlen_user.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * linux/arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n + 1 if too long - */ -ENTRY(__strnlen_user) - mov r2, r0 -1: - ldrusr r3, r0, 1 - teq r3, #0 - beq 2f - subs r1, r1, #1 - bne 1b - add r0, r0, #1 -2: sub r0, r0, r2 - mov pc, lr -ENDPROC(__strnlen_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r0, #0 - mov pc, lr - .popsection diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S index 5c98dc567f0..4becdc3a59c 100644 --- a/arch/arm/lib/testchangebit.S +++ b/arch/arm/lib/testchangebit.S @@ -12,9 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_change_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_change_bit_le) - testop eor, strb -ENDPROC(_test_and_change_bit_be) -ENDPROC(_test_and_change_bit_le) +testop _test_and_change_bit, eor, str diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S index 543d7094d18..918841dcce7 100644 --- a/arch/arm/lib/testclearbit.S +++ b/arch/arm/lib/testclearbit.S @@ -12,9 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_clear_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_clear_bit_le) - testop bicne, strneb -ENDPROC(_test_and_clear_bit_be) -ENDPROC(_test_and_clear_bit_le) +testop _test_and_clear_bit, bicne, strne diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S index 0b3f390401c..8d1b2fe9e48 100644 --- a/arch/arm/lib/testsetbit.S +++ b/arch/arm/lib/testsetbit.S @@ -12,9 +12,4 @@ #include "bitops.h" .text -ENTRY(_test_and_set_bit_be) - eor r0, r0, #0x18 @ big endian byte ordering -ENTRY(_test_and_set_bit_le) - testop orreq, streqb -ENDPROC(_test_and_set_bit_be) -ENDPROC(_test_and_set_bit_le) +testop 
_test_and_set_bit, orreq, streq diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index d0ece2aeb70..e50520904b7 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -32,11 +32,11 @@ rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned @@ -59,7 +59,7 @@ ENTRY(__copy_to_user) addmi ip, r2, #4 bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -88,18 +88,18 @@ USER( T(str) r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_0fupi .Lc2u_0nowords: teq ip, #0 beq .Lc2u_finished .Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_not_enough: @@ -117,10 +117,10 @@ USER( T(strgtb) r3, [r0], #1) @ May fault .Lc2u_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_1nowords - mov r3, r7, pull #8 + mov r3, r7, lspull #8 ldr r7, [r1], #4 - orr r3, r3, r7, push #24 -USER( T(str) r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #24 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT 
@@ -131,51 +131,51 @@ USER( T(str) r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_1rem8lp -.Lc2u_1cpy8lp: mov r3, r7, pull #8 +.Lc2u_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_1cpy8lp .Lc2u_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #24 - T(strne) r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #24 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi .Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_2nowords - mov r3, r7, pull #16 + mov r3, r7, lspull #16 ldr r7, [r1], #4 - orr r3, r3, r7, push #16 -USER( T(str) r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #16 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -186,51 +186,51 @@ USER( T(str) 
r3, [r0], #4) @ May fault subs ip, ip, #16 blt .Lc2u_2rem8lp -.Lc2u_2cpy8lp: mov r3, r7, pull #16 +.Lc2u_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_2cpy8lp .Lc2u_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush #16 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #16 - T(strne) r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #16 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi .Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_3 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lc2u_3nowords - mov r3, r7, pull #24 + mov r3, r7, lspull #24 ldr r7, [r1], #4 - orr r3, r3, r7, push #8 -USER( T(str) r3, [r0], #4) @ May fault + orr r3, r3, r7, lspush #8 +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -241,42 +241,42 @@ USER( T(str) r3, [r0], #4) @ May fault subs 
ip, ip, #16 blt .Lc2u_3rem8lp -.Lc2u_3cpy8lp: mov r3, r7, pull #24 +.Lc2u_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} subs ip, ip, #16 - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} @ Shouldnt fault bpl .Lc2u_3cpy8lp .Lc2u_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldrne r7, [r1], #4 - orrne r3, r3, r7, push #8 - T(strne) r3, [r0], #4 @ Shouldnt fault + orrne r3, r3, r7, lspush #8 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi .Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished ENDPROC(__copy_to_user) @@ -295,11 +295,11 @@ ENDPROC(__copy_to_user) .Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip b 
.Lcfu_dest_aligned @@ -322,7 +322,7 @@ ENTRY(__copy_from_user) .Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_0nowords -USER( T(ldr) r3, [r1], #4) +USER( TUSER( ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 @@ -351,18 +351,18 @@ USER( T(ldr) r3, [r1], #4) ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - T(ldrne) r3, [r1], #4 @ Shouldnt fault + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_0fupi .Lcfu_0nowords: teq ip, #0 beq .Lcfu_finished .Lcfu_nowords: cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -375,16 +375,16 @@ USER( T(ldrgtb) r3, [r1], #1) @ May fault .Lcfu_src_not_aligned: bic r1, r1, #3 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault cmp ip, #2 bgt .Lcfu_3fupi beq .Lcfu_2fupi .Lcfu_1fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_1nowords - mov r3, r7, pull #8 -USER( T(ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #24 + mov r3, r7, lspull #8 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -396,30 +396,30 @@ USER( T(ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_1rem8lp -.Lcfu_1cpy8lp: mov r3, r7, pull #8 +.Lcfu_1cpy8lp: mov r3, r7, lspull #8 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #24 - mov r4, r4, pull #8 - orr r4, r4, r5, push #24 - mov r5, r5, pull #8 - orr r5, r5, r6, push #24 - mov r6, r6, pull #8 - orr r6, r6, r7, push #24 + orr r3, r3, r4, lspush #24 + mov r4, r4, lspull #8 + orr r4, r4, r5, lspush 
#24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 stmia r0!, {r3 - r6} bpl .Lcfu_1cpy8lp .Lcfu_1rem8lp: tst ip, #8 - movne r3, r7, pull #8 + movne r3, r7, lspull #8 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #24 - movne r4, r4, pull #8 - orrne r4, r4, r7, push #24 + orrne r3, r3, r4, lspush #24 + movne r4, r4, lspull #8 + orrne r4, r4, r7, lspush #24 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #8 -USER( T(ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #24 + movne r3, r7, lspull #8 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, lspush #24 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_1fupi @@ -437,9 +437,9 @@ USER( T(ldrne) r7, [r1], #4) @ May fault .Lcfu_2fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_2nowords - mov r3, r7, pull #16 -USER( T(ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #16 + mov r3, r7, lspull #16 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -452,30 +452,30 @@ USER( T(ldr) r7, [r1], #4) @ May fault blt .Lcfu_2rem8lp -.Lcfu_2cpy8lp: mov r3, r7, pull #16 +.Lcfu_2cpy8lp: mov r3, r7, lspull #16 ldmia r1!, {r4 - r7} @ Shouldnt fault subs ip, ip, #16 - orr r3, r3, r4, push #16 - mov r4, r4, pull #16 - orr r4, r4, r5, push #16 - mov r5, r5, pull #16 - orr r5, r5, r6, push #16 - mov r6, r6, pull #16 - orr r6, r6, r7, push #16 + orr r3, r3, r4, lspush #16 + mov r4, r4, lspull #16 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 stmia r0!, {r3 - r6} bpl .Lcfu_2cpy8lp .Lcfu_2rem8lp: tst ip, #8 - movne r3, r7, pull #16 + movne r3, r7, lspull #16 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #16 - movne r4, r4, pull #16 - orrne r4, r4, r7, push #16 + orrne r3, r3, r4, lspush #16 + movne r4, r4, lspull #16 + orrne r4, r4, r7, lspush 
#16 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #16 -USER( T(ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #16 + movne r3, r7, lspull #16 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, lspush #16 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_2fupi @@ -486,16 +486,16 @@ USER( T(ldrne) r7, [r1], #4) @ May fault strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #0) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished .Lcfu_3fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_3nowords - mov r3, r7, pull #24 -USER( T(ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, push #8 + mov r3, r7, lspull #24 +USER( TUSER( ldr) r7, [r1], #4) @ May fault + orr r3, r3, r7, lspush #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 @@ -507,30 +507,30 @@ USER( T(ldr) r7, [r1], #4) @ May fault subs ip, ip, #16 blt .Lcfu_3rem8lp -.Lcfu_3cpy8lp: mov r3, r7, pull #24 +.Lcfu_3cpy8lp: mov r3, r7, lspull #24 ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, push #8 - mov r4, r4, pull #24 - orr r4, r4, r5, push #8 - mov r5, r5, pull #24 - orr r5, r5, r6, push #8 - mov r6, r6, pull #24 - orr r6, r6, r7, push #8 + orr r3, r3, r4, lspush #8 + mov r4, r4, lspull #24 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 stmia r0!, {r3 - r6} subs ip, ip, #16 bpl .Lcfu_3cpy8lp .Lcfu_3rem8lp: tst ip, #8 - movne r3, r7, pull #24 + movne r3, r7, lspull #24 ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, push #8 - movne r4, r4, pull #24 - orrne r4, r4, r7, push #8 + orrne r3, r3, r4, lspush #8 + movne r4, r4, lspull #24 + orrne r4, r4, r7, lspush #8 stmneia r0!, {r3 - r4} tst ip, #4 - movne r3, r7, pull #24 -USER( T(ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, push #8 + movne r3, r7, lspull #24 +USER( TUSER( ldrne) r7, [r1], #4) @ May fault + orrne r3, r3, r7, 
lspush #8 strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_3fupi @@ -539,9 +539,9 @@ USER( T(ldrne) r7, [r1], #4) @ May fault beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished ENDPROC(__copy_from_user) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index e2d2f2cd0c4..3e58d710013 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -17,6 +17,8 @@ #include <linux/sched.h> #include <linux/hardirq.h> /* for in_atomic() */ #include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> #include <asm/current.h> #include <asm/page.h> @@ -27,14 +29,47 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pud_t *pud; spinlock_t *ptl; pgd = pgd_offset(current->mm, addr); if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) return 0; - pmd = pmd_offset(pgd, addr); - if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) + pud = pud_offset(pgd, addr); + if (unlikely(pud_none(*pud) || pud_bad(*pud))) + return 0; + + pmd = pmd_offset(pud, addr); + if (unlikely(pmd_none(*pmd))) + return 0; + + /* + * A pmd can be bad if it refers to a HugeTLB or THP page. + * + * Both THP and HugeTLB pages have the same pmd layout + * and should not be manipulated by the pte functions. + * + * Lock the page table for the destination and check + * to see that it's still huge and whether or not we will + * need to fault on write, or if we have a splitting THP. 
+ */ + if (unlikely(pmd_thp_or_huge(*pmd))) { + ptl = &current->mm->page_table_lock; + spin_lock(ptl); + if (unlikely(!pmd_thp_or_huge(*pmd) + || pmd_hugewillfault(*pmd) + || pmd_trans_splitting(*pmd))) { + spin_unlock(ptl); + return 0; + } + + *ptep = NULL; + *ptlp = ptl; + return 1; + } + + if (unlikely(pmd_bad(*pmd))) return 0; pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); @@ -88,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) from += tocopy; n -= tocopy; - pte_unmap_unlock(pte, ptl); + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); } if (!atomic) up_read(&current->mm->mmap_sem); @@ -141,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n) addr += tocopy; n -= tocopy; - pte_unmap_unlock(pte, ptl); + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); } up_read(&current->mm->mmap_sem); diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c new file mode 100644 index 00000000000..2c40aeab3ea --- /dev/null +++ b/arch/arm/lib/xor-neon.c @@ -0,0 +1,46 @@ +/* + * linux/arch/arm/lib/xor-neon.c + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/raid/xor.h> +#include <linux/module.h> + +MODULE_LICENSE("GPL"); + +#ifndef __ARM_NEON__ +#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' +#endif + +/* + * Pull in the reference implementations while instructing GCC (through + * -ftree-vectorize) to attempt to exploit implicit parallelism and emit + * NEON instructions. 
+ */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC optimize "tree-vectorize" +#else +/* + * While older versions of GCC do not generate incorrect code, they fail to + * recognize the parallel nature of these functions, and emit plain ARM code, + * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + */ +#warning This code requires at least version 4.6 of GCC +#endif + +#pragma GCC diagnostic ignored "-Wunused-variable" +#include <asm-generic/xor.h> + +struct xor_block_template const xor_block_neon_inner = { + .name = "__inner_neon__", + .do_2 = xor_8regs_2, + .do_3 = xor_8regs_3, + .do_4 = xor_8regs_4, + .do_5 = xor_8regs_5, +}; +EXPORT_SYMBOL(xor_block_neon_inner); |
