diff options
Diffstat (limited to 'arch/arm/lib')
35 files changed, 720 insertions, 733 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 59ff42ddf0a..0573faab96a 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,14 +6,14 @@  lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \  		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ -		   delay.o findbit.o memchr.o memcpy.o		      \ +		   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \  		   memmove.o memset.o memzero.o setbit.o              \ -		   strncpy_from_user.o strnlen_user.o                 \  		   strchr.o strrchr.o                                 \  		   testchangebit.o testclearbit.o testsetbit.o        \  		   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \ -		   ucmpdi2.o lib1funcs.o div64.o sha1.o               \ -		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o +		   ucmpdi2.o lib1funcs.o div64.o                      \ +		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \ +		   call_with_stack.o bswapsdi2.o  mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o @@ -41,7 +41,12 @@ else  endif  lib-$(CONFIG_ARCH_RPC)		+= ecard.o io-acorn.o floppydma.o -lib-$(CONFIG_ARCH_SHARK)	+= io-shark.o  $(obj)/csumpartialcopy.o:	$(obj)/csumpartialcopygeneric.S  $(obj)/csumpartialcopyuser.o:	$(obj)/csumpartialcopygeneric.S + +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) +  NEON_FLAGS			:= -mfloat-abi=softfp -mfpu=neon +  CFLAGS_xor-neon.o		+= $(NEON_FLAGS) +  obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o +endif diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index a673297b0cf..4102be617fc 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -22,15 +22,10 @@  #define mask	r7  #define offset	r8 -ENTRY(__backtrace) -		mov	r1, #0x10 -		mov	r0, fp -  ENTRY(c_backtrace)  #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)  		mov	pc, lr -ENDPROC(__backtrace)  ENDPROC(c_backtrace)  #else  		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location... 
@@ -85,14 +80,14 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions  		ldr	r1, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,  		ldr	r3, .Ldsi+4 -		teq	r3, r1, lsr #10 +		teq	r3, r1, lsr #11  		ldreq	r0, [frame, #-8]	@ get sp  		subeq	r0, r0, #4		@ point at the last arg  		bleq	.Ldumpstm		@ dump saved registers  1004:		ldr	r1, [sv_pc, #0]		@ if stmfd sp!, {..., fp, ip, lr, pc}  		ldr	r3, .Ldsi		@ instruction exists, -		teq	r3, r1, lsr #10 +		teq	r3, r1, lsr #11  		subeq	r0, frame, #16  		bleq	.Ldumpstm		@ dump saved registers @@ -107,7 +102,6 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions  		mov	r1, frame  		bl	printk  no_frame:	ldmfd	sp!, {r4 - r8, pc} -ENDPROC(__backtrace)  ENDPROC(c_backtrace)  		.pushsection __ex_table,"a" @@ -134,11 +128,11 @@ ENDPROC(c_backtrace)  		beq	2f  		add	r7, r7, #1  		teq	r7, #6 -		moveq	r7, #1 -		moveq	r1, #'\n' -		movne	r1, #' ' -		ldr	r3, [stack], #-4 -		mov	r2, reg +		moveq	r7, #0 +		adr	r3, .Lcr +		addne	r3, r3, #1		@ skip newline +		ldr	r2, [stack], #-4 +		mov	r1, reg  		adr	r0, .Lfp  		bl	printk  2:		subs	reg, reg, #1 @@ -148,11 +142,11 @@ ENDPROC(c_backtrace)  		blne	printk  		ldmfd	sp!, {instr, reg, stack, r7, pc} -.Lfp:		.asciz	"%cr%d:%08x" +.Lfp:		.asciz	" r%d:%08x%s"  .Lcr:		.asciz	"\n"  .Lbad:		.asciz	"Backtrace aborted due to bad frame pointer <%p>\n"  		.align -.Ldsi:		.word	0xe92dd800 >> 10	@ stmfd sp!, {... fp, ip, lr, pc} -		.word	0xe92d0000 >> 10	@ stmfd sp!, {} +.Ldsi:		.word	0xe92dd800 >> 11	@ stmfd sp!, {... 
fp, ip, lr, pc} +		.word	0xe92d0000 >> 11	@ stmfd sp!, {}  #endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index d42252918bf..9f12ed1eea8 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,46 +1,78 @@ +#include <asm/unwind.h> -#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K) -	.macro	bitop, instr +#if __LINUX_ARM_ARCH__ >= 6 +	.macro	bitop, name, instr +ENTRY(	\name		) +UNWIND(	.fnstart	) +	ands	ip, r1, #3 +	strneb	r1, [ip]		@ assert word-aligned  	mov	r2, #1 -	and	r3, r0, #7		@ Get bit offset -	add	r1, r1, r0, lsr #3	@ Get byte offset +	and	r3, r0, #31		@ Get bit offset +	mov	r0, r0, lsr #5 +	add	r1, r1, r0, lsl #2	@ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) +	.arch_extension	mp +	ALT_SMP(W(pldw)	[r1]) +	ALT_UP(W(nop)) +#endif  	mov	r3, r2, lsl r3 -1:	ldrexb	r2, [r1] +1:	ldrex	r2, [r1]  	\instr	r2, r2, r3 -	strexb	r0, r2, [r1] +	strex	r0, r2, [r1]  	cmp	r0, #0  	bne	1b -	mov	pc, lr +	bx	lr +UNWIND(	.fnend		) +ENDPROC(\name		)  	.endm -	.macro	testop, instr, store -	and	r3, r0, #7		@ Get bit offset +	.macro	testop, name, instr, store +ENTRY(	\name		) +UNWIND(	.fnstart	) +	ands	ip, r1, #3 +	strneb	r1, [ip]		@ assert word-aligned  	mov	r2, #1 -	add	r1, r1, r0, lsr #3	@ Get byte offset +	and	r3, r0, #31		@ Get bit offset +	mov	r0, r0, lsr #5 +	add	r1, r1, r0, lsl #2	@ Get word offset  	mov	r3, r2, lsl r3		@ create mask  	smp_dmb -1:	ldrexb	r2, [r1] +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) +	.arch_extension	mp +	ALT_SMP(W(pldw)	[r1]) +	ALT_UP(W(nop)) +#endif +1:	ldrex	r2, [r1]  	ands	r0, r2, r3		@ save old value of bit -	\instr	r2, r2, r3			@ toggle bit -	strexb	ip, r2, [r1] +	\instr	r2, r2, r3		@ toggle bit +	strex	ip, r2, [r1]  	cmp	ip, #0  	bne	1b  	smp_dmb  	cmp	r0, #0  	movne	r0, #1 -2:	mov	pc, lr +2:	bx	lr +UNWIND(	.fnend		) +ENDPROC(\name		)  	.endm  #else -	.macro	bitop, instr -	and	r2, r0, #7 +	.macro	bitop, name, instr +ENTRY(	\name		) +UNWIND(	.fnstart	) +	ands	ip, r1, #3 +	
strneb	r1, [ip]		@ assert word-aligned +	and	r2, r0, #31 +	mov	r0, r0, lsr #5  	mov	r3, #1  	mov	r3, r3, lsl r2  	save_and_disable_irqs ip -	ldrb	r2, [r1, r0, lsr #3] +	ldr	r2, [r1, r0, lsl #2]  	\instr	r2, r2, r3 -	strb	r2, [r1, r0, lsr #3] +	str	r2, [r1, r0, lsl #2]  	restore_irqs ip  	mov	pc, lr +UNWIND(	.fnend		) +ENDPROC(\name		)  	.endm  /** @@ -51,17 +83,23 @@   * Note: we can trivially conditionalise the store instruction   * to avoid dirtying the data cache.   */ -	.macro	testop, instr, store -	add	r1, r1, r0, lsr #3 -	and	r3, r0, #7 -	mov	r0, #1 +	.macro	testop, name, instr, store +ENTRY(	\name		) +UNWIND(	.fnstart	) +	ands	ip, r1, #3 +	strneb	r1, [ip]		@ assert word-aligned +	and	r3, r0, #31 +	mov	r0, r0, lsr #5  	save_and_disable_irqs ip -	ldrb	r2, [r1] +	ldr	r2, [r1, r0, lsl #2]! +	mov	r0, #1  	tst	r2, r0, lsl r3  	\instr	r2, r2, r0, lsl r3  	\store	r2, [r1]  	moveq	r0, #0  	restore_irqs ip  	mov	pc, lr +UNWIND(	.fnend		) +ENDPROC(\name		)  	.endm  #endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S new file mode 100644 index 00000000000..9fcdd154eff --- /dev/null +++ b/arch/arm/lib/bswapsdi2.S @@ -0,0 +1,36 @@ +#include <linux/linkage.h> + +#if __LINUX_ARM_ARCH__ >= 6 +ENTRY(__bswapsi2) +	rev r0, r0 +	bx lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) +	rev r3, r0 +	rev r0, r1 +	mov r1, r3 +	bx lr +ENDPROC(__bswapdi2) +#else +ENTRY(__bswapsi2) +	eor r3, r0, r0, ror #16 +	mov r3, r3, lsr #8 +	bic r3, r3, #0xff00 +	eor r0, r3, r0, ror #8 +	mov pc, lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) +	mov ip, r1 +	eor r3, ip, ip, ror #16 +	eor r1, r0, r0, ror #16 +	mov r1, r1, lsr #8 +	mov r3, r3, lsr #8 +	bic r3, r3, #0xff00 +	bic r1, r1, #0xff00 +	eor r1, r1, r0, ror #8 +	eor r0, r3, ip, ror #8 +	mov pc, lr +ENDPROC(__bswapdi2) +#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S new file mode 100644 index 00000000000..916c80f13ae --- /dev/null +++ b/arch/arm/lib/call_with_stack.S @@ -0,0 +1,44 @@ +/* + * 
arch/arm/lib/call_with_stack.S + * + * Copyright (C) 2011 ARM Ltd. + * Written by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * void call_with_stack(void (*fn)(void *), void *arg, void *sp) + * + * Change the stack to that pointed at by sp, then invoke fn(arg) with + * the new stack. + */ +ENTRY(call_with_stack) +	str	sp, [r2, #-4]! +	str	lr, [r2, #-4]! 
+ +	mov	sp, r2 +	mov	r2, r0 +	mov	r0, r1 + +	adr	lr, BSYM(1f) +	mov	pc, r2 + +1:	ldr	lr, [sp] +	ldr	sp, [sp, #4] +	mov	pc, lr +ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S index 80f3115cbee..f4027862172 100644 --- a/arch/arm/lib/changebit.S +++ b/arch/arm/lib/changebit.S @@ -12,12 +12,4 @@  #include "bitops.h"                  .text -/* Purpose  : Function to change a bit - * Prototype: int change_bit(int bit, void *addr) - */ -ENTRY(_change_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_change_bit_le) -	bitop	eor -ENDPROC(_change_bit_be) -ENDPROC(_change_bit_le) +bitop	_change_bit, eor diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S index 1a63e43a1df..f6b75fb64d3 100644 --- a/arch/arm/lib/clearbit.S +++ b/arch/arm/lib/clearbit.S @@ -12,13 +12,4 @@  #include "bitops.h"                  .text -/* - * Purpose  : Function to clear a bit - * Prototype: int clear_bit(int bit, void *addr) - */ -ENTRY(_clear_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_clear_bit_le) -	bitop	bic -ENDPROC(_clear_bit_be) -ENDPROC(_clear_bit_le) +bitop	_clear_bit, bic diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb00..3bc8eb811a7 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -197,24 +197,24 @@  12:	PLD(	pld	[r1, #124]		)  13:		ldr4w	r1, r4, r5, r6, r7, abort=19f -		mov	r3, lr, pull #\pull +		mov	r3, lr, lspull #\pull  		subs	r2, r2, #32  		ldr4w	r1, r8, r9, ip, lr, abort=19f -		orr	r3, r3, r4, push #\push -		mov	r4, r4, pull #\pull -		orr	r4, r4, r5, push #\push -		mov	r5, r5, pull #\pull -		orr	r5, r5, r6, push #\push -		mov	r6, r6, pull #\pull -		orr	r6, r6, r7, push #\push -		mov	r7, r7, pull #\pull -		orr	r7, r7, r8, push #\push -		mov	r8, r8, pull #\pull -		orr	r8, r8, r9, push #\push -		mov	r9, r9, pull #\pull -		orr	r9, r9, ip, push #\push -		mov	ip, ip, pull #\pull -		orr	ip, ip, lr, push #\push +		orr	r3, r3, r4, 
lspush #\push +		mov	r4, r4, lspull #\pull +		orr	r4, r4, r5, lspush #\push +		mov	r5, r5, lspull #\pull +		orr	r5, r5, r6, lspush #\push +		mov	r6, r6, lspull #\pull +		orr	r6, r6, r7, lspush #\push +		mov	r7, r7, lspull #\pull +		orr	r7, r7, r8, lspush #\push +		mov	r8, r8, lspull #\pull +		orr	r8, r8, r9, lspush #\push +		mov	r9, r9, lspull #\pull +		orr	r9, r9, ip, lspush #\push +		mov	ip, ip, lspull #\pull +		orr	ip, ip, lr, lspush #\push  		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f  		bge	12b  	PLD(	cmn	r2, #96			) @@ -225,10 +225,10 @@  14:		ands	ip, r2, #28  		beq	16f -15:		mov	r3, lr, pull #\pull +15:		mov	r3, lr, lspull #\pull  		ldr1w	r1, lr, abort=21f  		subs	ip, ip, #4 -		orr	r3, r3, lr, push #\push +		orr	r3, r3, lr, lspush #\push  		str1w	r0, r3, abort=21f  		bgt	15b  	CALGN(	cmp	r2, #0			) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a0..d6e742d2400 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -141,7 +141,7 @@ FN_ENTRY  		tst	len, #2  		mov	r5, r4, get_byte_0  		beq	.Lexit -		adcs	sum, sum, r4, push #16 +		adcs	sum, sum, r4, lspush #16  		strb	r5, [dst], #1  		mov	r5, r4, get_byte_1  		strb	r5, [dst], #1 @@ -171,23 +171,23 @@ FN_ENTRY  		cmp	ip, #2  		beq	.Lsrc2_aligned  		bhi	.Lsrc3_aligned -		mov	r4, r5, pull #8		@ C = 0 +		mov	r4, r5, lspull #8		@ C = 0  		bics	ip, len, #15  		beq	2f  1:		load4l	r5, r6, r7, r8 -		orr	r4, r4, r5, push #24 -		mov	r5, r5, pull #8 -		orr	r5, r5, r6, push #24 -		mov	r6, r6, pull #8 -		orr	r6, r6, r7, push #24 -		mov	r7, r7, pull #8 -		orr	r7, r7, r8, push #24 +		orr	r4, r4, r5, lspush #24 +		mov	r5, r5, lspull #8 +		orr	r5, r5, r6, lspush #24 +		mov	r6, r6, lspull #8 +		orr	r6, r6, r7, lspush #24 +		mov	r7, r7, lspull #8 +		orr	r7, r7, r8, lspush #24  		stmia	dst!, {r4, r5, r6, r7}  		adcs	sum, sum, r4  		adcs	sum, sum, r5  		adcs	sum, sum, r6  		adcs	sum, sum, r7 -		mov	r4, r8, pull #8 +		mov	
r4, r8, lspull #8  		sub	ip, ip, #16  		teq	ip, #0  		bne	1b @@ -196,50 +196,50 @@ FN_ENTRY  		tst	ip, #8  		beq	3f  		load2l	r5, r6 -		orr	r4, r4, r5, push #24 -		mov	r5, r5, pull #8 -		orr	r5, r5, r6, push #24 +		orr	r4, r4, r5, lspush #24 +		mov	r5, r5, lspull #8 +		orr	r5, r5, r6, lspush #24  		stmia	dst!, {r4, r5}  		adcs	sum, sum, r4  		adcs	sum, sum, r5 -		mov	r4, r6, pull #8 +		mov	r4, r6, lspull #8  		tst	ip, #4  		beq	4f  3:		load1l	r5 -		orr	r4, r4, r5, push #24 +		orr	r4, r4, r5, lspush #24  		str	r4, [dst], #4  		adcs	sum, sum, r4 -		mov	r4, r5, pull #8 +		mov	r4, r5, lspull #8  4:		ands	len, len, #3  		beq	.Ldone  		mov	r5, r4, get_byte_0  		tst	len, #2  		beq	.Lexit -		adcs	sum, sum, r4, push #16 +		adcs	sum, sum, r4, lspush #16  		strb	r5, [dst], #1  		mov	r5, r4, get_byte_1  		strb	r5, [dst], #1  		mov	r5, r4, get_byte_2  		b	.Lexit -.Lsrc2_aligned:	mov	r4, r5, pull #16 +.Lsrc2_aligned:	mov	r4, r5, lspull #16  		adds	sum, sum, #0  		bics	ip, len, #15  		beq	2f  1:		load4l	r5, r6, r7, r8 -		orr	r4, r4, r5, push #16 -		mov	r5, r5, pull #16 -		orr	r5, r5, r6, push #16 -		mov	r6, r6, pull #16 -		orr	r6, r6, r7, push #16 -		mov	r7, r7, pull #16 -		orr	r7, r7, r8, push #16 +		orr	r4, r4, r5, lspush #16 +		mov	r5, r5, lspull #16 +		orr	r5, r5, r6, lspush #16 +		mov	r6, r6, lspull #16 +		orr	r6, r6, r7, lspush #16 +		mov	r7, r7, lspull #16 +		orr	r7, r7, r8, lspush #16  		stmia	dst!, {r4, r5, r6, r7}  		adcs	sum, sum, r4  		adcs	sum, sum, r5  		adcs	sum, sum, r6  		adcs	sum, sum, r7 -		mov	r4, r8, pull #16 +		mov	r4, r8, lspull #16  		sub	ip, ip, #16  		teq	ip, #0  		bne	1b @@ -248,20 +248,20 @@ FN_ENTRY  		tst	ip, #8  		beq	3f  		load2l	r5, r6 -		orr	r4, r4, r5, push #16 -		mov	r5, r5, pull #16 -		orr	r5, r5, r6, push #16 +		orr	r4, r4, r5, lspush #16 +		mov	r5, r5, lspull #16 +		orr	r5, r5, r6, lspush #16  		stmia	dst!, {r4, r5}  		adcs	sum, sum, r4  		adcs	sum, sum, r5 -		mov	r4, r6, pull #16 +		mov	r4, r6, lspull #16  		tst	ip, #4  		beq	4f  3:		load1l	
r5 -		orr	r4, r4, r5, push #16 +		orr	r4, r4, r5, lspush #16  		str	r4, [dst], #4  		adcs	sum, sum, r4 -		mov	r4, r5, pull #16 +		mov	r4, r5, lspull #16  4:		ands	len, len, #3  		beq	.Ldone  		mov	r5, r4, get_byte_0 @@ -276,24 +276,24 @@ FN_ENTRY  		load1b	r5  		b	.Lexit -.Lsrc3_aligned:	mov	r4, r5, pull #24 +.Lsrc3_aligned:	mov	r4, r5, lspull #24  		adds	sum, sum, #0  		bics	ip, len, #15  		beq	2f  1:		load4l	r5, r6, r7, r8 -		orr	r4, r4, r5, push #8 -		mov	r5, r5, pull #24 -		orr	r5, r5, r6, push #8 -		mov	r6, r6, pull #24 -		orr	r6, r6, r7, push #8 -		mov	r7, r7, pull #24 -		orr	r7, r7, r8, push #8 +		orr	r4, r4, r5, lspush #8 +		mov	r5, r5, lspull #24 +		orr	r5, r5, r6, lspush #8 +		mov	r6, r6, lspull #24 +		orr	r6, r6, r7, lspush #8 +		mov	r7, r7, lspull #24 +		orr	r7, r7, r8, lspush #8  		stmia	dst!, {r4, r5, r6, r7}  		adcs	sum, sum, r4  		adcs	sum, sum, r5  		adcs	sum, sum, r6  		adcs	sum, sum, r7 -		mov	r4, r8, pull #24 +		mov	r4, r8, lspull #24  		sub	ip, ip, #16  		teq	ip, #0  		bne	1b @@ -302,20 +302,20 @@ FN_ENTRY  		tst	ip, #8  		beq	3f  		load2l	r5, r6 -		orr	r4, r4, r5, push #8 -		mov	r5, r5, pull #24 -		orr	r5, r5, r6, push #8 +		orr	r4, r4, r5, lspush #8 +		mov	r5, r5, lspull #24 +		orr	r5, r5, r6, lspush #8  		stmia	dst!, {r4, r5}  		adcs	sum, sum, r4  		adcs	sum, sum, r5 -		mov	r4, r6, pull #24 +		mov	r4, r6, lspull #24  		tst	ip, #4  		beq	4f  3:		load1l	r5 -		orr	r4, r4, r5, push #8 +		orr	r4, r4, r5, lspush #8  		str	r4, [dst], #4  		adcs	sum, sum, r4 -		mov	r4, r5, pull #24 +		mov	r4, r5, lspull #24  4:		ands	len, len, #3  		beq	.Ldone  		mov	r5, r4, get_byte_0 @@ -326,7 +326,7 @@ FN_ENTRY  		load1l	r4  		mov	r5, r4, get_byte_0  		strb	r5, [dst], #1 -		adcs	sum, sum, r4, push #24 +		adcs	sum, sum, r4, lspush #24  		mov	r5, r4, get_byte_1  		b	.Lexit  FN_EXIT diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay-loop.S index 8d6a8762ab8..bc1033b897b 100644 --- a/arch/arm/lib/delay.S +++ b/arch/arm/lib/delay-loop.S @@ -9,11 +9,11 @@   */  
#include <linux/linkage.h>  #include <asm/assembler.h> -#include <asm/param.h> +#include <asm/delay.h>  		.text  .LC0:		.word	loops_per_jiffy -.LC1:		.word	(2199023*HZ)>>11 +.LC1:		.word	UDELAY_MULT  /*   * r0  <= 2000 @@ -21,26 +21,29 @@   * HZ  <= 1000   */ -ENTRY(__udelay) +ENTRY(__loop_udelay)  		ldr	r2, .LC1  		mul	r0, r2, r0 -ENTRY(__const_udelay)				@ 0 <= r0 <= 0x7fffff06 +ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06 +		mov	r1, #-1  		ldr	r2, .LC0  		ldr	r2, [r2]		@ max = 0x01ffffff +		add	r0, r0, r1, lsr #32-14  		mov	r0, r0, lsr #14		@ max = 0x0001ffff +		add	r2, r2, r1, lsr #32-10  		mov	r2, r2, lsr #10		@ max = 0x00007fff  		mul	r0, r2, r0		@ max = 2^32-1 +		add	r0, r0, r1, lsr #32-6  		movs	r0, r0, lsr #6  		moveq	pc, lr  /*   * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * Oh, if only we had a cycle counter...   */ +		.align 3  @ Delay routine -ENTRY(__delay) +ENTRY(__loop_delay)  		subs	r0, r0, #1  #if 0  		movls	pc, lr @@ -58,8 +61,8 @@ ENTRY(__delay)  		movls	pc, lr  		subs	r0, r0, #1  #endif -		bhi	__delay +		bhi	__loop_delay  		mov	pc, lr -ENDPROC(__udelay) -ENDPROC(__const_udelay) -ENDPROC(__delay) +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 00000000000..5306de35013 --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,93 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/timex.h> + +/* + * Default to the loop-based delay implementation. + */ +struct arm_delay_ops arm_delay_ops = { +	.delay		= __loop_delay, +	.const_udelay	= __loop_const_udelay, +	.udelay		= __loop_udelay, +}; + +static const struct delay_timer *delay_timer; +static bool delay_calibrated; + +int read_current_timer(unsigned long *timer_val) +{ +	if (!delay_timer) +		return -ENXIO; + +	*timer_val = delay_timer->read_current_timer(); +	return 0; +} +EXPORT_SYMBOL_GPL(read_current_timer); + +static void __timer_delay(unsigned long cycles) +{ +	cycles_t start = get_cycles(); + +	while ((get_cycles() - start) < cycles) +		cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ +	unsigned long long loops = xloops; +	loops *= arm_delay_ops.ticks_per_jiffy; +	__timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ +	__timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init register_current_timer_delay(const struct delay_timer *timer) +{ +	if (!delay_calibrated) { +		pr_info("Switching to timer-based delay loop\n"); +		delay_timer			= timer; +		lpj_fine			= timer->freq / HZ; + +		/* cpufreq may scale loops_per_jiffy, so keep a private copy */ +		arm_delay_ops.ticks_per_jiffy	= lpj_fine; +		arm_delay_ops.delay		= __timer_delay; +		arm_delay_ops.const_udelay	= __timer_const_udelay; +		arm_delay_ops.udelay		= __timer_udelay; + +		delay_calibrated		= true; +	} else { +		pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); +	} +} + +unsigned long calibrate_delay_is_known(void) +{ +	delay_calibrated = true; +	
return lpj_fine; +} diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index faa7748142d..e55c4842c29 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -13,6 +13,7 @@   */  #include <linux/linkage.h> +#include <asm/unwind.h>  #ifdef __ARMEB__  #define xh r0 @@ -44,6 +45,7 @@   */  ENTRY(__do_div64) +UNWIND(.fnstart)  	@ Test for easy paths first.  	subs	ip, r4, #1 @@ -189,7 +191,12 @@ ENTRY(__do_div64)  	moveq	yh, xh  	moveq	xh, #0  	moveq	pc, lr +UNWIND(.fnend) +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64:  	@ Division by 0:  	str	lr, [sp, #-8]!  	bl	__div0 @@ -200,4 +207,5 @@ ENTRY(__do_div64)  	mov	xh, #0  	ldr	pc, [sp], #8 +UNWIND(.fnend)  ENDPROC(__do_div64) diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S index 8678eb2b7a6..e6057fa851b 100644 --- a/arch/arm/lib/ecard.S +++ b/arch/arm/lib/ecard.S @@ -12,7 +12,6 @@   */  #include <linux/linkage.h>  #include <asm/assembler.h> -#include <mach/hardware.h>  #define CPSR2SPSR(rt) \  		mrs	rt, cpsr; \ diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 1e4cbd4e7be..64f6bc1a913 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -174,8 +174,8 @@ ENDPROC(_find_next_bit_be)   */  .L_found:  #if __LINUX_ARM_ARCH__ >= 5 -		rsb	r1, r3, #0 -		and	r3, r3, r1 +		rsb	r0, r3, #0 +		and	r3, r3, r0  		clz	r3, r3  		rsb	r3, r3, #31  		add	r0, r2, r3 @@ -190,5 +190,7 @@ ENDPROC(_find_next_bit_be)  		addeq	r2, r2, #1  		mov	r0, r2  #endif +		cmp	r1, r0			@ Clamp to maxbit +		movlo	r0, r1  		mov	pc, lr diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index b1631a7dbe7..9b06bb41fca 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,8 +16,9 @@   * __get_user_X   *   * Inputs:	r0 contains the address + *		r1 contains the address limit, which must be preserved   * Outputs:	r0 is the error code - *		r2, r3 contains the zero-extended value + *		r2 contains the zero-extended value   *		lr corrupted   *   * No other registers must 
be altered.  (see <asm/uaccess.h> @@ -27,33 +28,40 @@   * Note also that it is intended that __get_user_bad is not global.   */  #include <linux/linkage.h> +#include <asm/assembler.h>  #include <asm/errno.h> +#include <asm/domain.h>  ENTRY(__get_user_1) -1:	ldrbt	r2, [r0] +	check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb)	r2, [r0]  	mov	r0, #0  	mov	pc, lr  ENDPROC(__get_user_1)  ENTRY(__get_user_2) -#ifdef CONFIG_THUMB2_KERNEL -2:	ldrbt	r2, [r0] -3:	ldrbt	r3, [r0, #1] -#else +	check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb	.req	ip  2:	ldrbt	r2, [r0], #1 -3:	ldrbt	r3, [r0] +3:	ldrbt	rb, [r0], #0 +#else +rb	.req	r0 +2:	ldrb	r2, [r0] +3:	ldrb	rb, [r0, #1]  #endif  #ifndef __ARMEB__ -	orr	r2, r2, r3, lsl #8 +	orr	r2, r2, rb, lsl #8  #else -	orr	r2, r3, r2, lsl #8 +	orr	r2, rb, r2, lsl #8  #endif  	mov	r0, #0  	mov	pc, lr  ENDPROC(__get_user_2)  ENTRY(__get_user_4) -4:	ldrt	r2, [r0] +	check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr)	r2, [r0]  	mov	r0, #0  	mov	pc, lr  ENDPROC(__get_user_4) diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S index 1b197ea7aab..69719bad674 100644 --- a/arch/arm/lib/io-acorn.S +++ b/arch/arm/lib/io-acorn.S @@ -11,13 +11,14 @@   *   */  #include <linux/linkage.h> +#include <linux/kern_levels.h>  #include <asm/assembler.h>  		.text  		.align  .Liosl_warning: -		.ascii	"<4>insl/outsl not implemented, called from %08lX\0" +		.ascii	KERN_WARNING "insl/outsl not implemented, called from %08lX\0"  		.align  /* diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4..7a7430950c7 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -47,25 +47,25 @@ ENTRY(__raw_readsl)  		strb	ip, [r1], #1  4:		subs	r2, r2, #1 -		mov	ip, r3, pull #24 +		mov	ip, r3, lspull #24  		ldrne	r3, [r0] -		orrne	ip, ip, r3, push #8 +		orrne	ip, ip, r3, lspush #8  		strne	ip, [r1], #4  		bne	4b  		b	8f  5:		subs	r2, r2, #1 -		mov	ip, r3, pull #16 +		mov	ip, r3, lspull 
#16  		ldrne	r3, [r0] -		orrne	ip, ip, r3, push #16 +		orrne	ip, ip, r3, lspush #16  		strne	ip, [r1], #4  		bne	5b  		b	7f  6:		subs	r2, r2, #1 -		mov	ip, r3, pull #8 +		mov	ip, r3, lspull #8  		ldrne	r3, [r0] -		orrne	ip, ip, r3, push #24 +		orrne	ip, ip, r3, lspush #24  		strne	ip, [r1], #4  		bne	6b diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 9aaf7c72065..88487c8c4f2 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -9,7 +9,6 @@   */  #include <linux/linkage.h>  #include <asm/assembler.h> -#include <mach/hardware.h>  .Linsw_bad_alignment:  		adr	r0, .Linsw_bad_align_msg diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c deleted file mode 100644 index 824253948f5..00000000000 --- a/arch/arm/lib/io-shark.c +++ /dev/null @@ -1,13 +0,0 @@ -/* - *  linux/arch/arm/lib/io-shark.c - * - *  by Alexander Schulz - * - * derived from: - * linux/arch/arm/lib/io-ebsa.S - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725..d0d104a0dd1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -41,26 +41,26 @@ ENTRY(__raw_writesl)  		blt	5f  		bgt	6f -4:		mov	ip, r3, pull #16 +4:		mov	ip, r3, lspull #16  		ldr	r3, [r1], #4  		subs	r2, r2, #1 -		orr	ip, ip, r3, push #16 +		orr	ip, ip, r3, lspush #16  		str	ip, [r0]  		bne	4b  		mov	pc, lr -5:		mov	ip, r3, pull #8 +5:		mov	ip, r3, lspull #8  		ldr	r3, [r1], #4  		subs	r2, r2, #1 -		orr	ip, ip, r3, push #24 +		orr	ip, ip, r3, lspush #24  		str	ip, [r0]  		bne	5b  		mov	pc, lr -6:		mov	ip, r3, pull #24 +6:		mov	ip, r3, lspull #24  		ldr	r3, [r1], #4  		subs	r2, r2, #1 -		orr	ip, ip, r3, push #8 +		orr	ip, ip, r3, lspush #8  		str	ip, [r0]  		bne	6b  		mov	pc, lr diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index cd34503e424..49b800419e3 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -9,7 +9,6 @@   */  #include <linux/linkage.h>  #include <asm/assembler.h> -#include <mach/hardware.h>  .Loutsw_bad_alignment:  		adr	r0, .Loutsw_bad_align_msg diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 6dc06487f3c..c562f649734 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA.  */  #include <linux/linkage.h>  #include <asm/assembler.h> - +#include <asm/unwind.h>  .macro ARM_DIV_BODY dividend, divisor, result, curbit @@ -207,6 +207,7 @@ Boston, MA 02111-1307, USA.  
*/  ENTRY(__udivsi3)  ENTRY(__aeabi_uidiv) +UNWIND(.fnstart)  	subs	r2, r1, #1  	moveq	pc, lr @@ -230,10 +231,12 @@ ENTRY(__aeabi_uidiv)  	mov	r0, r0, lsr r2  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__udivsi3)  ENDPROC(__aeabi_uidiv)  ENTRY(__umodsi3) +UNWIND(.fnstart)  	subs	r2, r1, #1			@ compare divisor with 1  	bcc	Ldiv0 @@ -247,10 +250,12 @@ ENTRY(__umodsi3)  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__umodsi3)  ENTRY(__divsi3)  ENTRY(__aeabi_idiv) +UNWIND(.fnstart)  	cmp	r1, #0  	eor	ip, r0, r1			@ save the sign of the result. @@ -287,10 +292,12 @@ ENTRY(__aeabi_idiv)  	rsbmi	r0, r0, #0  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__divsi3)  ENDPROC(__aeabi_idiv)  ENTRY(__modsi3) +UNWIND(.fnstart)  	cmp	r1, #0  	beq	Ldiv0 @@ -310,11 +317,14 @@ ENTRY(__modsi3)  	rsbmi	r0, r0, #0  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__modsi3)  #ifdef CONFIG_AEABI  ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr}	)  	stmfd	sp!, {r0, r1, ip, lr}  	bl	__aeabi_uidiv @@ -323,10 +333,12 @@ ENTRY(__aeabi_uidivmod)  	sub	r1, r1, r3  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__aeabi_uidivmod)  ENTRY(__aeabi_idivmod) - +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr}	)  	stmfd	sp!, {r0, r1, ip, lr}  	bl	__aeabi_idiv  	ldmfd	sp!, {r1, r2, ip, lr} @@ -334,15 +346,18 @@ ENTRY(__aeabi_idivmod)  	sub	r1, r1, r3  	mov	pc, lr +UNWIND(.fnend)  ENDPROC(__aeabi_idivmod)  #endif  Ldiv0: - +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr})  	str	lr, [sp, #-8]!  	bl	__div0  	mov	r0, #0			@ About as wrong as it could be.  	
ldr	pc, [sp], #8 - - +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 938fc14f962..d1fc0c0c342 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -147,24 +147,24 @@ ENTRY(memmove)  12:	PLD(	pld	[r1, #-128]		)  13:		ldmdb   r1!, {r7, r8, r9, ip} -		mov     lr, r3, push #\push +		mov     lr, r3, lspush #\push  		subs    r2, r2, #32  		ldmdb   r1!, {r3, r4, r5, r6} -		orr     lr, lr, ip, pull #\pull -		mov     ip, ip, push #\push -		orr     ip, ip, r9, pull #\pull -		mov     r9, r9, push #\push -		orr     r9, r9, r8, pull #\pull -		mov     r8, r8, push #\push -		orr     r8, r8, r7, pull #\pull -		mov     r7, r7, push #\push -		orr     r7, r7, r6, pull #\pull -		mov     r6, r6, push #\push -		orr     r6, r6, r5, pull #\pull -		mov     r5, r5, push #\push -		orr     r5, r5, r4, pull #\pull -		mov     r4, r4, push #\push -		orr     r4, r4, r3, pull #\pull +		orr     lr, lr, ip, lspull #\pull +		mov     ip, ip, lspush #\push +		orr     ip, ip, r9, lspull #\pull +		mov     r9, r9, lspush #\push +		orr     r9, r9, r8, lspull #\pull +		mov     r8, r8, lspush #\push +		orr     r8, r8, r7, lspull #\pull +		mov     r7, r7, lspush #\push +		orr     r7, r7, r6, lspull #\pull +		mov     r6, r6, lspush #\push +		orr     r6, r6, r5, lspull #\pull +		mov     r5, r5, lspush #\push +		orr     r5, r5, r4, lspull #\pull +		mov     r4, r4, lspush #\push +		orr     r4, r4, r3, lspull #\pull  		stmdb   r0!, {r4 - r9, ip, lr}  		bge	12b  	PLD(	cmn	r2, #96			) @@ -175,10 +175,10 @@ ENTRY(memmove)  14:		ands	ip, r2, #28  		beq	16f -15:		mov     lr, r3, push #\push +15:		mov     lr, r3, lspush #\push  		ldr	r3, [r1, #-4]!  		subs	ip, ip, #4 -		orr	lr, lr, r3, pull #\pull +		orr	lr, lr, r3, lspull #\pull  		str	lr, [r0, #-4]!  		
bgt	15b  	CALGN(	cmp	r2, #0			) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 650d5923ab8..94b0650ea98 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -14,27 +14,15 @@  	.text  	.align	5 -	.word	0 - -1:	subs	r2, r2, #4		@ 1 do we have enough -	blt	5f			@ 1 bytes to align with? -	cmp	r3, #2			@ 1 -	strltb	r1, [r0], #1		@ 1 -	strleb	r1, [r0], #1		@ 1 -	strb	r1, [r0], #1		@ 1 -	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted.  Try doing the - * memset again. - */  ENTRY(memset)  	ands	r3, r0, #3		@ 1 unaligned? -	bne	1b			@ 1 +	mov	ip, r0			@ preserve r0 as return value +	bne	6f			@ 1  /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary.   */ -	orr	r1, r1, r1, lsl #8 +1:	orr	r1, r1, r1, lsl #8  	orr	r1, r1, r1, lsl #16  	mov	r3, r1  	cmp	r2, #16 @@ -43,29 +31,28 @@ ENTRY(memset)  #if ! CALGN(1)+0  /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR   */ -	str	lr, [sp, #-4]! -	mov	ip, r1 +	stmfd	sp!, {r8, lr} +	mov	r8, r1  	mov	lr, r1  2:	subs	r2, r2, #64 -	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time. -	stmgeia	r0!, {r1, r3, ip, lr} -	stmgeia	r0!, {r1, r3, ip, lr} -	stmgeia	r0!, {r1, r3, ip, lr} +	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time. +	stmgeia	ip!, {r1, r3, r8, lr} +	stmgeia	ip!, {r1, r3, r8, lr} +	stmgeia	ip!, {r1, r3, r8, lr}  	bgt	2b -	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go. +	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.  
/*   * No need to correct the count; we're only testing bits from now on   */  	tst	r2, #32 -	stmneia	r0!, {r1, r3, ip, lr} -	stmneia	r0!, {r1, r3, ip, lr} +	stmneia	ip!, {r1, r3, r8, lr} +	stmneia	ip!, {r1, r3, r8, lr}  	tst	r2, #16 -	stmneia	r0!, {r1, r3, ip, lr} -	ldr	lr, [sp], #4 +	stmneia	ip!, {r1, r3, r8, lr} +	ldmfd	sp!, {r8, lr}  #else @@ -74,54 +61,63 @@ ENTRY(memset)   * whole cache lines at once.   */ -	stmfd	sp!, {r4-r7, lr} +	stmfd	sp!, {r4-r8, lr}  	mov	r4, r1  	mov	r5, r1  	mov	r6, r1  	mov	r7, r1 -	mov	ip, r1 +	mov	r8, r1  	mov	lr, r1  	cmp	r2, #96 -	tstgt	r0, #31 +	tstgt	ip, #31  	ble	3f -	and	ip, r0, #31 -	rsb	ip, ip, #32 -	sub	r2, r2, ip -	movs	ip, ip, lsl #(32 - 4) -	stmcsia	r0!, {r4, r5, r6, r7} -	stmmiia	r0!, {r4, r5} -	tst	ip, #(1 << 30) -	mov	ip, r1 -	strne	r1, [r0], #4 +	and	r8, ip, #31 +	rsb	r8, r8, #32 +	sub	r2, r2, r8 +	movs	r8, r8, lsl #(32 - 4) +	stmcsia	ip!, {r4, r5, r6, r7} +	stmmiia	ip!, {r4, r5} +	tst	r8, #(1 << 30) +	mov	r8, r1 +	strne	r1, [ip], #4  3:	subs	r2, r2, #64 -	stmgeia	r0!, {r1, r3-r7, ip, lr} -	stmgeia	r0!, {r1, r3-r7, ip, lr} +	stmgeia	ip!, {r1, r3-r8, lr} +	stmgeia	ip!, {r1, r3-r8, lr}  	bgt	3b -	ldmeqfd	sp!, {r4-r7, pc} +	ldmeqfd	sp!, {r4-r8, pc}  	tst	r2, #32 -	stmneia	r0!, {r1, r3-r7, ip, lr} +	stmneia	ip!, {r1, r3-r8, lr}  	tst	r2, #16 -	stmneia	r0!, {r4-r7} -	ldmfd	sp!, {r4-r7, lr} +	stmneia	ip!, {r4-r7} +	ldmfd	sp!, {r4-r8, lr}  #endif  4:	tst	r2, #8 -	stmneia	r0!, {r1, r3} +	stmneia	ip!, {r1, r3}  	tst	r2, #4 -	strne	r1, [r0], #4 +	strne	r1, [ip], #4  /*   * When we get here, we've got less than 4 bytes to zero.  We   * may have an unaligned pointer as well.   */  5:	tst	r2, #2 -	strneb	r1, [r0], #1 -	strneb	r1, [r0], #1 +	strneb	r1, [ip], #1 +	strneb	r1, [ip], #1  	tst	r2, #1 -	strneb	r1, [r0], #1 +	strneb	r1, [ip], #1  	mov	pc, lr + +6:	subs	r2, r2, #4		@ 1 do we have enough +	blt	5b			@ 1 bytes to align with? 
+	cmp	r3, #2			@ 1 +	strltb	r1, [ip], #1		@ 1 +	strleb	r1, [ip], #1		@ 1 +	strb	r1, [ip], #1		@ 1 +	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3)) +	b	1b  ENDPROC(memset) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 5a01a23c6c0..3d73dcb959b 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,6 +16,7 @@   * __put_user_X   *   * Inputs:	r0 contains the address + *		r1 contains the address limit, which must be preserved   *		r2, r3 contains the value   * Outputs:	r0 is the error code   *		lr corrupted @@ -27,31 +28,35 @@   * Note also that it is intended that __put_user_bad is not global.   */  #include <linux/linkage.h> +#include <asm/assembler.h>  #include <asm/errno.h> +#include <asm/domain.h>  ENTRY(__put_user_1) -1:	strbt	r2, [r0] +	check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb)	r2, [r0]  	mov	r0, #0  	mov	pc, lr  ENDPROC(__put_user_1)  ENTRY(__put_user_2) +	check_uaccess r0, 2, r1, ip, __put_user_bad  	mov	ip, r2, lsr #8  #ifdef CONFIG_THUMB2_KERNEL  #ifndef __ARMEB__ -2:	strbt	r2, [r0] -3:	strbt	ip, [r0, #1] +2: TUSER(strb)	r2, [r0] +3: TUSER(strb)	ip, [r0, #1]  #else -2:	strbt	ip, [r0] -3:	strbt	r2, [r0, #1] +2: TUSER(strb)	ip, [r0] +3: TUSER(strb)	r2, [r0, #1]  #endif  #else	/* !CONFIG_THUMB2_KERNEL */  #ifndef __ARMEB__ -2:	strbt	r2, [r0], #1 -3:	strbt	ip, [r0] +2: TUSER(strb)	r2, [r0], #1 +3: TUSER(strb)	ip, [r0]  #else -2:	strbt	ip, [r0], #1 -3:	strbt	r2, [r0] +2: TUSER(strb)	ip, [r0], #1 +3: TUSER(strb)	r2, [r0]  #endif  #endif	/* CONFIG_THUMB2_KERNEL */  	mov	r0, #0 @@ -59,18 +64,20 @@ ENTRY(__put_user_2)  ENDPROC(__put_user_2)  ENTRY(__put_user_4) -4:	strt	r2, [r0] +	check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str)	r2, [r0]  	mov	r0, #0  	mov	pc, lr  ENDPROC(__put_user_4)  ENTRY(__put_user_8) +	check_uaccess r0, 8, r1, ip, __put_user_bad  #ifdef CONFIG_THUMB2_KERNEL -5:	strt	r2, [r0] -6:	strt	r3, [r0, #4] +5: TUSER(str)	r2, [r0] +6: TUSER(str)	r3, [r0, #4]  #else -5:	strt	r2, [r0], #4 -6:	strt	
r3, [r0] +5: TUSER(str)	r2, [r0], #4 +6: TUSER(str)	r3, [r0]  #endif  	mov	r0, #0  	mov	pc, lr diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S index 1dd7176c4b2..618fedae4b3 100644 --- a/arch/arm/lib/setbit.S +++ b/arch/arm/lib/setbit.S @@ -12,13 +12,4 @@  #include "bitops.h"  		.text -/* - * Purpose  : Function to set a bit - * Prototype: int set_bit(int bit, void *addr) - */ -ENTRY(_set_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_set_bit_le) -	bitop	orr -ENDPROC(_set_bit_be) -ENDPROC(_set_bit_le) +bitop	_set_bit, orr diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S deleted file mode 100644 index eb0edb80d7b..00000000000 --- a/arch/arm/lib/sha1.S +++ /dev/null @@ -1,211 +0,0 @@ -/* - *  linux/arch/arm/lib/sha1.S - * - *  SHA transform optimized for ARM - * - *  Copyright:	(C) 2005 by Nicolas Pitre <nico@fluxnic.net> - *  Created:	September 17, 2005 - * - *  This program is free software; you can redistribute it and/or modify - *  it under the terms of the GNU General Public License version 2 as - *  published by the Free Software Foundation. - * - *  The reference implementation for this code is linux/lib/sha1.c - */ - -#include <linux/linkage.h> - -	.text - - -/* - * void sha_transform(__u32 *digest, const char *in, __u32 *W) - * - * Note: the "in" ptr may be unaligned. - */ - -ENTRY(sha_transform) - -	stmfd	sp!, {r4 - r8, lr} - -	@ for (i = 0; i < 16; i++) -	@         W[i] = be32_to_cpu(in[i]); - -#ifdef __ARMEB__ -	mov	r4, r0 -	mov	r0, r2 -	mov	r2, #64 -	bl	memcpy -	mov	r2, r0 -	mov	r0, r4 -#else -	mov	r3, r2 -	mov	lr, #16 -1:	ldrb	r4, [r1], #1 -	ldrb	r5, [r1], #1 -	ldrb	r6, [r1], #1 -	ldrb	r7, [r1], #1 -	subs	lr, lr, #1 -	orr	r5, r5, r4, lsl #8 -	orr	r6, r6, r5, lsl #8 -	orr	r7, r7, r6, lsl #8 -	str	r7, [r3], #4 -	bne	1b -#endif - -	@ for (i = 0; i < 64; i++) -	@         W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); - -	sub	r3, r2, #4 -	mov	lr, #64 -2:	ldr	r4, [r3, #4]! 
-	subs	lr, lr, #1 -	ldr	r5, [r3, #8] -	ldr	r6, [r3, #32] -	ldr	r7, [r3, #52] -	eor	r4, r4, r5 -	eor	r4, r4, r6 -	eor	r4, r4, r7 -	mov	r4, r4, ror #31 -	str	r4, [r3, #64] -	bne	2b - -	/* -	 * The SHA functions are: -	 * -	 * f1(B,C,D) = (D ^ (B & (C ^ D))) -	 * f2(B,C,D) = (B ^ C ^ D) -	 * f3(B,C,D) = ((B & C) | (D & (B | C))) -	 * -	 * Then the sub-blocks are processed as follows: -	 * -	 * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ -	 * B' = A -	 * C' = ror(B, 2) -	 * D' = C -	 * E' = D -	 * -	 * We therefore unroll each loop 5 times to avoid register shuffling. -	 * Also the ror for C (and also D and E which are successivelyderived -	 * from it) is applied in place to cut on an additional mov insn for -	 * each round. -	 */ - -	.macro	sha_f1, A, B, C, D, E -	ldr	r3, [r2], #4 -	eor	ip, \C, \D -	add	\E, r1, \E, ror #2 -	and	ip, \B, ip, ror #2 -	add	\E, \E, \A, ror #27 -	eor	ip, ip, \D, ror #2 -	add	\E, \E, r3 -	add	\E, \E, ip -	.endm - -	.macro	sha_f2, A, B, C, D, E -	ldr	r3, [r2], #4 -	add	\E, r1, \E, ror #2 -	eor	ip, \B, \C, ror #2 -	add	\E, \E, \A, ror #27 -	eor	ip, ip, \D, ror #2 -	add	\E, \E, r3 -	add	\E, \E, ip -	.endm - -	.macro	sha_f3, A, B, C, D, E -	ldr	r3, [r2], #4 -	add	\E, r1, \E, ror #2 -	orr	ip, \B, \C, ror #2 -	add	\E, \E, \A, ror #27 -	and	ip, ip, \D, ror #2 -	add	\E, \E, r3 -	and	r3, \B, \C, ror #2 -	orr	ip, ip, r3 -	add	\E, \E, ip -	.endm - -	ldmia	r0, {r4 - r8} - -	mov	lr, #4 -	ldr	r1, .L_sha_K + 0 - -	/* adjust initial values */ -	mov	r6, r6, ror #30 -	mov	r7, r7, ror #30 -	mov	r8, r8, ror #30 - -3:	subs	lr, lr, #1 -	sha_f1	r4, r5, r6, r7, r8 -	sha_f1	r8, r4, r5, r6, r7 -	sha_f1	r7, r8, r4, r5, r6 -	sha_f1	r6, r7, r8, r4, r5 -	sha_f1	r5, r6, r7, r8, r4 -	bne	3b - -	ldr	r1, .L_sha_K + 4 -	mov	lr, #4 - -4:	subs	lr, lr, #1 -	sha_f2	r4, r5, r6, r7, r8 -	sha_f2	r8, r4, r5, r6, r7 -	sha_f2	r7, r8, r4, r5, r6 -	sha_f2	r6, r7, r8, r4, r5 -	sha_f2	r5, r6, r7, r8, r4 -	bne	4b - -	ldr	r1, .L_sha_K + 8 -	mov	lr, #4 - -5:	subs	lr, lr, #1 -	sha_f3	r4, r5, r6, 
r7, r8 -	sha_f3	r8, r4, r5, r6, r7 -	sha_f3	r7, r8, r4, r5, r6 -	sha_f3	r6, r7, r8, r4, r5 -	sha_f3	r5, r6, r7, r8, r4 -	bne	5b - -	ldr	r1, .L_sha_K + 12 -	mov	lr, #4 - -6:	subs	lr, lr, #1 -	sha_f2	r4, r5, r6, r7, r8 -	sha_f2	r8, r4, r5, r6, r7 -	sha_f2	r7, r8, r4, r5, r6 -	sha_f2	r6, r7, r8, r4, r5 -	sha_f2	r5, r6, r7, r8, r4 -	bne	6b - -	ldmia	r0, {r1, r2, r3, ip, lr} -	add	r4, r1, r4 -	add	r5, r2, r5 -	add	r6, r3, r6, ror #2 -	add	r7, ip, r7, ror #2 -	add	r8, lr, r8, ror #2 -	stmia	r0, {r4 - r8} - -	ldmfd	sp!, {r4 - r8, pc} - -ENDPROC(sha_transform) - -	.align	2 -.L_sha_K: -	.word	0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 - - -/* - * void sha_init(__u32 *buf) - */ - -	.align	2 -.L_sha_initial_digest: -	.word	0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 - -ENTRY(sha_init) - -	str	lr, [sp, #-4]! -	adr	r1, .L_sha_initial_digest -	ldmia	r1, {r1, r2, r3, ip, lr} -	stmia	r0, {r1, r2, r3, ip, lr} -	ldr	pc, [sp], #4 - -ENDPROC(sha_init) diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S deleted file mode 100644 index f202d7bd164..00000000000 --- a/arch/arm/lib/strncpy_from_user.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - *  linux/arch/arm/lib/strncpy_from_user.S - * - *  Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - -	.text -	.align	5 - -/* - * Copy a string from user space to kernel space. 
- *  r0 = dst, r1 = src, r2 = byte length - * returns the number of characters copied (strlen of copied string), - *  -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) -	mov	ip, r1 -1:	subs	r2, r2, #1 -	ldrusr	r3, r1, 1, pl -	bmi	2f -	strb	r3, [r0], #1 -	teq	r3, #0 -	bne	1b -	sub	r1, r1, #1	@ take NUL character out of count -2:	sub	r0, r1, ip -	mov	pc, lr -ENDPROC(__strncpy_from_user) - -	.pushsection .fixup,"ax" -	.align	0 -9001:	mov	r3, #0 -	strb	r3, [r0, #0]	@ null terminate -	mov	r0, #-EFAULT -	mov	pc, lr -	.popsection - diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S deleted file mode 100644 index 0ecbb459c4f..00000000000 --- a/arch/arm/lib/strnlen_user.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - *  linux/arch/arm/lib/strnlen_user.S - * - *  Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/errno.h> - -	.text -	.align	5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose  : get length of a string in user memory - * Params   : str - address of string in user memory - * Returns  : length of string *including terminator* - *	      or zero on exception, or n + 1 if too long - */ -ENTRY(__strnlen_user) -	mov	r2, r0 -1: -	ldrusr	r3, r0, 1 -	teq	r3, #0 -	beq	2f -	subs	r1, r1, #1 -	bne	1b -	add	r0, r0, #1 -2:	sub	r0, r0, r2 -	mov	pc, lr -ENDPROC(__strnlen_user) - -	.pushsection .fixup,"ax" -	.align	0 -9001:	mov	r0, #0 -	mov	pc, lr -	.popsection diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S index 5c98dc567f0..4becdc3a59c 100644 --- a/arch/arm/lib/testchangebit.S +++ b/arch/arm/lib/testchangebit.S @@ -12,9 +12,4 @@  #include "bitops.h"                  .text -ENTRY(_test_and_change_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_test_and_change_bit_le) -	testop	eor, strb -ENDPROC(_test_and_change_bit_be) -ENDPROC(_test_and_change_bit_le) +testop	_test_and_change_bit, eor, str diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S index 543d7094d18..918841dcce7 100644 --- a/arch/arm/lib/testclearbit.S +++ b/arch/arm/lib/testclearbit.S @@ -12,9 +12,4 @@  #include "bitops.h"                  .text -ENTRY(_test_and_clear_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_test_and_clear_bit_le) -	testop	bicne, strneb -ENDPROC(_test_and_clear_bit_be) -ENDPROC(_test_and_clear_bit_le) +testop	_test_and_clear_bit, bicne, strne diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S index 0b3f390401c..8d1b2fe9e48 100644 --- a/arch/arm/lib/testsetbit.S +++ b/arch/arm/lib/testsetbit.S @@ -12,9 +12,4 @@  #include "bitops.h"                  .text -ENTRY(_test_and_set_bit_be) -		eor	r0, r0, #0x18		@ big endian byte ordering -ENTRY(_test_and_set_bit_le) -	testop	orreq, streqb 
-ENDPROC(_test_and_set_bit_be) -ENDPROC(_test_and_set_bit_le) +testop	_test_and_set_bit, orreq, streq diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index fee9f6f88ad..e50520904b7 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -14,6 +14,7 @@  #include <linux/linkage.h>  #include <asm/assembler.h>  #include <asm/errno.h> +#include <asm/domain.h>  		.text @@ -31,11 +32,11 @@  		rsb	ip, ip, #4  		cmp	ip, #2  		ldrb	r3, [r1], #1 -USER(		strbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault  		ldrgeb	r3, [r1], #1 -USER(		strgebt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault  		ldrgtb	r3, [r1], #1 -USER(		strgtbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault  		sub	r2, r2, ip  		b	.Lc2u_dest_aligned @@ -58,7 +59,7 @@ ENTRY(__copy_to_user)  		addmi	ip, r2, #4  		bmi	.Lc2u_0nowords  		ldr	r3, [r1], #4 -USER(		strt	r3, [r0], #4)			@ May fault +USER(	TUSER(	str)	r3, [r0], #4)			@ May fault  		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction  		rsb	ip, ip, #0  		movs	ip, ip, lsr #32 - PAGE_SHIFT @@ -87,18 +88,18 @@ USER(		strt	r3, [r0], #4)			@ May fault  		stmneia	r0!, {r3 - r4}			@ Shouldnt fault  		tst	ip, #4  		ldrne	r3, [r1], #4 -		strnet	r3, [r0], #4			@ Shouldnt fault +	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault  		ands	ip, ip, #3  		beq	.Lc2u_0fupi  .Lc2u_0nowords:	teq	ip, #0  		beq	.Lc2u_finished  .Lc2u_nowords:	cmp	ip, #2  		ldrb	r3, [r1], #1 -USER(		strbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault  		ldrgeb	r3, [r1], #1 -USER(		strgebt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault  		ldrgtb	r3, [r1], #1 -USER(		strgtbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault  		b	.Lc2u_finished  .Lc2u_not_enough: @@ -116,10 +117,10 @@ USER(		strgtbt	r3, [r0], #1)			@ May fault  .Lc2u_1fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  
		bmi	.Lc2u_1nowords -		mov	r3, r7, pull #8 +		mov	r3, r7, lspull #8  		ldr	r7, [r1], #4 -		orr	r3, r3, r7, push #24 -USER(		strt	r3, [r0], #4)			@ May fault +		orr	r3, r3, r7, lspush #24 +USER(	TUSER(	str)	r3, [r0], #4)			@ May fault  		mov	ip, r0, lsl #32 - PAGE_SHIFT  		rsb	ip, ip, #0  		movs	ip, ip, lsr #32 - PAGE_SHIFT @@ -130,51 +131,51 @@ USER(		strt	r3, [r0], #4)			@ May fault  		subs	ip, ip, #16  		blt	.Lc2u_1rem8lp -.Lc2u_1cpy8lp:	mov	r3, r7, pull #8 +.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8  		ldmia	r1!, {r4 - r7}  		subs	ip, ip, #16 -		orr	r3, r3, r4, push #24 -		mov	r4, r4, pull #8 -		orr	r4, r4, r5, push #24 -		mov	r5, r5, pull #8 -		orr	r5, r5, r6, push #24 -		mov	r6, r6, pull #8 -		orr	r6, r6, r7, push #24 +		orr	r3, r3, r4, lspush #24 +		mov	r4, r4, lspull #8 +		orr	r4, r4, r5, lspush #24 +		mov	r5, r5, lspull #8 +		orr	r5, r5, r6, lspush #24 +		mov	r6, r6, lspull #8 +		orr	r6, r6, r7, lspush #24  		stmia	r0!, {r3 - r6}			@ Shouldnt fault  		bpl	.Lc2u_1cpy8lp  .Lc2u_1rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #8 +		movne	r3, r7, lspull #8  		ldmneia	r1!, {r4, r7} -		orrne	r3, r3, r4, push #24 -		movne	r4, r4, pull #8 -		orrne	r4, r4, r7, push #24 +		orrne	r3, r3, r4, lspush #24 +		movne	r4, r4, lspull #8 +		orrne	r4, r4, r7, lspush #24  		stmneia	r0!, {r3 - r4}			@ Shouldnt fault  		tst	ip, #4 -		movne	r3, r7, pull #8 +		movne	r3, r7, lspull #8  		ldrne	r7, [r1], #4 -		orrne	r3, r3, r7, push #24 -		strnet	r3, [r0], #4			@ Shouldnt fault +		orrne	r3, r3, r7, lspush #24 +	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault  		ands	ip, ip, #3  		beq	.Lc2u_1fupi  .Lc2u_1nowords:	mov	r3, r7, get_byte_1  		teq	ip, #0  		beq	.Lc2u_finished  		cmp	ip, #2 -USER(		strbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault  		movge	r3, r7, get_byte_2 -USER(		strgebt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault  		movgt	r3, r7, get_byte_3 -USER(		strgtbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgtb) r3, [r0], #1)	
		@ May fault  		b	.Lc2u_finished  .Lc2u_2fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lc2u_2nowords -		mov	r3, r7, pull #16 +		mov	r3, r7, lspull #16  		ldr	r7, [r1], #4 -		orr	r3, r3, r7, push #16 -USER(		strt	r3, [r0], #4)			@ May fault +		orr	r3, r3, r7, lspush #16 +USER(	TUSER(	str)	r3, [r0], #4)			@ May fault  		mov	ip, r0, lsl #32 - PAGE_SHIFT  		rsb	ip, ip, #0  		movs	ip, ip, lsr #32 - PAGE_SHIFT @@ -185,51 +186,51 @@ USER(		strt	r3, [r0], #4)			@ May fault  		subs	ip, ip, #16  		blt	.Lc2u_2rem8lp -.Lc2u_2cpy8lp:	mov	r3, r7, pull #16 +.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16  		ldmia	r1!, {r4 - r7}  		subs	ip, ip, #16 -		orr	r3, r3, r4, push #16 -		mov	r4, r4, pull #16 -		orr	r4, r4, r5, push #16 -		mov	r5, r5, pull #16 -		orr	r5, r5, r6, push #16 -		mov	r6, r6, pull #16 -		orr	r6, r6, r7, push #16 +		orr	r3, r3, r4, lspush #16 +		mov	r4, r4, lspull #16 +		orr	r4, r4, r5, lspush #16 +		mov	r5, r5, lspull #16 +		orr	r5, r5, r6, lspush #16 +		mov	r6, r6, lspull #16 +		orr	r6, r6, r7, lspush #16  		stmia	r0!, {r3 - r6}			@ Shouldnt fault  		bpl	.Lc2u_2cpy8lp  .Lc2u_2rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #16 +		movne	r3, r7, lspull #16  		ldmneia	r1!, {r4, r7} -		orrne	r3, r3, r4, push #16 -		movne	r4, r4, pull #16 -		orrne	r4, r4, r7, push #16 +		orrne	r3, r3, r4, lspush #16 +		movne	r4, r4, lspull #16 +		orrne	r4, r4, r7, lspush #16  		stmneia	r0!, {r3 - r4}			@ Shouldnt fault  		tst	ip, #4 -		movne	r3, r7, pull #16 +		movne	r3, r7, lspull #16  		ldrne	r7, [r1], #4 -		orrne	r3, r3, r7, push #16 -		strnet	r3, [r0], #4			@ Shouldnt fault +		orrne	r3, r3, r7, lspush #16 +	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault  		ands	ip, ip, #3  		beq	.Lc2u_2fupi  .Lc2u_2nowords:	mov	r3, r7, get_byte_2  		teq	ip, #0  		beq	.Lc2u_finished  		cmp	ip, #2 -USER(		strbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault  		movge	r3, r7, get_byte_3 -USER(		strgebt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault  		
ldrgtb	r3, [r1], #0 -USER(		strgtbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault  		b	.Lc2u_finished  .Lc2u_3fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lc2u_3nowords -		mov	r3, r7, pull #24 +		mov	r3, r7, lspull #24  		ldr	r7, [r1], #4 -		orr	r3, r3, r7, push #8 -USER(		strt	r3, [r0], #4)			@ May fault +		orr	r3, r3, r7, lspush #8 +USER(	TUSER(	str)	r3, [r0], #4)			@ May fault  		mov	ip, r0, lsl #32 - PAGE_SHIFT  		rsb	ip, ip, #0  		movs	ip, ip, lsr #32 - PAGE_SHIFT @@ -240,42 +241,42 @@ USER(		strt	r3, [r0], #4)			@ May fault  		subs	ip, ip, #16  		blt	.Lc2u_3rem8lp -.Lc2u_3cpy8lp:	mov	r3, r7, pull #24 +.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24  		ldmia	r1!, {r4 - r7}  		subs	ip, ip, #16 -		orr	r3, r3, r4, push #8 -		mov	r4, r4, pull #24 -		orr	r4, r4, r5, push #8 -		mov	r5, r5, pull #24 -		orr	r5, r5, r6, push #8 -		mov	r6, r6, pull #24 -		orr	r6, r6, r7, push #8 +		orr	r3, r3, r4, lspush #8 +		mov	r4, r4, lspull #24 +		orr	r4, r4, r5, lspush #8 +		mov	r5, r5, lspull #24 +		orr	r5, r5, r6, lspush #8 +		mov	r6, r6, lspull #24 +		orr	r6, r6, r7, lspush #8  		stmia	r0!, {r3 - r6}			@ Shouldnt fault  		bpl	.Lc2u_3cpy8lp  .Lc2u_3rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #24 +		movne	r3, r7, lspull #24  		ldmneia	r1!, {r4, r7} -		orrne	r3, r3, r4, push #8 -		movne	r4, r4, pull #24 -		orrne	r4, r4, r7, push #8 +		orrne	r3, r3, r4, lspush #8 +		movne	r4, r4, lspull #24 +		orrne	r4, r4, r7, lspush #8  		stmneia	r0!, {r3 - r4}			@ Shouldnt fault  		tst	ip, #4 -		movne	r3, r7, pull #24 +		movne	r3, r7, lspull #24  		ldrne	r7, [r1], #4 -		orrne	r3, r3, r7, push #8 -		strnet	r3, [r0], #4			@ Shouldnt fault +		orrne	r3, r3, r7, lspush #8 +	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault  		ands	ip, ip, #3  		beq	.Lc2u_3fupi  .Lc2u_3nowords:	mov	r3, r7, get_byte_3  		teq	ip, #0  		beq	.Lc2u_finished  		cmp	ip, #2 -USER(		strbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault  		ldrgeb	r3, [r1], #1 -USER(		strgebt	r3, 
[r0], #1)			@ May fault +USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault  		ldrgtb	r3, [r1], #0 -USER(		strgtbt	r3, [r0], #1)			@ May fault +USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault  		b	.Lc2u_finished  ENDPROC(__copy_to_user) @@ -294,11 +295,11 @@ ENDPROC(__copy_to_user)  .Lcfu_dest_not_aligned:  		rsb	ip, ip, #4  		cmp	ip, #2 -USER(		ldrbt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault  		strb	r3, [r0], #1 -USER(		ldrgebt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault  		strgeb	r3, [r0], #1 -USER(		ldrgtbt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault  		strgtb	r3, [r0], #1  		sub	r2, r2, ip  		b	.Lcfu_dest_aligned @@ -321,7 +322,7 @@ ENTRY(__copy_from_user)  .Lcfu_0fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lcfu_0nowords -USER(		ldrt	r3, [r1], #4) +USER(	TUSER(	ldr)	r3, [r1], #4)  		str	r3, [r0], #4  		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction  		rsb	ip, ip, #0 @@ -350,18 +351,18 @@ USER(		ldrt	r3, [r1], #4)  		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault  		stmneia	r0!, {r3 - r4}  		tst	ip, #4 -		ldrnet	r3, [r1], #4			@ Shouldnt fault +	TUSER(	ldrne) r3, [r1], #4			@ Shouldnt fault  		strne	r3, [r0], #4  		ands	ip, ip, #3  		beq	.Lcfu_0fupi  .Lcfu_0nowords:	teq	ip, #0  		beq	.Lcfu_finished  .Lcfu_nowords:	cmp	ip, #2 -USER(		ldrbt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault  		strb	r3, [r0], #1 -USER(		ldrgebt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault  		strgeb	r3, [r0], #1 -USER(		ldrgtbt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault  		strgtb	r3, [r0], #1  		b	.Lcfu_finished @@ -374,16 +375,16 @@ USER(		ldrgtbt	r3, [r1], #1)			@ May fault  .Lcfu_src_not_aligned:  		bic	r1, r1, #3 -USER(		ldrt	r7, [r1], #4)			@ May fault +USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault  		cmp	ip, #2  		bgt	.Lcfu_3fupi  		beq	.Lcfu_2fupi  
.Lcfu_1fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lcfu_1nowords -		mov	r3, r7, pull #8 -USER(		ldrt	r7, [r1], #4)			@ May fault -		orr	r3, r3, r7, push #24 +		mov	r3, r7, lspull #8 +USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault +		orr	r3, r3, r7, lspush #24  		str	r3, [r0], #4  		mov	ip, r1, lsl #32 - PAGE_SHIFT  		rsb	ip, ip, #0 @@ -395,30 +396,30 @@ USER(		ldrt	r7, [r1], #4)			@ May fault  		subs	ip, ip, #16  		blt	.Lcfu_1rem8lp -.Lcfu_1cpy8lp:	mov	r3, r7, pull #8 +.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8  		ldmia	r1!, {r4 - r7}			@ Shouldnt fault  		subs	ip, ip, #16 -		orr	r3, r3, r4, push #24 -		mov	r4, r4, pull #8 -		orr	r4, r4, r5, push #24 -		mov	r5, r5, pull #8 -		orr	r5, r5, r6, push #24 -		mov	r6, r6, pull #8 -		orr	r6, r6, r7, push #24 +		orr	r3, r3, r4, lspush #24 +		mov	r4, r4, lspull #8 +		orr	r4, r4, r5, lspush #24 +		mov	r5, r5, lspull #8 +		orr	r5, r5, r6, lspush #24 +		mov	r6, r6, lspull #8 +		orr	r6, r6, r7, lspush #24  		stmia	r0!, {r3 - r6}  		bpl	.Lcfu_1cpy8lp  .Lcfu_1rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #8 +		movne	r3, r7, lspull #8  		ldmneia	r1!, {r4, r7}			@ Shouldnt fault -		orrne	r3, r3, r4, push #24 -		movne	r4, r4, pull #8 -		orrne	r4, r4, r7, push #24 +		orrne	r3, r3, r4, lspush #24 +		movne	r4, r4, lspull #8 +		orrne	r4, r4, r7, lspush #24  		stmneia	r0!, {r3 - r4}  		tst	ip, #4 -		movne	r3, r7, pull #8 -USER(		ldrnet	r7, [r1], #4)			@ May fault -		orrne	r3, r3, r7, push #24 +		movne	r3, r7, lspull #8 +USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault +		orrne	r3, r3, r7, lspush #24  		strne	r3, [r0], #4  		ands	ip, ip, #3  		beq	.Lcfu_1fupi @@ -436,9 +437,9 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault  .Lcfu_2fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lcfu_2nowords -		mov	r3, r7, pull #16 -USER(		ldrt	r7, [r1], #4)			@ May fault -		orr	r3, r3, r7, push #16 +		mov	r3, r7, lspull #16 +USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault +		orr	r3, r3, r7, lspush #16  		str	r3, [r0], #4  		mov	ip, r1, lsl #32 - PAGE_SHIFT  		rsb	
ip, ip, #0 @@ -451,30 +452,30 @@ USER(		ldrt	r7, [r1], #4)			@ May fault  		blt	.Lcfu_2rem8lp -.Lcfu_2cpy8lp:	mov	r3, r7, pull #16 +.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16  		ldmia	r1!, {r4 - r7}			@ Shouldnt fault  		subs	ip, ip, #16 -		orr	r3, r3, r4, push #16 -		mov	r4, r4, pull #16 -		orr	r4, r4, r5, push #16 -		mov	r5, r5, pull #16 -		orr	r5, r5, r6, push #16 -		mov	r6, r6, pull #16 -		orr	r6, r6, r7, push #16 +		orr	r3, r3, r4, lspush #16 +		mov	r4, r4, lspull #16 +		orr	r4, r4, r5, lspush #16 +		mov	r5, r5, lspull #16 +		orr	r5, r5, r6, lspush #16 +		mov	r6, r6, lspull #16 +		orr	r6, r6, r7, lspush #16  		stmia	r0!, {r3 - r6}  		bpl	.Lcfu_2cpy8lp  .Lcfu_2rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #16 +		movne	r3, r7, lspull #16  		ldmneia	r1!, {r4, r7}			@ Shouldnt fault -		orrne	r3, r3, r4, push #16 -		movne	r4, r4, pull #16 -		orrne	r4, r4, r7, push #16 +		orrne	r3, r3, r4, lspush #16 +		movne	r4, r4, lspull #16 +		orrne	r4, r4, r7, lspush #16  		stmneia	r0!, {r3 - r4}  		tst	ip, #4 -		movne	r3, r7, pull #16 -USER(		ldrnet	r7, [r1], #4)			@ May fault -		orrne	r3, r3, r7, push #16 +		movne	r3, r7, lspull #16 +USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault +		orrne	r3, r3, r7, lspush #16  		strne	r3, [r0], #4  		ands	ip, ip, #3  		beq	.Lcfu_2fupi @@ -485,16 +486,16 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault  		strb	r3, [r0], #1  		movge	r3, r7, get_byte_3  		strgeb	r3, [r0], #1 -USER(		ldrgtbt	r3, [r1], #0)			@ May fault +USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault  		strgtb	r3, [r0], #1  		b	.Lcfu_finished  .Lcfu_3fupi:	subs	r2, r2, #4  		addmi	ip, r2, #4  		bmi	.Lcfu_3nowords -		mov	r3, r7, pull #24 -USER(		ldrt	r7, [r1], #4)			@ May fault -		orr	r3, r3, r7, push #8 +		mov	r3, r7, lspull #24 +USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault +		orr	r3, r3, r7, lspush #8  		str	r3, [r0], #4  		mov	ip, r1, lsl #32 - PAGE_SHIFT  		rsb	ip, ip, #0 @@ -506,30 +507,30 @@ USER(		ldrt	r7, [r1], #4)			@ May fault  		subs	ip, ip, #16  		blt	.Lcfu_3rem8lp 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24 +.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24  		ldmia	r1!, {r4 - r7}			@ Shouldnt fault -		orr	r3, r3, r4, push #8 -		mov	r4, r4, pull #24 -		orr	r4, r4, r5, push #8 -		mov	r5, r5, pull #24 -		orr	r5, r5, r6, push #8 -		mov	r6, r6, pull #24 -		orr	r6, r6, r7, push #8 +		orr	r3, r3, r4, lspush #8 +		mov	r4, r4, lspull #24 +		orr	r4, r4, r5, lspush #8 +		mov	r5, r5, lspull #24 +		orr	r5, r5, r6, lspush #8 +		mov	r6, r6, lspull #24 +		orr	r6, r6, r7, lspush #8  		stmia	r0!, {r3 - r6}  		subs	ip, ip, #16  		bpl	.Lcfu_3cpy8lp  .Lcfu_3rem8lp:	tst	ip, #8 -		movne	r3, r7, pull #24 +		movne	r3, r7, lspull #24  		ldmneia	r1!, {r4, r7}			@ Shouldnt fault -		orrne	r3, r3, r4, push #8 -		movne	r4, r4, pull #24 -		orrne	r4, r4, r7, push #8 +		orrne	r3, r3, r4, lspush #8 +		movne	r4, r4, lspull #24 +		orrne	r4, r4, r7, lspush #8  		stmneia	r0!, {r3 - r4}  		tst	ip, #4 -		movne	r3, r7, pull #24 -USER(		ldrnet	r7, [r1], #4)			@ May fault -		orrne	r3, r3, r7, push #8 +		movne	r3, r7, lspull #24 +USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault +		orrne	r3, r3, r7, lspush #8  		strne	r3, [r0], #4  		ands	ip, ip, #3  		beq	.Lcfu_3fupi @@ -538,9 +539,9 @@ USER(		ldrnet	r7, [r1], #4)			@ May fault  		beq	.Lcfu_finished  		cmp	ip, #2  		strb	r3, [r0], #1 -USER(		ldrgebt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault  		strgeb	r3, [r0], #1 -USER(		ldrgtbt	r3, [r1], #1)			@ May fault +USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault  		strgtb	r3, [r0], #1  		b	.Lcfu_finished  ENDPROC(__copy_from_user) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index e2d2f2cd0c4..3e58d710013 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -17,6 +17,8 @@  #include <linux/sched.h>  #include <linux/hardirq.h> /* for in_atomic() */  #include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h>  #include <asm/current.h>  #include <asm/page.h> @@ 
-27,14 +29,47 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)  	pgd_t *pgd;  	pmd_t *pmd;  	pte_t *pte; +	pud_t *pud;  	spinlock_t *ptl;  	pgd = pgd_offset(current->mm, addr);  	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))  		return 0; -	pmd = pmd_offset(pgd, addr); -	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) +	pud = pud_offset(pgd, addr); +	if (unlikely(pud_none(*pud) || pud_bad(*pud))) +		return 0; + +	pmd = pmd_offset(pud, addr); +	if (unlikely(pmd_none(*pmd))) +		return 0; + +	/* +	 * A pmd can be bad if it refers to a HugeTLB or THP page. +	 * +	 * Both THP and HugeTLB pages have the same pmd layout +	 * and should not be manipulated by the pte functions. +	 * +	 * Lock the page table for the destination and check +	 * to see that it's still huge and whether or not we will +	 * need to fault on write, or if we have a splitting THP. +	 */ +	if (unlikely(pmd_thp_or_huge(*pmd))) { +		ptl = &current->mm->page_table_lock; +		spin_lock(ptl); +		if (unlikely(!pmd_thp_or_huge(*pmd) +			|| pmd_hugewillfault(*pmd) +			|| pmd_trans_splitting(*pmd))) { +			spin_unlock(ptl); +			return 0; +		} + +		*ptep = NULL; +		*ptlp = ptl; +		return 1; +	} + +	if (unlikely(pmd_bad(*pmd)))  		return 0;  	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); @@ -88,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)  		from += tocopy;  		n -= tocopy; -		pte_unmap_unlock(pte, ptl); +		if (pte) +			pte_unmap_unlock(pte, ptl); +		else +			spin_unlock(ptl);  	}  	if (!atomic)  		up_read(&current->mm->mmap_sem); @@ -141,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n)  		addr += tocopy;  		n -= tocopy; -		pte_unmap_unlock(pte, ptl); +		if (pte) +			pte_unmap_unlock(pte, ptl); +		else +			spin_unlock(ptl);  	}  	up_read(&current->mm->mmap_sem); diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c new file mode 100644 index 00000000000..2c40aeab3ea --- /dev/null +++ b/arch/arm/lib/xor-neon.c @@ -0,0 +1,46 
@@ +/* + * linux/arch/arm/lib/xor-neon.c + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/raid/xor.h> +#include <linux/module.h> + +MODULE_LICENSE("GPL"); + +#ifndef __ARM_NEON__ +#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' +#endif + +/* + * Pull in the reference implementations while instructing GCC (through + * -ftree-vectorize) to attempt to exploit implicit parallelism and emit + * NEON instructions. + */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC optimize "tree-vectorize" +#else +/* + * While older versions of GCC do not generate incorrect code, they fail to + * recognize the parallel nature of these functions, and emit plain ARM code, + * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + */ +#warning This code requires at least version 4.6 of GCC +#endif + +#pragma GCC diagnostic ignored "-Wunused-variable" +#include <asm-generic/xor.h> + +struct xor_block_template const xor_block_neon_inner = { +	.name	= "__inner_neon__", +	.do_2	= xor_8regs_2, +	.do_3	= xor_8regs_3, +	.do_4	= xor_8regs_4, +	.do_5	= xor_8regs_5, +}; +EXPORT_SYMBOL(xor_block_neon_inner);  | 
