Diffstat (limited to 'arch/powerpc/lib')
 arch/powerpc/lib/Makefile           |  19
 arch/powerpc/lib/checksum_64.S      |  58
 arch/powerpc/lib/code-patching.c    |  15
 arch/powerpc/lib/copypage_64.S      |   4
 arch/powerpc/lib/copypage_power7.S  |  12
 arch/powerpc/lib/copyuser_64.S      |  55
 arch/powerpc/lib/copyuser_power7.S  |  86
 arch/powerpc/lib/crtsavres.S        | 186
 arch/powerpc/lib/hweight_64.S       |   8
 arch/powerpc/lib/mem_64.S           |   6
 arch/powerpc/lib/memcpy_64.S        |  28
 arch/powerpc/lib/memcpy_power7.S    |  81
 arch/powerpc/lib/sstep.c            | 112
 arch/powerpc/lib/string_64.S        |   2
 arch/powerpc/lib/xor_vmx.c          | 177
 15 files changed, 697 insertions(+), 152 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 45043327669..59fa2de9546 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -10,15 +10,21 @@ CFLAGS_REMOVE_code-patching.o = -pg  CFLAGS_REMOVE_feature-fixups.o = -pg  obj-y			:= string.o alloc.o \ -			   checksum_$(CONFIG_WORD_SIZE).o crtsavres.o +			   crtsavres.o  obj-$(CONFIG_PPC32)	+= div64.o copy_32.o  obj-$(CONFIG_HAS_IOMEM)	+= devres.o  obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \ -			   memcpy_64.o usercopy_64.o mem_64.o string.o \ -			   checksum_wrappers_64.o hweight_64.o \ -			   copyuser_power7.o string_64.o copypage_power7.o \ -			   memcpy_power7.o +			   usercopy_64.o mem_64.o string.o \ +			   hweight_64.o \ +			   copyuser_power7.o string_64.o copypage_power7.o +ifeq ($(CONFIG_GENERIC_CSUM),) +obj-y			+= checksum_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o +endif + +obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o  +  obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o  ifeq ($(CONFIG_PPC64),y) @@ -31,3 +37,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o  obj-y			+= code-patching.o  obj-y			+= feature-fixups.o  obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o + +obj-$(CONFIG_ALTIVEC)	+= xor_vmx.o +CFLAGS_xor_vmx.o += -maltivec -mabi=altivec diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 167f72555d6..57a07206505 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -226,19 +226,35 @@ _GLOBAL(csum_partial)  	blr -	.macro source +	.macro srcnr  100:  	.section __ex_table,"a"  	.align 3 -	.llong 100b,.Lsrc_error +	.llong 100b,.Lsrc_error_nr  	.previous  	.endm -	.macro dest +	.macro source +150: +	.section __ex_table,"a" +	.align 3 +	.llong 150b,.Lsrc_error +	.previous +	.endm + +	.macro dstnr  200:  	.section __ex_table,"a"  	.align 3 -	.llong 200b,.Ldest_error +	.llong 200b,.Ldest_error_nr +	.previous +	.endm + +	.macro dest +250: +	.section __ex_table,"a" +	.align 3 +	.llong 250b,.Ldest_error  	.previous  	.endm @@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic)  	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */  	beq	.Lcopy_aligned -	li	r7,4 -	sub	r6,r7,r6 +	li	r9,4 +	sub	r6,r9,r6  	mtctr	r6  1: -source;	lhz	r6,0(r3)		/* align to doubleword */ +srcnr;	lhz	r6,0(r3)		/* align to doubleword */  	subi	r5,r5,2  	addi	r3,r3,2  	adde	r0,r0,r6 -dest;	sth	r6,0(r4) +dstnr;	sth	r6,0(r4)  	addi	r4,r4,2  	bdnz	1b @@ -392,10 +408,10 @@ dest;	std	r16,56(r4)  	mtctr	r6  3: -source;	ld	r6,0(r3) +srcnr;	ld	r6,0(r3)  	addi	r3,r3,8  	adde	r0,r0,r6 -dest;	std	r6,0(r4) +dstnr;	std	r6,0(r4)  	addi	r4,r4,8  	bdnz	3b @@ -405,10 +421,10 @@ dest;	std	r6,0(r4)  	srdi.	r6,r5,2  	beq	.Lcopy_tail_halfword -source;	lwz	r6,0(r3) +srcnr;	lwz	r6,0(r3)  	addi	r3,r3,4  	adde	r0,r0,r6 -dest;	stw	r6,0(r4) +dstnr;	stw	r6,0(r4)  	addi	r4,r4,4  	subi	r5,r5,4 @@ -416,10 +432,10 @@ dest;	stw	r6,0(r4)  	srdi.	r6,r5,1  	beq	.Lcopy_tail_byte -source;	lhz	r6,0(r3) +srcnr;	lhz	r6,0(r3)  	addi	r3,r3,2  	adde	r0,r0,r6 -dest;	sth	r6,0(r4) +dstnr;	sth	r6,0(r4)  	addi	r4,r4,2  	subi	r5,r5,2 @@ -427,10 +443,10 @@ dest;	sth	r6,0(r4)  	andi.	
r6,r5,1  	beq	.Lcopy_finish -source;	lbz	r6,0(r3) +srcnr;	lbz	r6,0(r3)  	sldi	r9,r6,8			/* Pad the byte out to 16 bits */  	adde	r0,r0,r9 -dest;	stb	r6,0(r4) +dstnr;	stb	r6,0(r4)  .Lcopy_finish:  	addze	r0,r0			/* add in final carry */ @@ -440,6 +456,11 @@ dest;	stb	r6,0(r4)  	blr  .Lsrc_error: +	ld	r14,STK_REG(R14)(r1) +	ld	r15,STK_REG(R15)(r1) +	ld	r16,STK_REG(R16)(r1) +	addi	r1,r1,STACKFRAMESIZE +.Lsrc_error_nr:  	cmpdi	0,r7,0  	beqlr  	li	r6,-EFAULT @@ -447,6 +468,11 @@ dest;	stb	r6,0(r4)  	blr  .Ldest_error: +	ld	r14,STK_REG(R14)(r1) +	ld	r15,STK_REG(R15)(r1) +	ld	r16,STK_REG(R16)(r1) +	addi	r1,r1,STACKFRAMESIZE +.Ldest_error_nr:  	cmpdi	0,r8,0  	beqlr  	li	r6,-EFAULT diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 17e5b236431..d5edbeb8eb8 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -159,6 +159,21 @@ unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)  	return 0;  } +#ifdef CONFIG_PPC_BOOK3E_64 +void __patch_exception(int exc, unsigned long addr) +{ +	extern unsigned int interrupt_base_book3e; +	unsigned int *ibase = &interrupt_base_book3e; + +	/* Our exceptions vectors start with a NOP and -then- a branch +	 * to deal with single stepping from userspace which stops on +	 * the second instruction. Thus we need to patch the second +	 * instruction of the exception, not the first one +	 */ + +	patch_branch(ibase + (exc / 4) + 1, addr, 0); +} +#endif  #ifdef CONFIG_CODE_PATCHING_SELFTEST diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 9f9434a8526..a3c4dc4defd 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -16,11 +16,11 @@ PPC64_CACHES:          .tc             ppc64_caches[TC],ppc64_caches          .section        ".text" -_GLOBAL(copy_page) +_GLOBAL_TOC(copy_page)  BEGIN_FTR_SECTION  	lis	r5,PAGE_SIZE@h  FTR_SECTION_ELSE -	b	.copypage_power7 +	b	copypage_power7  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)  	ori	r5,r5,PAGE_SIZE@l  BEGIN_FTR_SECTION diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 395c594722a..d7dafb3777a 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -56,15 +56,15 @@ _GLOBAL(copypage_power7)  #ifdef CONFIG_ALTIVEC  	mflr	r0 -	std	r3,48(r1) -	std	r4,56(r1) +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)  	std	r0,16(r1)  	stdu	r1,-STACKFRAMESIZE(r1) -	bl	.enter_vmx_copy +	bl	enter_vmx_copy  	cmpwi	r3,0  	ld	r0,STACKFRAMESIZE+16(r1) -	ld	r3,STACKFRAMESIZE+48(r1) -	ld	r4,STACKFRAMESIZE+56(r1) +	ld	r3,STK_REG(R31)(r1) +	ld	r4,STK_REG(R30)(r1)  	mtlr	r0  	li	r0,(PAGE_SIZE/128) @@ -103,7 +103,7 @@ _GLOBAL(copypage_power7)  	addi	r3,r3,128  	bdnz	1b -	b	.exit_vmx_copy		/* tail call optimise */ +	b	exit_vmx_copy		/* tail call optimise */  #else  	li	r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a5901490..0860ee46013 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,8 +9,16 @@  #include <asm/processor.h>  #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define sLd sld		/* Shift towards low-numbered address. */ +#define sHd srd		/* Shift towards high-numbered address. */ +#else +#define sLd srd		/* Shift towards low-numbered address. */ +#define sHd sld		/* Shift towards high-numbered address. 
*/ +#endif +  	.align	7 -_GLOBAL(__copy_tofrom_user) +_GLOBAL_TOC(__copy_tofrom_user)  BEGIN_FTR_SECTION  	nop  FTR_SECTION_ELSE @@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */  25:	ld	r0,8(r4) -	sld	r6,r9,r10 +	sLd	r6,r9,r10  26:	ldu	r9,16(r4) -	srd	r7,r0,r11 -	sld	r8,r0,r10 +	sHd	r7,r0,r11 +	sLd	r8,r0,r10  	or	r7,r7,r6  	blt	cr6,79f  27:	ld	r0,8(r4) @@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */  29:	ldu	r9,8(r4) -	sld	r8,r0,r10 +	sLd	r8,r0,r10  	addi	r3,r3,-8  	blt	cr6,5f  30:	ld	r0,8(r4) -	srd	r12,r9,r11 -	sld	r6,r9,r10 +	sHd	r12,r9,r11 +	sLd	r6,r9,r10  31:	ldu	r9,16(r4)  	or	r12,r8,r12 -	srd	r7,r0,r11 -	sld	r8,r0,r10 +	sHd	r7,r0,r11 +	sLd	r8,r0,r10  	addi	r3,r3,16  	beq	cr6,78f  1:	or	r7,r7,r6  32:	ld	r0,8(r4)  76:	std	r12,8(r3) -2:	srd	r12,r9,r11 -	sld	r6,r9,r10 +2:	sHd	r12,r9,r11 +	sLd	r6,r9,r10  33:	ldu	r9,16(r4)  	or	r12,r8,r12  77:	stdu	r7,16(r3) -	srd	r7,r0,r11 -	sld	r8,r0,r10 +	sHd	r7,r0,r11 +	sLd	r8,r0,r10  	bdnz	1b  78:	std	r12,8(r3)  	or	r7,r7,r6  79:	std	r7,16(r3) -5:	srd	r12,r9,r11 +5:	sHd	r12,r9,r11  	or	r12,r8,r12  80:	std	r12,24(r3)  	bne	6f @@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  	blr  6:	cmpwi	cr1,r5,8  	addi	r3,r3,32 -	sld	r9,r9,r10 +	sLd	r9,r9,r10  	ble	cr1,7f  34:	ld	r0,8(r4) -	srd	r7,r0,r11 +	sHd	r7,r0,r11  	or	r9,r7,r9  7:  	bf	cr7*4+1,1f +#ifdef __BIG_ENDIAN__  	rotldi	r9,r9,32 +#endif  94:	stw	r9,0(r3) +#ifdef __LITTLE_ENDIAN__ +	rotrdi	r9,r9,32 +#endif  	addi	r3,r3,4  1:	bf	cr7*4+2,2f +#ifdef __BIG_ENDIAN__  	rotldi	r9,r9,16 +#endif  95:	sth	r9,0(r3) +#ifdef __LITTLE_ENDIAN__ +	rotrdi	r9,r9,16 +#endif  	addi	r3,r3,2  2:	bf	cr7*4+3,3f +#ifdef __BIG_ENDIAN__  	rotldi	r9,r9,8 +#endif  96:	stb	r9,0(r3) +#ifdef __LITTLE_ENDIAN__ +	rotrdi	r9,r9,8 +#endif  3:	li	r3,0  	blr diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index d1f11795a7a..c46c876ac96 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -19,6 +19,14 @@   */  #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC +#endif +  	.macro err1  100:  	.section __ex_table,"a" @@ -58,7 +66,7 @@  	ld	r15,STK_REG(R15)(r1)  	ld	r14,STK_REG(R14)(r1)  .Ldo_err3: -	bl	.exit_vmx_usercopy +	bl	exit_vmx_usercopy  	ld	r0,STACKFRAMESIZE+16(r1)  	mtlr	r0  	b	.Lexit @@ -77,9 +85,9 @@  .Lexit:  	addi	r1,r1,STACKFRAMESIZE  .Ldo_err1: -	ld	r3,48(r1) -	ld	r4,56(r1) -	ld	r5,64(r1) +	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1) +	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)  	b	__copy_tofrom_user_base @@ -88,18 +96,18 @@ _GLOBAL(__copy_tofrom_user_power7)  	cmpldi	r5,16  	cmpldi	cr1,r5,4096 -	std	r3,48(r1) -	std	r4,56(r1) -	std	r5,64(r1) +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1) +	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)  	blt	.Lshort_copy  	bgt	cr1,.Lvmx_copy  #else  	cmpldi	r5,16 -	std	r3,48(r1) -	std	r4,56(r1) -	std	r5,64(r1) +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1) +	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)  	blt	.Lshort_copy  #endif @@ -287,12 +295,12 @@ err1;	stb	r0,0(r3)  	mflr	r0  	std	r0,16(r1)  	stdu	r1,-STACKFRAMESIZE(r1) -	bl	.enter_vmx_usercopy +	bl	enter_vmx_usercopy  	cmpwi	
cr1,r3,0  	ld	r0,STACKFRAMESIZE+16(r1) -	ld	r3,STACKFRAMESIZE+48(r1) -	ld	r4,STACKFRAMESIZE+56(r1) -	ld	r5,STACKFRAMESIZE+64(r1) +	ld	r3,STK_REG(R31)(r1) +	ld	r4,STK_REG(R30)(r1) +	ld	r5,STK_REG(R29)(r1)  	mtlr	r0  	/* @@ -506,7 +514,7 @@ err3;	lbz	r0,0(r4)  err3;	stb	r0,0(r3)  15:	addi	r1,r1,STACKFRAMESIZE -	b	.exit_vmx_usercopy	/* tail call optimise */ +	b	exit_vmx_usercopy	/* tail call optimise */  .Lvmx_unaligned_copy:  	/* Get the destination 16B aligned */ @@ -552,13 +560,13 @@ err3;	stw	r7,4(r3)  	li	r10,32  	li	r11,48 -	lvsl	vr16,0,r4	/* Setup permute control vector */ +	LVS(vr16,0,r4)		/* Setup permute control vector */  err3;	lvx	vr0,0,r4  	addi	r4,r4,16  	bf	cr7*4+3,5f  err3;	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	addi	r4,r4,16  err3;	stvx	vr8,r0,r3  	addi	r3,r3,16 @@ -566,9 +574,9 @@ err3;	stvx	vr8,r0,r3  5:	bf	cr7*4+2,6f  err3;	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  err3;	lvx	vr0,r4,r9 -	vperm	vr9,vr1,vr0,vr16 +	VPERM(vr9,vr1,vr0,vr16)  	addi	r4,r4,32  err3;	stvx	vr8,r0,r3  err3;	stvx	vr9,r3,r9 @@ -576,13 +584,13 @@ err3;	stvx	vr9,r3,r9  6:	bf	cr7*4+1,7f  err3;	lvx	vr3,r0,r4 -	vperm	vr8,vr0,vr3,vr16 +	VPERM(vr8,vr0,vr3,vr16)  err3;	lvx	vr2,r4,r9 -	vperm	vr9,vr3,vr2,vr16 +	VPERM(vr9,vr3,vr2,vr16)  err3;	lvx	vr1,r4,r10 -	vperm	vr10,vr2,vr1,vr16 +	VPERM(vr10,vr2,vr1,vr16)  err3;	lvx	vr0,r4,r11 -	vperm	vr11,vr1,vr0,vr16 +	VPERM(vr11,vr1,vr0,vr16)  	addi	r4,r4,64  err3;	stvx	vr8,r0,r3  err3;	stvx	vr9,r3,r9 @@ -611,21 +619,21 @@ err3;	stvx	vr11,r3,r11  	.align	5  8:  err4;	lvx	vr7,r0,r4 -	vperm	vr8,vr0,vr7,vr16 +	VPERM(vr8,vr0,vr7,vr16)  err4;	lvx	vr6,r4,r9 -	vperm	vr9,vr7,vr6,vr16 +	VPERM(vr9,vr7,vr6,vr16)  err4;	lvx	vr5,r4,r10 -	vperm	vr10,vr6,vr5,vr16 +	VPERM(vr10,vr6,vr5,vr16)  err4;	lvx	vr4,r4,r11 -	vperm	vr11,vr5,vr4,vr16 +	VPERM(vr11,vr5,vr4,vr16)  err4;	lvx	vr3,r4,r12 -	vperm	vr12,vr4,vr3,vr16 +	VPERM(vr12,vr4,vr3,vr16)  err4;	lvx	vr2,r4,r14 -	vperm	vr13,vr3,vr2,vr16 +	VPERM(vr13,vr3,vr2,vr16)  err4;	lvx	vr1,r4,r15 -	vperm	vr14,vr2,vr1,vr16 +	VPERM(vr14,vr2,vr1,vr16)  err4;	lvx	vr0,r4,r16 -	vperm	vr15,vr1,vr0,vr16 +	VPERM(vr15,vr1,vr0,vr16)  	addi	r4,r4,128  err4;	stvx	vr8,r0,r3  err4;	stvx	vr9,r3,r9 @@ -649,13 +657,13 @@ err4;	stvx	vr15,r3,r16  	bf	cr7*4+1,9f  err3;	lvx	vr3,r0,r4 -	vperm	vr8,vr0,vr3,vr16 +	VPERM(vr8,vr0,vr3,vr16)  err3;	lvx	vr2,r4,r9 -	vperm	vr9,vr3,vr2,vr16 +	VPERM(vr9,vr3,vr2,vr16)  err3;	lvx	vr1,r4,r10 -	vperm	vr10,vr2,vr1,vr16 +	VPERM(vr10,vr2,vr1,vr16)  err3;	lvx	vr0,r4,r11 -	vperm	vr11,vr1,vr0,vr16 +	VPERM(vr11,vr1,vr0,vr16)  	addi	r4,r4,64  err3;	stvx	vr8,r0,r3  err3;	stvx	vr9,r3,r9 @@ -665,9 +673,9 @@ err3;	stvx	vr11,r3,r11  9:	bf	cr7*4+2,10f  err3;	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  err3;	lvx	vr0,r4,r9 -	vperm	vr9,vr1,vr0,vr16 +	VPERM(vr9,vr1,vr0,vr16)  	addi	r4,r4,32  err3;	stvx	vr8,r0,r3  err3;	stvx	vr9,r3,r9 @@ -675,7 +683,7 @@ err3;	stvx	vr9,r3,r9  10:	bf	cr7*4+3,11f  err3;	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	addi	r4,r4,16  err3;	stvx	vr8,r0,r3  	addi	r3,r3,16 @@ -709,5 +717,5 @@ err3;	lbz	r0,0(r4)  err3;	stb	r0,0(r3)  15:	addi	r1,r1,STACKFRAMESIZE -	b	.exit_vmx_usercopy	/* tail call optimise */ +	b	exit_vmx_usercopy	/* tail call optimise */  #endif /* CONFiG_ALTIVEC */ diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index b2c68ce139a..a5b30c71a8d 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -231,6 +231,87 @@ _GLOBAL(_rest32gpr_31_x)  	mr	1,11  	blr +#ifdef 
CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area.  */ + +_GLOBAL(_savevr_20) +	li	r11,-192 +	stvx	vr20,r11,r0 +_GLOBAL(_savevr_21) +	li	r11,-176 +	stvx	vr21,r11,r0 +_GLOBAL(_savevr_22) +	li	r11,-160 +	stvx	vr22,r11,r0 +_GLOBAL(_savevr_23) +	li	r11,-144 +	stvx	vr23,r11,r0 +_GLOBAL(_savevr_24) +	li	r11,-128 +	stvx	vr24,r11,r0 +_GLOBAL(_savevr_25) +	li	r11,-112 +	stvx	vr25,r11,r0 +_GLOBAL(_savevr_26) +	li	r11,-96 +	stvx	vr26,r11,r0 +_GLOBAL(_savevr_27) +	li	r11,-80 +	stvx	vr27,r11,r0 +_GLOBAL(_savevr_28) +	li	r11,-64 +	stvx	vr28,r11,r0 +_GLOBAL(_savevr_29) +	li	r11,-48 +	stvx	vr29,r11,r0 +_GLOBAL(_savevr_30) +	li	r11,-32 +	stvx	vr30,r11,r0 +_GLOBAL(_savevr_31) +	li	r11,-16 +	stvx	vr31,r11,r0 +	blr + +_GLOBAL(_restvr_20) +	li	r11,-192 +	lvx	vr20,r11,r0 +_GLOBAL(_restvr_21) +	li	r11,-176 +	lvx	vr21,r11,r0 +_GLOBAL(_restvr_22) +	li	r11,-160 +	lvx	vr22,r11,r0 +_GLOBAL(_restvr_23) +	li	r11,-144 +	lvx	vr23,r11,r0 +_GLOBAL(_restvr_24) +	li	r11,-128 +	lvx	vr24,r11,r0 +_GLOBAL(_restvr_25) +	li	r11,-112 +	lvx	vr25,r11,r0 +_GLOBAL(_restvr_26) +	li	r11,-96 +	lvx	vr26,r11,r0 +_GLOBAL(_restvr_27) +	li	r11,-80 +	lvx	vr27,r11,r0 +_GLOBAL(_restvr_28) +	li	r11,-64 +	lvx	vr28,r11,r0 +_GLOBAL(_restvr_29) +	li	r11,-48 +	lvx	vr29,r11,r0 +_GLOBAL(_restvr_30) +	li	r11,-32 +	lvx	vr30,r11,r0 +_GLOBAL(_restvr_31) +	li	r11,-16 +	lvx	vr31,r11,r0 +	blr + +#endif /* CONFIG_ALTIVEC */ +  #else /* CONFIG_PPC64 */  	.section ".text.save.restore","ax",@progbits @@ -356,6 +437,111 @@ _restgpr0_31:  	mtlr	r0  	blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area.  */ + +.globl	_savevr_20 +_savevr_20: +	li	r12,-192 +	stvx	vr20,r12,r0 +.globl	_savevr_21 +_savevr_21: +	li	r12,-176 +	stvx	vr21,r12,r0 +.globl	_savevr_22 +_savevr_22: +	li	r12,-160 +	stvx	vr22,r12,r0 +.globl	_savevr_23 +_savevr_23: +	li	r12,-144 +	stvx	vr23,r12,r0 +.globl	_savevr_24 +_savevr_24: +	li	r12,-128 +	stvx	vr24,r12,r0 +.globl	_savevr_25 +_savevr_25: +	li	r12,-112 +	stvx	vr25,r12,r0 +.globl	_savevr_26 +_savevr_26: +	li	r12,-96 +	stvx	vr26,r12,r0 +.globl	_savevr_27 +_savevr_27: +	li	r12,-80 +	stvx	vr27,r12,r0 +.globl	_savevr_28 +_savevr_28: +	li	r12,-64 +	stvx	vr28,r12,r0 +.globl	_savevr_29 +_savevr_29: +	li	r12,-48 +	stvx	vr29,r12,r0 +.globl	_savevr_30 +_savevr_30: +	li	r12,-32 +	stvx	vr30,r12,r0 +.globl	_savevr_31 +_savevr_31: +	li	r12,-16 +	stvx	vr31,r12,r0 +	blr + +.globl	_restvr_20 +_restvr_20: +	li	r12,-192 +	lvx	vr20,r12,r0 +.globl	_restvr_21 +_restvr_21: +	li	r12,-176 +	lvx	vr21,r12,r0 +.globl	_restvr_22 +_restvr_22: +	li	r12,-160 +	lvx	vr22,r12,r0 +.globl	_restvr_23 +_restvr_23: +	li	r12,-144 +	lvx	vr23,r12,r0 +.globl	_restvr_24 +_restvr_24: +	li	r12,-128 +	lvx	vr24,r12,r0 +.globl	_restvr_25 +_restvr_25: +	li	r12,-112 +	lvx	vr25,r12,r0 +.globl	_restvr_26 +_restvr_26: +	li	r12,-96 +	lvx	vr26,r12,r0 +.globl	_restvr_27 +_restvr_27: +	li	r12,-80 +	lvx	vr27,r12,r0 +.globl	_restvr_28 +_restvr_28: +	li	r12,-64 +	lvx	vr28,r12,r0 +.globl	_restvr_29 +_restvr_29: +	li	r12,-48 +	lvx	vr29,r12,r0 +.globl	_restvr_30 +_restvr_30: +	li	r12,-32 +	lvx	vr30,r12,r0 +.globl	_restvr_31 +_restvr_31: +	li	r12,-16 +	lvx	vr31,r12,r0 +	blr + +#endif /* CONFIG_ALTIVEC */ +  #endif /* CONFIG_PPC64 */  #endif diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 9b96ff2ecd4..19e66001a4f 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -24,7 +24,7 @@  _GLOBAL(__arch_hweight8)  BEGIN_FTR_SECTION -	b .__sw_hweight8 +	b __sw_hweight8  	nop  	nop 
 FTR_SECTION_ELSE @@ -35,7 +35,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)  _GLOBAL(__arch_hweight16)  BEGIN_FTR_SECTION -	b .__sw_hweight16 +	b __sw_hweight16  	nop  	nop  	nop @@ -57,7 +57,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)  _GLOBAL(__arch_hweight32)  BEGIN_FTR_SECTION -	b .__sw_hweight32 +	b __sw_hweight32  	nop  	nop  	nop @@ -82,7 +82,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)  _GLOBAL(__arch_hweight64)  BEGIN_FTR_SECTION -	b .__sw_hweight64 +	b __sw_hweight64  	nop  	nop  	nop diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index f4fcb0bc656..43435c6892f 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,10 +77,10 @@ _GLOBAL(memset)  	stb	r4,0(r6)  	blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove)  	cmplw	0,r3,r4 -	bgt	.backwards_memcpy -	b	.memcpy +	bgt	backwards_memcpy +	b	memcpy  _GLOBAL(backwards_memcpy)  	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */ diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index d2bbbc8d7dc..32a06ec395d 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -10,12 +10,29 @@  #include <asm/ppc_asm.h>  	.align	7 -_GLOBAL(memcpy) +_GLOBAL_TOC(memcpy)  BEGIN_FTR_SECTION -	std	r3,48(r1)	/* save destination pointer for return value */ +#ifdef __LITTLE_ENDIAN__ +	cmpdi	cr7,r5,0 +#else +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */ +#endif  FTR_SECTION_ELSE +#ifndef SELFTEST  	b	memcpy_power7 +#endif  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#ifdef __LITTLE_ENDIAN__ +	/* dumb little-endian memcpy that will get replaced at runtime */ +	addi r9,r3,-1 +	addi r4,r4,-1 +	beqlr cr7 +	mtctr r5 +1:	lbzu r10,1(r4) +	stbu r10,1(r9) +	bdnz 1b +	blr +#else  	PPC_MTOCRF(0x01,r5)  	cmpldi	cr1,r5,16  	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry @@ -71,7 +88,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  2:	bf	cr7*4+3,3f  	lbz	r9,8(r4)  	stb	r9,0(r3) -3:	ld	r3,48(r1)	/* return dest pointer */ +3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */  	blr  .Lsrc_unaligned: @@ -154,7 +171,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  2:	bf	cr7*4+3,3f  	rotldi	r9,r9,8  	stb	r9,0(r3) -3:	ld	r3,48(r1)	/* return dest pointer */ +3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */  	blr  .Ldst_unaligned: @@ -199,5 +216,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)  3:	bf	cr7*4+3,4f  	lbz	r0,0(r4)  	stb	r0,0(r3) -4:	ld	r3,48(r1)	/* return dest pointer */ +4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */  	blr +#endif diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 0663630baf3..2ff5c142f87 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -20,18 +20,27 @@  #include <asm/ppc_asm.h>  _GLOBAL(memcpy_power7) + +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC +#endif +  #ifdef CONFIG_ALTIVEC  	cmpldi	r5,16  	cmpldi	cr1,r5,4096 -	std	r3,48(r1) +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)  	blt	.Lshort_copy  	bgt	cr1,.Lvmx_copy  #else  	cmpldi	r5,16 -	std	r3,48(r1) +	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)  	blt	.Lshort_copy  #endif @@ -207,7 +216,7 @@ _GLOBAL(memcpy_power7)  	lbz	r0,0(r4)  	stb	r0,0(r3) -15:	ld	r3,48(r1) +15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)  	blr  .Lunwind_stack_nonvmx_copy: @@ -217,16 +226,16 @@ 
_GLOBAL(memcpy_power7)  #ifdef CONFIG_ALTIVEC  .Lvmx_copy:  	mflr	r0 -	std	r4,56(r1) -	std	r5,64(r1) +	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1) +	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)  	std	r0,16(r1)  	stdu	r1,-STACKFRAMESIZE(r1) -	bl	.enter_vmx_copy +	bl	enter_vmx_copy  	cmpwi	cr1,r3,0  	ld	r0,STACKFRAMESIZE+16(r1) -	ld	r3,STACKFRAMESIZE+48(r1) -	ld	r4,STACKFRAMESIZE+56(r1) -	ld	r5,STACKFRAMESIZE+64(r1) +	ld	r3,STK_REG(R31)(r1) +	ld	r4,STK_REG(R30)(r1) +	ld	r5,STK_REG(R29)(r1)  	mtlr	r0  	/* @@ -438,8 +447,8 @@ _GLOBAL(memcpy_power7)  	stb	r0,0(r3)  15:	addi	r1,r1,STACKFRAMESIZE -	ld	r3,48(r1) -	b	.exit_vmx_copy		/* tail call optimise */ +	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	b	exit_vmx_copy		/* tail call optimise */  .Lvmx_unaligned_copy:  	/* Get the destination 16B aligned */ @@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)  	li	r10,32  	li	r11,48 -	lvsl	vr16,0,r4	/* Setup permute control vector */ +	LVS(vr16,0,r4)		/* Setup permute control vector */  	lvx	vr0,0,r4  	addi	r4,r4,16  	bf	cr7*4+3,5f  	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	addi	r4,r4,16  	stvx	vr8,r0,r3  	addi	r3,r3,16 @@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)  5:	bf	cr7*4+2,6f  	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	lvx	vr0,r4,r9 -	vperm	vr9,vr1,vr0,vr16 +	VPERM(vr9,vr1,vr0,vr16)  	addi	r4,r4,32  	stvx	vr8,r0,r3  	stvx	vr9,r3,r9 @@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)  6:	bf	cr7*4+1,7f  	lvx	vr3,r0,r4 -	vperm	vr8,vr0,vr3,vr16 +	VPERM(vr8,vr0,vr3,vr16)  	lvx	vr2,r4,r9 -	vperm	vr9,vr3,vr2,vr16 +	VPERM(vr9,vr3,vr2,vr16)  	lvx	vr1,r4,r10 -	vperm	vr10,vr2,vr1,vr16 +	VPERM(vr10,vr2,vr1,vr16)  	lvx	vr0,r4,r11 -	vperm	vr11,vr1,vr0,vr16 +	VPERM(vr11,vr1,vr0,vr16)  	addi	r4,r4,64  	stvx	vr8,r0,r3  	stvx	vr9,r3,r9 @@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)  	.align	5  8:  	lvx	vr7,r0,r4 -	vperm	vr8,vr0,vr7,vr16 +	VPERM(vr8,vr0,vr7,vr16)  	lvx	vr6,r4,r9 -	vperm	vr9,vr7,vr6,vr16 +	VPERM(vr9,vr7,vr6,vr16)  	lvx	vr5,r4,r10 -	vperm	vr10,vr6,vr5,vr16 +	VPERM(vr10,vr6,vr5,vr16)  	lvx	vr4,r4,r11 -	vperm	vr11,vr5,vr4,vr16 +	VPERM(vr11,vr5,vr4,vr16)  	lvx	vr3,r4,r12 -	vperm	vr12,vr4,vr3,vr16 +	VPERM(vr12,vr4,vr3,vr16)  	lvx	vr2,r4,r14 -	vperm	vr13,vr3,vr2,vr16 +	VPERM(vr13,vr3,vr2,vr16)  	lvx	vr1,r4,r15 -	vperm	vr14,vr2,vr1,vr16 +	VPERM(vr14,vr2,vr1,vr16)  	lvx	vr0,r4,r16 -	vperm	vr15,vr1,vr0,vr16 +	VPERM(vr15,vr1,vr0,vr16)  	addi	r4,r4,128  	stvx	vr8,r0,r3  	stvx	vr9,r3,r9 @@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)  	bf	cr7*4+1,9f  	lvx	vr3,r0,r4 -	vperm	vr8,vr0,vr3,vr16 +	VPERM(vr8,vr0,vr3,vr16)  	lvx	vr2,r4,r9 -	vperm	vr9,vr3,vr2,vr16 +	VPERM(vr9,vr3,vr2,vr16)  	lvx	vr1,r4,r10 -	vperm	vr10,vr2,vr1,vr16 +	VPERM(vr10,vr2,vr1,vr16)  	lvx	vr0,r4,r11 -	vperm	vr11,vr1,vr0,vr16 +	VPERM(vr11,vr1,vr0,vr16)  	addi	r4,r4,64  	stvx	vr8,r0,r3  	stvx	vr9,r3,r9 @@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)  9:	bf	cr7*4+2,10f  	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	lvx	vr0,r4,r9 -	vperm	vr9,vr1,vr0,vr16 +	VPERM(vr9,vr1,vr0,vr16)  	addi	r4,r4,32  	stvx	vr8,r0,r3  	stvx	vr9,r3,r9 @@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)  10:	bf	cr7*4+3,11f  	lvx	vr1,r0,r4 -	vperm	vr8,vr0,vr1,vr16 +	VPERM(vr8,vr0,vr1,vr16)  	addi	r4,r4,16  	stvx	vr8,r0,r3  	addi	r3,r3,16 @@ -642,6 +651,6 @@ _GLOBAL(memcpy_power7)  	stb	r0,0(r3)  15:	addi	r1,r1,STACKFRAMESIZE -	ld	r3,48(r1) -	b	.exit_vmx_copy		/* tail call optimise */ +	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1) +	b	exit_vmx_copy		/* tail call optimise */  #endif /* CONFiG_ALTIVEC */ diff --git a/arch/powerpc/lib/sstep.c 
b/arch/powerpc/lib/sstep.c index a7ee978fb86..5c09f365c84 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -212,11 +212,19 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,  {  	int err;  	unsigned long x, b, c; +#ifdef __LITTLE_ENDIAN__ +	int len = nb; /* save a copy of the length for byte reversal */ +#endif  	/* unaligned, do this in pieces */  	x = 0;  	for (; nb > 0; nb -= c) { +#ifdef __LITTLE_ENDIAN__ +		c = 1; +#endif +#ifdef __BIG_ENDIAN__  		c = max_align(ea); +#endif  		if (c > nb)  			c = max_align(nb);  		err = read_mem_aligned(&b, ea, c); @@ -225,7 +233,24 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,  		x = (x << (8 * c)) + b;  		ea += c;  	} +#ifdef __LITTLE_ENDIAN__ +	switch (len) { +	case 2: +		*dest = byterev_2(x); +		break; +	case 4: +		*dest = byterev_4(x); +		break; +#ifdef __powerpc64__ +	case 8: +		*dest = byterev_8(x); +		break; +#endif +	} +#endif +#ifdef __BIG_ENDIAN__  	*dest = x; +#endif  	return 0;  } @@ -273,9 +298,29 @@ static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,  	int err;  	unsigned long c; +#ifdef __LITTLE_ENDIAN__ +	switch (nb) { +	case 2: +		val = byterev_2(val); +		break; +	case 4: +		val = byterev_4(val); +		break; +#ifdef __powerpc64__ +	case 8: +		val = byterev_8(val); +		break; +#endif +	} +#endif  	/* unaligned or little-endian, do this in pieces */  	for (; nb > 0; nb -= c) { +#ifdef __LITTLE_ENDIAN__ +		c = 1; +#endif +#ifdef __BIG_ENDIAN__  		c = max_align(ea); +#endif  		if (c > nb)  			c = max_align(nb);  		err = write_mem_aligned(val >> (nb - c) * 8, ea, c); @@ -310,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),  				struct pt_regs *regs)  {  	int err; -	unsigned long val[sizeof(double) / sizeof(long)]; +	union { +		double dbl; +		unsigned long ul[2]; +		struct { +#ifdef __BIG_ENDIAN__ +			unsigned _pad_; +			unsigned word; +#endif +#ifdef __LITTLE_ENDIAN__ +			unsigned word; +			unsigned _pad_; +#endif +		} single; +	} data;  	unsigned long ptr;  	if (!address_ok(regs, ea, nb))  		return -EFAULT;  	if ((ea & 3) == 0)  		return (*func)(rn, ea); -	ptr = (unsigned long) &val[0]; +	ptr = (unsigned long) &data.ul;  	if (sizeof(unsigned long) == 8 || nb == 4) { -		err = read_mem_unaligned(&val[0], ea, nb, regs); -		ptr += sizeof(unsigned long) - nb; +		err = read_mem_unaligned(&data.ul[0], ea, nb, regs); +		if (nb == 4) +			ptr = (unsigned long)&(data.single.word);  	} else {  		/* reading a double on 32-bit */ -		err = read_mem_unaligned(&val[0], ea, 4, regs); +		err = read_mem_unaligned(&data.ul[0], ea, 4, regs);  		if (!err) -			err = read_mem_unaligned(&val[1], ea + 4, 4, regs); +			err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);  	}  	if (err)  		return err; @@ -337,28 +396,42 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),  				 struct pt_regs *regs)  {  	int err; -	unsigned long val[sizeof(double) / sizeof(long)]; +	union { +		double dbl; +		unsigned long ul[2]; +		struct { +#ifdef __BIG_ENDIAN__ +			unsigned _pad_; +			unsigned word; +#endif +#ifdef __LITTLE_ENDIAN__ +			unsigned word; +			unsigned _pad_; +#endif +		} single; +	} data;  	unsigned long ptr;  	if (!address_ok(regs, ea, nb))  		return -EFAULT;  	if ((ea & 3) == 0)  		return (*func)(rn, ea); -	ptr = (unsigned long) &val[0]; +	ptr = (unsigned long) &data.ul[0];  	if (sizeof(unsigned long) == 8 || nb == 4) { -		ptr += sizeof(unsigned long) - nb; +		if (nb == 4) +			ptr = 
(unsigned long)&(data.single.word);  		err = (*func)(rn, ptr);  		if (err)  			return err; -		err = write_mem_unaligned(val[0], ea, nb, regs); +		err = write_mem_unaligned(data.ul[0], ea, nb, regs);  	} else {  		/* writing a double on 32-bit */  		err = (*func)(rn, ptr);  		if (err)  			return err; -		err = write_mem_unaligned(val[0], ea, 4, regs); +		err = write_mem_unaligned(data.ul[0], ea, 4, regs);  		if (!err) -			err = write_mem_unaligned(val[1], ea + 4, 4, regs); +			err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);  	}  	return err;  } @@ -1125,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  			sh = regs->gpr[rb] & 0x3f;  			ival = (signed int) regs->gpr[rd];  			regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); -			if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) +			if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))  				regs->xer |= XER_CA;  			else  				regs->xer &= ~XER_CA; @@ -1135,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  			sh = rb;  			ival = (signed int) regs->gpr[rd];  			regs->gpr[ra] = ival >> sh; -			if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) +			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)  				regs->xer |= XER_CA;  			else  				regs->xer &= ~XER_CA; @@ -1143,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  #ifdef __powerpc64__  		case 27:	/* sld */ -			sh = regs->gpr[rd] & 0x7f; +			sh = regs->gpr[rb] & 0x7f;  			if (sh < 64)  				regs->gpr[ra] = regs->gpr[rd] << sh;  			else @@ -1162,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  			sh = regs->gpr[rb] & 0x7f;  			ival = (signed long int) regs->gpr[rd];  			regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); -			if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) +			if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))  				regs->xer |= XER_CA;  			else  				regs->xer &= ~XER_CA; @@ -1173,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  			sh = rb | ((instr & 2) << 4);  			ival = (signed long int) regs->gpr[rd];  			regs->gpr[ra] = ival >> sh; -			if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) +			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)  				regs->xer |= XER_CA;  			else  				regs->xer &= ~XER_CA; @@ -1397,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  				regs->gpr[rd] = byterev_4(val);  			goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU  		case 535:	/* lfsx */  		case 567:	/* lfsux */  			if (!(regs->msr & MSR_FP)) @@ -1505,6 +1578,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  		 */  		if ((ra == 1) && !(regs->msr & MSR_PR) \  			&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32  			/*  			 * Check if we will touch kernel sack overflow  			 */ @@ -1513,7 +1587,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)  				err = -EINVAL;  				break;  			} - +#endif /* CONFIG_PPC32 */  			/*  			 * Check if we already set since that means we'll  			 * lose the previous value. 
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 3b1e48049fa..7bd9549a90a 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -77,7 +77,7 @@ err3;	stb	r0,0(r3)  	mr	r3,r4  	blr -_GLOBAL(__clear_user) +_GLOBAL_TOC(__clear_user)  	cmpdi	r4,32  	neg	r6,r3  	li	r0,0 diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c new file mode 100644 index 00000000000..e905f7c2ea7 --- /dev/null +++ b/arch/powerpc/lib/xor_vmx.c @@ -0,0 +1,177 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <altivec.h> + +#include <linux/preempt.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <asm/switch_to.h> + +typedef vector signed char unative_t; + +#define DEFINE(V)				\ +	unative_t *V = (unative_t *)V##_in;	\ +	unative_t V##_0, V##_1, V##_2, V##_3 + +#define LOAD(V)			\ +	do {			\ +		V##_0 = V[0];	\ +		V##_1 = V[1];	\ +		V##_2 = V[2];	\ +		V##_3 = V[3];	\ +	} while (0) + +#define STORE(V)		\ +	do {			\ +		V[0] = V##_0;	\ +		V[1] = V##_1;	\ +		V[2] = V##_2;	\ +		V[3] = V##_3;	\ +	} while (0) + +#define XOR(V1, V2)					\ +	do {						\ +		V1##_0 = vec_xor(V1##_0, V2##_0);	\ +		V1##_1 = vec_xor(V1##_1, V2##_1);	\ +		V1##_2 = vec_xor(V1##_2, V2##_2);	\ +		V1##_3 = vec_xor(V1##_3, V2##_3);	\ +	} while (0) + +void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, +		   unsigned long *v2_in) +{ +	DEFINE(v1); +	DEFINE(v2); +	unsigned long lines = bytes / (sizeof(unative_t)) / 4; + +	preempt_disable(); +	enable_kernel_altivec(); + +	do { +		LOAD(v1); +		LOAD(v2); +		XOR(v1, v2); +		STORE(v1); + +		v1 += 4; +		v2 += 4; +	} while (--lines > 0); + +	preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_2); + +void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, +		   unsigned long *v2_in, unsigned long *v3_in) +{ +	DEFINE(v1); +	DEFINE(v2); +	DEFINE(v3); +	unsigned long lines = bytes / (sizeof(unative_t)) / 4; + +	preempt_disable(); +	enable_kernel_altivec(); + +	do { +		LOAD(v1); +		LOAD(v2); +		LOAD(v3); +		XOR(v1, v2); +		XOR(v1, v3); +		STORE(v1); + +		v1 += 4; +		v2 += 4; +		v3 += 4; +	} while (--lines > 0); + +	preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_3); + +void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, +		   unsigned long *v2_in, unsigned long *v3_in, +		   unsigned long *v4_in) +{ +	DEFINE(v1); +	DEFINE(v2); +	DEFINE(v3); +	DEFINE(v4); +	unsigned long lines = bytes / (sizeof(unative_t)) / 4; + +	preempt_disable(); +	enable_kernel_altivec(); + +	do { +		LOAD(v1); +		LOAD(v2); +		LOAD(v3); +		LOAD(v4); +		XOR(v1, v2); +		XOR(v3, v4); +		XOR(v1, v3); +		STORE(v1); + +		v1 += 4; +		v2 += 4; +		v3 += 4; +		v4 += 4; +	} while (--lines > 0); + +	preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_4); + +void xor_altivec_5(unsigned 
long bytes, unsigned long *v1_in, +		   unsigned long *v2_in, unsigned long *v3_in, +		   unsigned long *v4_in, unsigned long *v5_in) +{ +	DEFINE(v1); +	DEFINE(v2); +	DEFINE(v3); +	DEFINE(v4); +	DEFINE(v5); +	unsigned long lines = bytes / (sizeof(unative_t)) / 4; + +	preempt_disable(); +	enable_kernel_altivec(); + +	do { +		LOAD(v1); +		LOAD(v2); +		LOAD(v3); +		LOAD(v4); +		LOAD(v5); +		XOR(v1, v2); +		XOR(v3, v4); +		XOR(v1, v5); +		XOR(v1, v3); +		STORE(v1); + +		v1 += 4; +		v2 += 4; +		v3 += 4; +		v4 += 4; +		v5 += 4; +	} while (--lines > 0); + +	preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_5);  | 
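
Note on the new xor_vmx.c: it only supplies the raw AltiVec XOR kernels and their EXPORT_SYMBOLs; hooking them into the generic RAID XOR framework happens elsewhere through struct xor_block_template. The C sketch below shows one plausible wiring. It is an illustration only: the template name xor_block_altivec and the asm/xor.h-style registration hook are assumptions, not part of this diff.

/*
 * Illustrative only -- one way the xor_altivec_* routines from this diff
 * could be plugged into the generic RAID XOR framework.  The template and
 * the registration hook shown here are not part of the patch.
 */
#include <linux/raid/xor.h>
#include <asm/cputable.h>

/* Prototypes matching the definitions added in arch/powerpc/lib/xor_vmx.c */
void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in);
void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in);
void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in);
void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in, unsigned long *v5_in);

static struct xor_block_template xor_block_altivec = {
	.name	= "altivec",
	.do_2	= xor_altivec_2,
	.do_3	= xor_altivec_3,
	.do_4	= xor_altivec_4,
	.do_5	= xor_altivec_5,
};

/*
 * A XOR_TRY_TEMPLATES hook would then offer this template for benchmarking
 * only when the CPU actually has AltiVec, e.g.:
 *
 *	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 *		xor_speed(&xor_block_altivec);
 */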
