Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--   arch/powerpc/lib/Makefile             |  25
-rw-r--r--   arch/powerpc/lib/checksum_64.S        |  58
-rw-r--r--   arch/powerpc/lib/code-patching.c      |  15
-rw-r--r--   arch/powerpc/lib/copypage_64.S        |   4
-rw-r--r--   arch/powerpc/lib/copypage_power7.S    |  31
-rw-r--r--   arch/powerpc/lib/copyuser_64.S        |  55
-rw-r--r--   arch/powerpc/lib/copyuser_power7.S    |  98
-rw-r--r--   arch/powerpc/lib/crtsavres.S          | 186
-rw-r--r--   arch/powerpc/lib/hweight_64.S         |   8
-rw-r--r--   arch/powerpc/lib/locks.c              |   4
-rw-r--r--   arch/powerpc/lib/mem_64.S             |   6
-rw-r--r--   arch/powerpc/lib/memcpy_64.S          |  28
-rw-r--r--   arch/powerpc/lib/memcpy_power7.S      |  81
-rw-r--r--   arch/powerpc/lib/sstep.c              | 122
-rw-r--r--   arch/powerpc/lib/string_64.S          |   2
-rw-r--r--   arch/powerpc/lib/xor_vmx.c            | 177
16 files changed, 725 insertions(+), 175 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 746e0c895cd..59fa2de9546 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -4,24 +4,28 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ccflags-$(CONFIG_PPC64) := -mno-minimal-toc +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ - checksum_$(CONFIG_WORD_SIZE).o crtsavres.o + crtsavres.o obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o string.o \ - checksum_wrappers_64.o hweight_64.o \ - copyuser_power7.o string_64.o copypage_power7.o \ - memcpy_power7.o -obj-$(CONFIG_XMON) += sstep.o ldstfp.o -obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o -obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o + usercopy_64.o mem_64.o string.o \ + hweight_64.o \ + copyuser_power7.o string_64.o copypage_power7.o +ifeq ($(CONFIG_GENERIC_CSUM),) +obj-y += checksum_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_PPC64) += checksum_wrappers_64.o +endif + +obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o + +obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o @@ -33,3 +37,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-y += code-patching.o obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o + +obj-$(CONFIG_ALTIVEC) += xor_vmx.o +CFLAGS_xor_vmx.o += -maltivec -mabi=altivec diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 167f72555d6..57a07206505 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -226,19 +226,35 @@ _GLOBAL(csum_partial) blr - .macro source + .macro srcnr 100: .section __ex_table,"a" .align 3 - .llong 100b,.Lsrc_error + .llong 100b,.Lsrc_error_nr .previous .endm - .macro dest + .macro source +150: + .section __ex_table,"a" + .align 3 + .llong 150b,.Lsrc_error + .previous + .endm + + .macro dstnr 200: .section __ex_table,"a" .align 3 - .llong 200b,.Ldest_error + .llong 200b,.Ldest_error_nr + .previous + .endm + + .macro dest +250: + .section __ex_table,"a" + .align 3 + .llong 250b,.Ldest_error .previous .endm @@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic) rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ beq .Lcopy_aligned - li r7,4 - sub r6,r7,r6 + li r9,4 + sub r6,r9,r6 mtctr r6 1: -source; lhz r6,0(r3) /* align to doubleword */ +srcnr; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 bdnz 1b @@ -392,10 +408,10 @@ dest; std r16,56(r4) mtctr r6 3: -source; ld r6,0(r3) +srcnr; ld r6,0(r3) addi r3,r3,8 adde r0,r0,r6 -dest; std r6,0(r4) +dstnr; std r6,0(r4) addi r4,r4,8 bdnz 3b @@ -405,10 +421,10 @@ dest; std r6,0(r4) srdi. r6,r5,2 beq .Lcopy_tail_halfword -source; lwz r6,0(r3) +srcnr; lwz r6,0(r3) addi r3,r3,4 adde r0,r0,r6 -dest; stw r6,0(r4) +dstnr; stw r6,0(r4) addi r4,r4,4 subi r5,r5,4 @@ -416,10 +432,10 @@ dest; stw r6,0(r4) srdi. r6,r5,1 beq .Lcopy_tail_byte -source; lhz r6,0(r3) +srcnr; lhz r6,0(r3) addi r3,r3,2 adde r0,r0,r6 -dest; sth r6,0(r4) +dstnr; sth r6,0(r4) addi r4,r4,2 subi r5,r5,2 @@ -427,10 +443,10 @@ dest; sth r6,0(r4) andi. 
r6,r5,1 beq .Lcopy_finish -source; lbz r6,0(r3) +srcnr; lbz r6,0(r3) sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 -dest; stb r6,0(r4) +dstnr; stb r6,0(r4) .Lcopy_finish: addze r0,r0 /* add in final carry */ @@ -440,6 +456,11 @@ dest; stb r6,0(r4) blr .Lsrc_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Lsrc_error_nr: cmpdi 0,r7,0 beqlr li r6,-EFAULT @@ -447,6 +468,11 @@ dest; stb r6,0(r4) blr .Ldest_error: + ld r14,STK_REG(R14)(r1) + ld r15,STK_REG(R15)(r1) + ld r16,STK_REG(R16)(r1) + addi r1,r1,STACKFRAMESIZE +.Ldest_error_nr: cmpdi 0,r8,0 beqlr li r6,-EFAULT diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 17e5b236431..d5edbeb8eb8 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -159,6 +159,21 @@ unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) return 0; } +#ifdef CONFIG_PPC_BOOK3E_64 +void __patch_exception(int exc, unsigned long addr) +{ + extern unsigned int interrupt_base_book3e; + unsigned int *ibase = &interrupt_base_book3e; + + /* Our exceptions vectors start with a NOP and -then- a branch + * to deal with single stepping from userspace which stops on + * the second instruction. Thus we need to patch the second + * instruction of the exception, not the first one + */ + + patch_branch(ibase + (exc / 4) + 1, addr, 0); +} +#endif #ifdef CONFIG_CODE_PATCHING_SELFTEST diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 9f9434a8526..a3c4dc4defd 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -16,11 +16,11 @@ PPC64_CACHES: .tc ppc64_caches[TC],ppc64_caches .section ".text" -_GLOBAL(copy_page) +_GLOBAL_TOC(copy_page) BEGIN_FTR_SECTION lis r5,PAGE_SIZE@h FTR_SECTION_ELSE - b .copypage_power7 + b copypage_power7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) ori r5,r5,PAGE_SIZE@l BEGIN_FTR_SECTION diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 0ef75bf0695..d7dafb3777a 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -28,13 +28,14 @@ _GLOBAL(copypage_power7) * aligned we don't need to clear the bottom 7 bits of either * address. 
*/ - ori r9,r3,1 /* stream=1 */ + ori r9,r3,1 /* stream=1 => to */ #ifdef CONFIG_PPC_64K_PAGES - lis r7,0x0E01 /* depth=7, units=512 */ + lis r7,0x0E01 /* depth=7 + * units/cachelines=512 */ #else lis r7,0x0E00 /* depth=7 */ - ori r7,r7,0x1000 /* units=32 */ + ori r7,r7,0x1000 /* units/cachelines=32 */ #endif ori r10,r7,1 /* stream=1 */ @@ -43,25 +44,27 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" - dcbt r0,r4,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r4,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC mflr r0 - std r3,48(r1) - std r4,56(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_copy + bl enter_vmx_copy cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) + ld r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) mtlr r0 li r0,(PAGE_SIZE/128) @@ -100,7 +103,7 @@ _GLOBAL(copypage_power7) addi r3,r3,128 bdnz 1b - b .exit_vmx_copy /* tail call optimise */ + b exit_vmx_copy /* tail call optimise */ #else li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a5901490..0860ee46013 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,8 +9,16 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define sLd sld /* Shift towards low-numbered address. */ +#define sHd srd /* Shift towards high-numbered address. */ +#else +#define sLd srd /* Shift towards low-numbered address. */ +#define sHd sld /* Shift towards high-numbered address. 
*/ +#endif + .align 7 -_GLOBAL(__copy_tofrom_user) +_GLOBAL_TOC(__copy_tofrom_user) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE @@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ 25: ld r0,8(r4) - sld r6,r9,r10 + sLd r6,r9,r10 26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f 27: ld r0,8(r4) @@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ 29: ldu r9,8(r4) - sld r8,r0,r10 + sLd r8,r0,r10 addi r3,r3,-8 blt cr6,5f 30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 + sHd r12,r9,r11 + sLd r6,r9,r10 31: ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addi r3,r3,16 beq cr6,78f 1: or r7,r7,r6 32: ld r0,8(r4) 76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 +2: sHd r12,r9,r11 + sLd r6,r9,r10 33: ldu r9,16(r4) or r12,r8,r12 77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz 1b 78: std r12,8(r3) or r7,r7,r6 79: std r7,16(r3) -5: srd r12,r9,r11 +5: sHd r12,r9,r11 or r12,r8,r12 80: std r12,24(r3) bne 6f @@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr 6: cmpwi cr1,r5,8 addi r3,r3,32 - sld r9,r9,r10 + sLd r9,r9,r10 ble cr1,7f 34: ld r0,8(r4) - srd r7,r0,r11 + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 +#endif 94: stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addi r3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 +#endif 95: sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addi r3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 +#endif 96: stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 0d24ff15f5f..c46c876ac96 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -19,6 +19,14 @@ */ #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB) lvsl VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB) lvsr VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC +#endif + .macro err1 100: .section __ex_table,"a" @@ -58,7 +66,7 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl .exit_vmx_usercopy + bl exit_vmx_usercopy ld r0,STACKFRAMESIZE+16(r1) mtlr r0 b .Lexit @@ -77,9 +85,9 @@ .Lexit: addi r1,r1,STACKFRAMESIZE .Ldo_err1: - ld r3,48(r1) - ld r4,56(r1) - ld r5,64(r1) + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1) b __copy_tofrom_user_base @@ -88,18 +96,18 @@ _GLOBAL(__copy_tofrom_user_power7) cmpldi r5,16 cmpldi cr1,r5,4096 - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) blt .Lshort_copy bgt cr1,.Lvmx_copy #else cmpldi r5,16 - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) blt .Lshort_copy #endif @@ -287,12 +295,12 @@ err1; stb r0,0(r3) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_usercopy + bl enter_vmx_usercopy cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld 
r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) + ld r5,STK_REG(R29)(r1) mtlr r0 /* @@ -318,12 +326,14 @@ err1; stb r0,0(r3) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r6,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy @@ -504,7 +514,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b .exit_vmx_usercopy /* tail call optimise */ + b exit_vmx_usercopy /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -550,13 +560,13 @@ err3; stw r7,4(r3) li r10,32 li r11,48 - lvsl vr16,0,r4 /* Setup permute control vector */ + LVS(vr16,0,r4) /* Setup permute control vector */ err3; lvx vr0,0,r4 addi r4,r4,16 bf cr7*4+3,5f err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) addi r4,r4,16 err3; stvx vr8,r0,r3 addi r3,r3,16 @@ -564,9 +574,9 @@ err3; stvx vr8,r0,r3 5: bf cr7*4+2,6f err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) err3; lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + VPERM(vr9,vr1,vr0,vr16) addi r4,r4,32 err3; stvx vr8,r0,r3 err3; stvx vr9,r3,r9 @@ -574,13 +584,13 @@ err3; stvx vr9,r3,r9 6: bf cr7*4+1,7f err3; lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 + VPERM(vr8,vr0,vr3,vr16) err3; lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 + VPERM(vr9,vr3,vr2,vr16) err3; lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 + VPERM(vr10,vr2,vr1,vr16) err3; lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + VPERM(vr11,vr1,vr0,vr16) addi r4,r4,64 err3; stvx vr8,r0,r3 err3; stvx vr9,r3,r9 @@ -609,21 +619,21 @@ err3; stvx vr11,r3,r11 .align 5 8: err4; lvx vr7,r0,r4 - vperm vr8,vr0,vr7,vr16 + VPERM(vr8,vr0,vr7,vr16) err4; lvx vr6,r4,r9 - vperm vr9,vr7,vr6,vr16 + VPERM(vr9,vr7,vr6,vr16) err4; lvx vr5,r4,r10 - vperm vr10,vr6,vr5,vr16 + VPERM(vr10,vr6,vr5,vr16) err4; lvx vr4,r4,r11 - vperm vr11,vr5,vr4,vr16 + VPERM(vr11,vr5,vr4,vr16) err4; lvx vr3,r4,r12 - vperm vr12,vr4,vr3,vr16 + VPERM(vr12,vr4,vr3,vr16) err4; lvx vr2,r4,r14 - vperm vr13,vr3,vr2,vr16 + VPERM(vr13,vr3,vr2,vr16) err4; lvx vr1,r4,r15 - vperm vr14,vr2,vr1,vr16 + VPERM(vr14,vr2,vr1,vr16) err4; lvx vr0,r4,r16 - vperm vr15,vr1,vr0,vr16 + VPERM(vr15,vr1,vr0,vr16) addi r4,r4,128 err4; stvx vr8,r0,r3 err4; stvx vr9,r3,r9 @@ -647,13 +657,13 @@ err4; stvx vr15,r3,r16 bf cr7*4+1,9f err3; lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 + VPERM(vr8,vr0,vr3,vr16) err3; lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 + VPERM(vr9,vr3,vr2,vr16) err3; lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 + VPERM(vr10,vr2,vr1,vr16) err3; lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + VPERM(vr11,vr1,vr0,vr16) addi r4,r4,64 err3; stvx vr8,r0,r3 err3; stvx vr9,r3,r9 @@ -663,9 +673,9 @@ err3; stvx vr11,r3,r11 9: bf cr7*4+2,10f err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) err3; lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + VPERM(vr9,vr1,vr0,vr16) addi r4,r4,32 err3; stvx vr8,r0,r3 err3; stvx vr9,r3,r9 @@ -673,7 +683,7 @@ err3; stvx vr9,r3,r9 10: bf cr7*4+3,11f err3; lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) addi r4,r4,16 err3; stvx vr8,r0,r3 addi r3,r3,16 @@ -707,5 +717,5 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b .exit_vmx_usercopy /* tail call optimise */ + b exit_vmx_usercopy /* tail call 
optimise */ #endif /* CONFiG_ALTIVEC */ diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index b2c68ce139a..a5b30c71a8d 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -231,6 +231,87 @@ _GLOBAL(_rest32gpr_31_x) mr 1,11 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. */ + +_GLOBAL(_savevr_20) + li r11,-192 + stvx vr20,r11,r0 +_GLOBAL(_savevr_21) + li r11,-176 + stvx vr21,r11,r0 +_GLOBAL(_savevr_22) + li r11,-160 + stvx vr22,r11,r0 +_GLOBAL(_savevr_23) + li r11,-144 + stvx vr23,r11,r0 +_GLOBAL(_savevr_24) + li r11,-128 + stvx vr24,r11,r0 +_GLOBAL(_savevr_25) + li r11,-112 + stvx vr25,r11,r0 +_GLOBAL(_savevr_26) + li r11,-96 + stvx vr26,r11,r0 +_GLOBAL(_savevr_27) + li r11,-80 + stvx vr27,r11,r0 +_GLOBAL(_savevr_28) + li r11,-64 + stvx vr28,r11,r0 +_GLOBAL(_savevr_29) + li r11,-48 + stvx vr29,r11,r0 +_GLOBAL(_savevr_30) + li r11,-32 + stvx vr30,r11,r0 +_GLOBAL(_savevr_31) + li r11,-16 + stvx vr31,r11,r0 + blr + +_GLOBAL(_restvr_20) + li r11,-192 + lvx vr20,r11,r0 +_GLOBAL(_restvr_21) + li r11,-176 + lvx vr21,r11,r0 +_GLOBAL(_restvr_22) + li r11,-160 + lvx vr22,r11,r0 +_GLOBAL(_restvr_23) + li r11,-144 + lvx vr23,r11,r0 +_GLOBAL(_restvr_24) + li r11,-128 + lvx vr24,r11,r0 +_GLOBAL(_restvr_25) + li r11,-112 + lvx vr25,r11,r0 +_GLOBAL(_restvr_26) + li r11,-96 + lvx vr26,r11,r0 +_GLOBAL(_restvr_27) + li r11,-80 + lvx vr27,r11,r0 +_GLOBAL(_restvr_28) + li r11,-64 + lvx vr28,r11,r0 +_GLOBAL(_restvr_29) + li r11,-48 + lvx vr29,r11,r0 +_GLOBAL(_restvr_30) + li r11,-32 + lvx vr30,r11,r0 +_GLOBAL(_restvr_31) + li r11,-16 + lvx vr31,r11,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #else /* CONFIG_PPC64 */ .section ".text.save.restore","ax",@progbits @@ -356,6 +437,111 @@ _restgpr0_31: mtlr r0 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. 
*/ + +.globl _savevr_20 +_savevr_20: + li r12,-192 + stvx vr20,r12,r0 +.globl _savevr_21 +_savevr_21: + li r12,-176 + stvx vr21,r12,r0 +.globl _savevr_22 +_savevr_22: + li r12,-160 + stvx vr22,r12,r0 +.globl _savevr_23 +_savevr_23: + li r12,-144 + stvx vr23,r12,r0 +.globl _savevr_24 +_savevr_24: + li r12,-128 + stvx vr24,r12,r0 +.globl _savevr_25 +_savevr_25: + li r12,-112 + stvx vr25,r12,r0 +.globl _savevr_26 +_savevr_26: + li r12,-96 + stvx vr26,r12,r0 +.globl _savevr_27 +_savevr_27: + li r12,-80 + stvx vr27,r12,r0 +.globl _savevr_28 +_savevr_28: + li r12,-64 + stvx vr28,r12,r0 +.globl _savevr_29 +_savevr_29: + li r12,-48 + stvx vr29,r12,r0 +.globl _savevr_30 +_savevr_30: + li r12,-32 + stvx vr30,r12,r0 +.globl _savevr_31 +_savevr_31: + li r12,-16 + stvx vr31,r12,r0 + blr + +.globl _restvr_20 +_restvr_20: + li r12,-192 + lvx vr20,r12,r0 +.globl _restvr_21 +_restvr_21: + li r12,-176 + lvx vr21,r12,r0 +.globl _restvr_22 +_restvr_22: + li r12,-160 + lvx vr22,r12,r0 +.globl _restvr_23 +_restvr_23: + li r12,-144 + lvx vr23,r12,r0 +.globl _restvr_24 +_restvr_24: + li r12,-128 + lvx vr24,r12,r0 +.globl _restvr_25 +_restvr_25: + li r12,-112 + lvx vr25,r12,r0 +.globl _restvr_26 +_restvr_26: + li r12,-96 + lvx vr26,r12,r0 +.globl _restvr_27 +_restvr_27: + li r12,-80 + lvx vr27,r12,r0 +.globl _restvr_28 +_restvr_28: + li r12,-64 + lvx vr28,r12,r0 +.globl _restvr_29 +_restvr_29: + li r12,-48 + lvx vr29,r12,r0 +.globl _restvr_30 +_restvr_30: + li r12,-32 + lvx vr30,r12,r0 +.globl _restvr_31 +_restvr_31: + li r12,-16 + lvx vr31,r12,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #endif /* CONFIG_PPC64 */ #endif diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 9b96ff2ecd4..19e66001a4f 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -24,7 +24,7 @@ _GLOBAL(__arch_hweight8) BEGIN_FTR_SECTION - b .__sw_hweight8 + b __sw_hweight8 nop nop FTR_SECTION_ELSE @@ -35,7 +35,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) _GLOBAL(__arch_hweight16) BEGIN_FTR_SECTION - b .__sw_hweight16 + b __sw_hweight16 nop nop nop @@ -57,7 +57,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) _GLOBAL(__arch_hweight32) BEGIN_FTR_SECTION - b .__sw_hweight32 + b __sw_hweight32 nop nop nop @@ -82,7 +82,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB) _GLOBAL(__arch_hweight64) BEGIN_FTR_SECTION - b .__sw_hweight64 + b __sw_hweight64 nop nop nop diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index bb7cfecf278..0c9c8d7d073 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -32,7 +32,7 @@ void __spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca_of(holder_cpu).yield_count; + yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); @@ -57,7 +57,7 @@ void __rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca_of(holder_cpu).yield_count; + yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index f4fcb0bc656..43435c6892f 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,10 +77,10 @@ _GLOBAL(memset) stb r4,0(r6) blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove) cmplw 0,r3,r4 - bgt .backwards_memcpy - b 
.memcpy + bgt backwards_memcpy + b memcpy _GLOBAL(backwards_memcpy) rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index d2bbbc8d7dc..32a06ec395d 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -10,12 +10,29 @@ #include <asm/ppc_asm.h> .align 7 -_GLOBAL(memcpy) +_GLOBAL_TOC(memcpy) BEGIN_FTR_SECTION - std r3,48(r1) /* save destination pointer for return value */ +#ifdef __LITTLE_ENDIAN__ + cmpdi cr7,r5,0 +#else + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */ +#endif FTR_SECTION_ELSE +#ifndef SELFTEST b memcpy_power7 +#endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#ifdef __LITTLE_ENDIAN__ + /* dumb little-endian memcpy that will get replaced at runtime */ + addi r9,r3,-1 + addi r4,r4,-1 + beqlr cr7 + mtctr r5 +1: lbzu r10,1(r4) + stbu r10,1(r9) + bdnz 1b + blr +#else PPC_MTOCRF(0x01,r5) cmpldi cr1,r5,16 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry @@ -71,7 +88,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f lbz r9,8(r4) stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr .Lsrc_unaligned: @@ -154,7 +171,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f rotldi r9,r9,8 stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr .Ldst_unaligned: @@ -199,5 +216,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: bf cr7*4+3,4f lbz r0,0(r4) stb r0,0(r3) -4: ld r3,48(r1) /* return dest pointer */ +4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */ blr +#endif diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 0663630baf3..2ff5c142f87 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -20,18 +20,27 @@ #include <asm/ppc_asm.h> _GLOBAL(memcpy_power7) + +#ifdef __BIG_ENDIAN__ +#define LVS(VRT,RA,RB) lvsl VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC +#else +#define LVS(VRT,RA,RB) lvsr VRT,RA,RB +#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC +#endif + #ifdef CONFIG_ALTIVEC cmpldi r5,16 cmpldi cr1,r5,4096 - std r3,48(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) blt .Lshort_copy bgt cr1,.Lvmx_copy #else cmpldi r5,16 - std r3,48(r1) + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) blt .Lshort_copy #endif @@ -207,7 +216,7 @@ _GLOBAL(memcpy_power7) lbz r0,0(r4) stb r0,0(r3) -15: ld r3,48(r1) +15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) blr .Lunwind_stack_nonvmx_copy: @@ -217,16 +226,16 @@ _GLOBAL(memcpy_power7) #ifdef CONFIG_ALTIVEC .Lvmx_copy: mflr r0 - std r4,56(r1) - std r5,64(r1) + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl .enter_vmx_copy + bl enter_vmx_copy cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld r3,STK_REG(R31)(r1) + ld r4,STK_REG(R30)(r1) + ld r5,STK_REG(R29)(r1) mtlr r0 /* @@ -438,8 +447,8 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) - b .exit_vmx_copy /* tail call optimise */ + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + b exit_vmx_copy /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7) li r10,32 li r11,48 - lvsl vr16,0,r4 /* Setup permute control vector */ + 
LVS(vr16,0,r4) /* Setup permute control vector */ lvx vr0,0,r4 addi r4,r4,16 bf cr7*4+3,5f lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) addi r4,r4,16 stvx vr8,r0,r3 addi r3,r3,16 @@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7) 5: bf cr7*4+2,6f lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + VPERM(vr9,vr1,vr0,vr16) addi r4,r4,32 stvx vr8,r0,r3 stvx vr9,r3,r9 @@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7) 6: bf cr7*4+1,7f lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 + VPERM(vr8,vr0,vr3,vr16) lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 + VPERM(vr9,vr3,vr2,vr16) lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 + VPERM(vr10,vr2,vr1,vr16) lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + VPERM(vr11,vr1,vr0,vr16) addi r4,r4,64 stvx vr8,r0,r3 stvx vr9,r3,r9 @@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7) .align 5 8: lvx vr7,r0,r4 - vperm vr8,vr0,vr7,vr16 + VPERM(vr8,vr0,vr7,vr16) lvx vr6,r4,r9 - vperm vr9,vr7,vr6,vr16 + VPERM(vr9,vr7,vr6,vr16) lvx vr5,r4,r10 - vperm vr10,vr6,vr5,vr16 + VPERM(vr10,vr6,vr5,vr16) lvx vr4,r4,r11 - vperm vr11,vr5,vr4,vr16 + VPERM(vr11,vr5,vr4,vr16) lvx vr3,r4,r12 - vperm vr12,vr4,vr3,vr16 + VPERM(vr12,vr4,vr3,vr16) lvx vr2,r4,r14 - vperm vr13,vr3,vr2,vr16 + VPERM(vr13,vr3,vr2,vr16) lvx vr1,r4,r15 - vperm vr14,vr2,vr1,vr16 + VPERM(vr14,vr2,vr1,vr16) lvx vr0,r4,r16 - vperm vr15,vr1,vr0,vr16 + VPERM(vr15,vr1,vr0,vr16) addi r4,r4,128 stvx vr8,r0,r3 stvx vr9,r3,r9 @@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7) bf cr7*4+1,9f lvx vr3,r0,r4 - vperm vr8,vr0,vr3,vr16 + VPERM(vr8,vr0,vr3,vr16) lvx vr2,r4,r9 - vperm vr9,vr3,vr2,vr16 + VPERM(vr9,vr3,vr2,vr16) lvx vr1,r4,r10 - vperm vr10,vr2,vr1,vr16 + VPERM(vr10,vr2,vr1,vr16) lvx vr0,r4,r11 - vperm vr11,vr1,vr0,vr16 + VPERM(vr11,vr1,vr0,vr16) addi r4,r4,64 stvx vr8,r0,r3 stvx vr9,r3,r9 @@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7) 9: bf cr7*4+2,10f lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) lvx vr0,r4,r9 - vperm vr9,vr1,vr0,vr16 + VPERM(vr9,vr1,vr0,vr16) addi r4,r4,32 stvx vr8,r0,r3 stvx vr9,r3,r9 @@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7) 10: bf cr7*4+3,11f lvx vr1,r0,r4 - vperm vr8,vr0,vr1,vr16 + VPERM(vr8,vr0,vr1,vr16) addi r4,r4,16 stvx vr8,r0,r3 addi r3,r3,16 @@ -642,6 +651,6 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) - b .exit_vmx_copy /* tail call optimise */ + ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) + b exit_vmx_copy /* tail call optimise */ #endif /* CONFiG_ALTIVEC */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e15c521846c..5c09f365c84 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -100,8 +100,10 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ea = (signed short) instr; /* sign-extend */ if (ra) { ea += regs->gpr[ra]; - if (instr & 0x04000000) /* update forms */ - regs->gpr[ra] = ea; + if (instr & 0x04000000) { /* update forms */ + if ((instr>>26) != 47) /* stmw is not an update form */ + regs->gpr[ra] = ea; + } } return truncate_if_32bit(regs->msr, ea); @@ -210,11 +212,19 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea, { int err; unsigned long x, b, c; +#ifdef __LITTLE_ENDIAN__ + int len = nb; /* save a copy of the length for byte reversal */ +#endif /* unaligned, do this in pieces */ x = 0; for (; nb > 0; nb -= c) { +#ifdef __LITTLE_ENDIAN__ + c = 1; +#endif +#ifdef __BIG_ENDIAN__ c = max_align(ea); +#endif if (c > nb) c = max_align(nb); err = read_mem_aligned(&b, ea, c); @@ -223,7 +233,24 @@ static int 
__kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea, x = (x << (8 * c)) + b; ea += c; } +#ifdef __LITTLE_ENDIAN__ + switch (len) { + case 2: + *dest = byterev_2(x); + break; + case 4: + *dest = byterev_4(x); + break; +#ifdef __powerpc64__ + case 8: + *dest = byterev_8(x); + break; +#endif + } +#endif +#ifdef __BIG_ENDIAN__ *dest = x; +#endif return 0; } @@ -271,15 +298,35 @@ static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea, int err; unsigned long c; +#ifdef __LITTLE_ENDIAN__ + switch (nb) { + case 2: + val = byterev_2(val); + break; + case 4: + val = byterev_4(val); + break; +#ifdef __powerpc64__ + case 8: + val = byterev_8(val); + break; +#endif + } +#endif /* unaligned or little-endian, do this in pieces */ for (; nb > 0; nb -= c) { +#ifdef __LITTLE_ENDIAN__ + c = 1; +#endif +#ifdef __BIG_ENDIAN__ c = max_align(ea); +#endif if (c > nb) c = max_align(nb); err = write_mem_aligned(val >> (nb - c) * 8, ea, c); if (err) return err; - ++ea; + ea += c; } return 0; } @@ -308,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long), struct pt_regs *regs) { int err; - unsigned long val[sizeof(double) / sizeof(long)]; + union { + double dbl; + unsigned long ul[2]; + struct { +#ifdef __BIG_ENDIAN__ + unsigned _pad_; + unsigned word; +#endif +#ifdef __LITTLE_ENDIAN__ + unsigned word; + unsigned _pad_; +#endif + } single; + } data; unsigned long ptr; if (!address_ok(regs, ea, nb)) return -EFAULT; if ((ea & 3) == 0) return (*func)(rn, ea); - ptr = (unsigned long) &val[0]; + ptr = (unsigned long) &data.ul; if (sizeof(unsigned long) == 8 || nb == 4) { - err = read_mem_unaligned(&val[0], ea, nb, regs); - ptr += sizeof(unsigned long) - nb; + err = read_mem_unaligned(&data.ul[0], ea, nb, regs); + if (nb == 4) + ptr = (unsigned long)&(data.single.word); } else { /* reading a double on 32-bit */ - err = read_mem_unaligned(&val[0], ea, 4, regs); + err = read_mem_unaligned(&data.ul[0], ea, 4, regs); if (!err) - err = read_mem_unaligned(&val[1], ea + 4, 4, regs); + err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs); } if (err) return err; @@ -335,28 +396,42 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long), struct pt_regs *regs) { int err; - unsigned long val[sizeof(double) / sizeof(long)]; + union { + double dbl; + unsigned long ul[2]; + struct { +#ifdef __BIG_ENDIAN__ + unsigned _pad_; + unsigned word; +#endif +#ifdef __LITTLE_ENDIAN__ + unsigned word; + unsigned _pad_; +#endif + } single; + } data; unsigned long ptr; if (!address_ok(regs, ea, nb)) return -EFAULT; if ((ea & 3) == 0) return (*func)(rn, ea); - ptr = (unsigned long) &val[0]; + ptr = (unsigned long) &data.ul[0]; if (sizeof(unsigned long) == 8 || nb == 4) { - ptr += sizeof(unsigned long) - nb; + if (nb == 4) + ptr = (unsigned long)&(data.single.word); err = (*func)(rn, ptr); if (err) return err; - err = write_mem_unaligned(val[0], ea, nb, regs); + err = write_mem_unaligned(data.ul[0], ea, nb, regs); } else { /* writing a double on 32-bit */ err = (*func)(rn, ptr); if (err) return err; - err = write_mem_unaligned(val[0], ea, 4, regs); + err = write_mem_unaligned(data.ul[0], ea, 4, regs); if (!err) - err = write_mem_unaligned(val[1], ea + 4, 4, regs); + err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs); } return err; } @@ -580,7 +655,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) - regs->nip = imm; + regs->nip = truncate_if_32bit(regs->msr, imm); 
return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ @@ -1123,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1133,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1141,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) regs->gpr[ra] = regs->gpr[rd] << sh; else @@ -1160,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1171,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1395,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) @@ -1503,6 +1578,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) */ if ((ra == 1) && !(regs->msr & MSR_PR) \ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32 /* * Check if we will touch kernel sack overflow */ @@ -1511,7 +1587,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = -EINVAL; break; } - +#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 3b1e48049fa..7bd9549a90a 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -77,7 +77,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL(__clear_user) +_GLOBAL_TOC(__clear_user) cmpdi r4,32 neg r6,r3 li r0,0 diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c new file mode 100644 index 00000000000..e905f7c2ea7 --- /dev/null +++ b/arch/powerpc/lib/xor_vmx.c @@ -0,0 +1,177 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <altivec.h> + +#include <linux/preempt.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <asm/switch_to.h> + +typedef vector signed char unative_t; + +#define DEFINE(V) \ + unative_t *V = (unative_t *)V##_in; \ + unative_t V##_0, V##_1, V##_2, V##_3 + +#define LOAD(V) \ + do { \ + V##_0 = V[0]; \ + V##_1 = V[1]; \ + V##_2 = V[2]; \ + V##_3 = V[3]; \ + } while (0) + +#define STORE(V) \ + do { \ + V[0] = V##_0; \ + V[1] = V##_1; \ + V[2] = V##_2; \ + V[3] = V##_3; \ + } while (0) + +#define XOR(V1, V2) \ + do { \ + V1##_0 = vec_xor(V1##_0, V2##_0); \ + V1##_1 = vec_xor(V1##_1, V2##_1); \ + V1##_2 = vec_xor(V1##_2, V2##_2); \ + V1##_3 = vec_xor(V1##_3, V2##_3); \ + } while (0) + +void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, + unsigned long *v2_in) +{ + DEFINE(v1); + DEFINE(v2); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + preempt_disable(); + enable_kernel_altivec(); + + do { + LOAD(v1); + LOAD(v2); + XOR(v1, v2); + STORE(v1); + + v1 += 4; + v2 += 4; + } while (--lines > 0); + + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_2); + +void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, + unsigned long *v2_in, unsigned long *v3_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + preempt_disable(); + enable_kernel_altivec(); + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + XOR(v1, v2); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + } while (--lines > 0); + + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_3); + +void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, + unsigned long *v2_in, unsigned long *v3_in, + unsigned long *v4_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + preempt_disable(); + enable_kernel_altivec(); + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + } while (--lines > 0); + + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_4); + +void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, + unsigned long *v2_in, unsigned long *v3_in, + unsigned long *v4_in, unsigned long *v5_in) +{ + DEFINE(v1); + DEFINE(v2); + DEFINE(v3); + DEFINE(v4); + DEFINE(v5); + unsigned long lines = bytes / (sizeof(unative_t)) / 4; + + preempt_disable(); + enable_kernel_altivec(); + + do { + LOAD(v1); + LOAD(v2); + LOAD(v3); + LOAD(v4); + LOAD(v5); + XOR(v1, v2); + XOR(v3, v4); + XOR(v1, v5); + XOR(v1, v3); + STORE(v1); + + v1 += 4; + v2 += 4; + v3 += 4; + v4 += 4; + v5 += 4; + } while (--lines > 0); + + preempt_enable(); +} +EXPORT_SYMBOL(xor_altivec_5); |
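The new xor_vmx.c above only supplies the raw AltiVec XOR kernels (built with -maltivec -mabi=altivec per the Makefile hunk); they do nothing until the generic RAID/crypto XOR code can select them. The header that registers them lives under arch/powerpc/include/asm and is therefore outside this diffstat, so the following is only a sketch of how such routines are typically hooked up through struct xor_block_template from include/linux/raid/xor.h. Everything here other than the xor_altivec_* prototypes is an assumption, not part of this diff.

/*
 * Hypothetical asm/xor.h wiring -- a sketch only, not the header from
 * this series.  It assumes the generic xor_block_template interface
 * (include/linux/raid/xor.h), the stock integer templates from
 * asm-generic/xor.h, and cpu_has_feature(CPU_FTR_ALTIVEC).
 */
#include <asm/cputable.h>

void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in);
void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in);
void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in);
void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
		   unsigned long *v2_in, unsigned long *v3_in,
		   unsigned long *v4_in, unsigned long *v5_in);

static struct xor_block_template xor_block_altivec = {
	.name = "altivec",
	.do_2 = xor_altivec_2,
	.do_3 = xor_altivec_3,
	.do_4 = xor_altivec_4,
	.do_5 = xor_altivec_5,
};

/* Pull in the plain integer templates, then override the probe list so
 * crypto/xor.c also benchmarks the AltiVec version on CPUs that have it.
 */
#include <asm-generic/xor.h>

#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
do {							\
	xor_speed(&xor_block_8regs);			\
	xor_speed(&xor_block_32regs);			\
	if (cpu_has_feature(CPU_FTR_ALTIVEC))		\
		xor_speed(&xor_block_altivec);		\
} while (0)

At boot, crypto/xor.c times every template in that list and keeps the fastest one, so the AltiVec path is only used where it actually wins on the machine at hand.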
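One further note on the sstep.c hunks: on little-endian, read_mem_unaligned() and write_mem_unaligned() now go byte-by-byte (c = 1) and byte-reverse the accumulated value, because the accumulation loop x = (x << 8 * c) + b inherently builds a big-endian image of the data. Below is a minimal userspace model of the same idea, with a hypothetical helper name; it is an illustration, not kernel code.

/*
 * Illustration only -- models why the little-endian path in
 * read_mem_unaligned() must byterev_*() its result: byte-wise
 * accumulation yields the big-endian interpretation of the bytes, so a
 * final swap recovers the value a little-endian load would have seen.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t read_u32_bytewise(const unsigned char *p)
{
	uint32_t x = 0;

	for (int i = 0; i < 4; i++)
		x = (x << 8) | p[i];	/* big-endian accumulation */
	return __builtin_bswap32(x);	/* undo it for little-endian */
}

int main(void)
{
	unsigned char buf[] = { 0x78, 0x56, 0x34, 0x12 };	/* LE 0x12345678 */

	printf("%#x\n", read_u32_bytewise(buf));	/* prints 0x12345678 */
	return 0;
}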
