aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--arch/powerpc/lib/Makefile19
-rw-r--r--arch/powerpc/lib/checksum_64.S58
-rw-r--r--arch/powerpc/lib/code-patching.c15
-rw-r--r--arch/powerpc/lib/copypage_64.S4
-rw-r--r--arch/powerpc/lib/copypage_power7.S12
-rw-r--r--arch/powerpc/lib/copyuser_64.S55
-rw-r--r--arch/powerpc/lib/copyuser_power7.S86
-rw-r--r--arch/powerpc/lib/crtsavres.S186
-rw-r--r--arch/powerpc/lib/hweight_64.S8
-rw-r--r--arch/powerpc/lib/mem_64.S6
-rw-r--r--arch/powerpc/lib/memcpy_64.S28
-rw-r--r--arch/powerpc/lib/memcpy_power7.S81
-rw-r--r--arch/powerpc/lib/sstep.c112
-rw-r--r--arch/powerpc/lib/string_64.S2
-rw-r--r--arch/powerpc/lib/xor_vmx.c177
15 files changed, 697 insertions, 152 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 45043327669..59fa2de9546 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -10,15 +10,21 @@ CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg
obj-y := string.o alloc.o \
- checksum_$(CONFIG_WORD_SIZE).o crtsavres.o
+ crtsavres.o
obj-$(CONFIG_PPC32) += div64.o copy_32.o
obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
- memcpy_64.o usercopy_64.o mem_64.o string.o \
- checksum_wrappers_64.o hweight_64.o \
- copyuser_power7.o string_64.o copypage_power7.o \
- memcpy_power7.o
+ usercopy_64.o mem_64.o string.o \
+ hweight_64.o \
+ copyuser_power7.o string_64.o copypage_power7.o
+ifeq ($(CONFIG_GENERIC_CSUM),)
+obj-y += checksum_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
+endif
+
+obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
+
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
ifeq ($(CONFIG_PPC64),y)
@@ -31,3 +37,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-y += code-patching.o
obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o
+CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 167f72555d6..57a07206505 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -226,19 +226,35 @@ _GLOBAL(csum_partial)
blr
- .macro source
+ .macro srcnr
100:
.section __ex_table,"a"
.align 3
- .llong 100b,.Lsrc_error
+ .llong 100b,.Lsrc_error_nr
.previous
.endm
- .macro dest
+ .macro source
+150:
+ .section __ex_table,"a"
+ .align 3
+ .llong 150b,.Lsrc_error
+ .previous
+ .endm
+
+ .macro dstnr
200:
.section __ex_table,"a"
.align 3
- .llong 200b,.Ldest_error
+ .llong 200b,.Ldest_error_nr
+ .previous
+ .endm
+
+ .macro dest
+250:
+ .section __ex_table,"a"
+ .align 3
+ .llong 250b,.Ldest_error
.previous
.endm
@@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic)
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
beq .Lcopy_aligned
- li r7,4
- sub r6,r7,r6
+ li r9,4
+ sub r6,r9,r6
mtctr r6
1:
-source; lhz r6,0(r3) /* align to doubleword */
+srcnr; lhz r6,0(r3) /* align to doubleword */
subi r5,r5,2
addi r3,r3,2
adde r0,r0,r6
-dest; sth r6,0(r4)
+dstnr; sth r6,0(r4)
addi r4,r4,2
bdnz 1b
@@ -392,10 +408,10 @@ dest; std r16,56(r4)
mtctr r6
3:
-source; ld r6,0(r3)
+srcnr; ld r6,0(r3)
addi r3,r3,8
adde r0,r0,r6
-dest; std r6,0(r4)
+dstnr; std r6,0(r4)
addi r4,r4,8
bdnz 3b
@@ -405,10 +421,10 @@ dest; std r6,0(r4)
srdi. r6,r5,2
beq .Lcopy_tail_halfword
-source; lwz r6,0(r3)
+srcnr; lwz r6,0(r3)
addi r3,r3,4
adde r0,r0,r6
-dest; stw r6,0(r4)
+dstnr; stw r6,0(r4)
addi r4,r4,4
subi r5,r5,4
@@ -416,10 +432,10 @@ dest; stw r6,0(r4)
srdi. r6,r5,1
beq .Lcopy_tail_byte
-source; lhz r6,0(r3)
+srcnr; lhz r6,0(r3)
addi r3,r3,2
adde r0,r0,r6
-dest; sth r6,0(r4)
+dstnr; sth r6,0(r4)
addi r4,r4,2
subi r5,r5,2
@@ -427,10 +443,10 @@ dest; sth r6,0(r4)
andi. r6,r5,1
beq .Lcopy_finish
-source; lbz r6,0(r3)
+srcnr; lbz r6,0(r3)
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
-dest; stb r6,0(r4)
+dstnr; stb r6,0(r4)
.Lcopy_finish:
addze r0,r0 /* add in final carry */
@@ -440,6 +456,11 @@ dest; stb r6,0(r4)
blr
.Lsrc_error:
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Lsrc_error_nr:
cmpdi 0,r7,0
beqlr
li r6,-EFAULT
@@ -447,6 +468,11 @@ dest; stb r6,0(r4)
blr
.Ldest_error:
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldest_error_nr:
cmpdi 0,r8,0
beqlr
li r6,-EFAULT
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 17e5b236431..d5edbeb8eb8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -159,6 +159,21 @@ unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
return 0;
}
+#ifdef CONFIG_PPC_BOOK3E_64
+void __patch_exception(int exc, unsigned long addr)
+{
+ extern unsigned int interrupt_base_book3e;
+ unsigned int *ibase = &interrupt_base_book3e;
+
+ /* Our exceptions vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
+#endif
#ifdef CONFIG_CODE_PATCHING_SELFTEST
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 9f9434a8526..a3c4dc4defd 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -16,11 +16,11 @@ PPC64_CACHES:
.tc ppc64_caches[TC],ppc64_caches
.section ".text"
-_GLOBAL(copy_page)
+_GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
FTR_SECTION_ELSE
- b .copypage_power7
+ b copypage_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index 395c594722a..d7dafb3777a 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -56,15 +56,15 @@ _GLOBAL(copypage_power7)
#ifdef CONFIG_ALTIVEC
mflr r0
- std r3,48(r1)
- std r4,56(r1)
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl .enter_vmx_copy
+ bl enter_vmx_copy
cmpwi r3,0
ld r0,STACKFRAMESIZE+16(r1)
- ld r3,STACKFRAMESIZE+48(r1)
- ld r4,STACKFRAMESIZE+56(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
mtlr r0
li r0,(PAGE_SIZE/128)
@@ -103,7 +103,7 @@ _GLOBAL(copypage_power7)
addi r3,r3,128
bdnz 1b
- b .exit_vmx_copy /* tail call optimise */
+ b exit_vmx_copy /* tail call optimise */
#else
li r0,(PAGE_SIZE/128)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index d73a5901490..0860ee46013 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,8 +9,16 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#ifdef __BIG_ENDIAN__
+#define sLd sld /* Shift towards low-numbered address. */
+#define sHd srd /* Shift towards high-numbered address. */
+#else
+#define sLd srd /* Shift towards low-numbered address. */
+#define sHd sld /* Shift towards high-numbered address. */
+#endif
+
.align 7
-_GLOBAL(__copy_tofrom_user)
+_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
25: ld r0,8(r4)
- sld r6,r9,r10
+ sLd r6,r9,r10
26: ldu r9,16(r4)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
27: ld r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
29: ldu r9,8(r4)
- sld r8,r0,r10
+ sLd r8,r0,r10
addi r3,r3,-8
blt cr6,5f
30: ld r0,8(r4)
- srd r12,r9,r11
- sld r6,r9,r10
+ sHd r12,r9,r11
+ sLd r6,r9,r10
31: ldu r9,16(r4)
or r12,r8,r12
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
addi r3,r3,16
beq cr6,78f
1: or r7,r7,r6
32: ld r0,8(r4)
76: std r12,8(r3)
-2: srd r12,r9,r11
- sld r6,r9,r10
+2: sHd r12,r9,r11
+ sLd r6,r9,r10
33: ldu r9,16(r4)
or r12,r8,r12
77: stdu r7,16(r3)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
bdnz 1b
78: std r12,8(r3)
or r7,r7,r6
79: std r7,16(r3)
-5: srd r12,r9,r11
+5: sHd r12,r9,r11
or r12,r8,r12
80: std r12,24(r3)
bne 6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
- sld r9,r9,r10
+ sLd r9,r9,r10
ble cr1,7f
34: ld r0,8(r4)
- srd r7,r0,r11
+ sHd r7,r0,r11
or r9,r7,r9
7:
bf cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
+#endif
94: stw r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,32
+#endif
addi r3,r3,4
1: bf cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
+#endif
95: sth r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,16
+#endif
addi r3,r3,2
2: bf cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
+#endif
96: stb r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,8
+#endif
3: li r3,0
blr
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d1f11795a7a..c46c876ac96 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -19,6 +19,14 @@
*/
#include <asm/ppc_asm.h>
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
.macro err1
100:
.section __ex_table,"a"
@@ -58,7 +66,7 @@
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
.Ldo_err3:
- bl .exit_vmx_usercopy
+ bl exit_vmx_usercopy
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
@@ -77,9 +85,9 @@
.Lexit:
addi r1,r1,STACKFRAMESIZE
.Ldo_err1:
- ld r3,48(r1)
- ld r4,56(r1)
- ld r5,64(r1)
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
b __copy_tofrom_user_base
@@ -88,18 +96,18 @@ _GLOBAL(__copy_tofrom_user_power7)
cmpldi r5,16
cmpldi cr1,r5,4096
- std r3,48(r1)
- std r4,56(r1)
- std r5,64(r1)
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
bgt cr1,.Lvmx_copy
#else
cmpldi r5,16
- std r3,48(r1)
- std r4,56(r1)
- std r5,64(r1)
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
#endif
@@ -287,12 +295,12 @@ err1; stb r0,0(r3)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl .enter_vmx_usercopy
+ bl enter_vmx_usercopy
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
- ld r3,STACKFRAMESIZE+48(r1)
- ld r4,STACKFRAMESIZE+56(r1)
- ld r5,STACKFRAMESIZE+64(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
mtlr r0
/*
@@ -506,7 +514,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_usercopy /* tail call optimise */
+ b exit_vmx_usercopy /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -552,13 +560,13 @@ err3; stw r7,4(r3)
li r10,32
li r11,48
- lvsl vr16,0,r4 /* Setup permute control vector */
+ LVS(vr16,0,r4) /* Setup permute control vector */
err3; lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
err3; lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
@@ -566,9 +574,9 @@ err3; stvx vr8,r0,r3
5: bf cr7*4+2,6f
err3; lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
- vperm vr9,vr1,vr0,vr16
+ VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
@@ -576,13 +584,13 @@ err3; stvx vr9,r3,r9
6: bf cr7*4+1,7f
err3; lvx vr3,r0,r4
- vperm vr8,vr0,vr3,vr16
+ VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
- vperm vr9,vr3,vr2,vr16
+ VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
- vperm vr10,vr2,vr1,vr16
+ VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
- vperm vr11,vr1,vr0,vr16
+ VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
@@ -611,21 +619,21 @@ err3; stvx vr11,r3,r11
.align 5
8:
err4; lvx vr7,r0,r4
- vperm vr8,vr0,vr7,vr16
+ VPERM(vr8,vr0,vr7,vr16)
err4; lvx vr6,r4,r9
- vperm vr9,vr7,vr6,vr16
+ VPERM(vr9,vr7,vr6,vr16)
err4; lvx vr5,r4,r10
- vperm vr10,vr6,vr5,vr16
+ VPERM(vr10,vr6,vr5,vr16)
err4; lvx vr4,r4,r11
- vperm vr11,vr5,vr4,vr16
+ VPERM(vr11,vr5,vr4,vr16)
err4; lvx vr3,r4,r12
- vperm vr12,vr4,vr3,vr16
+ VPERM(vr12,vr4,vr3,vr16)
err4; lvx vr2,r4,r14
- vperm vr13,vr3,vr2,vr16
+ VPERM(vr13,vr3,vr2,vr16)
err4; lvx vr1,r4,r15
- vperm vr14,vr2,vr1,vr16
+ VPERM(vr14,vr2,vr1,vr16)
err4; lvx vr0,r4,r16
- vperm vr15,vr1,vr0,vr16
+ VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
err4; stvx vr8,r0,r3
err4; stvx vr9,r3,r9
@@ -649,13 +657,13 @@ err4; stvx vr15,r3,r16
bf cr7*4+1,9f
err3; lvx vr3,r0,r4
- vperm vr8,vr0,vr3,vr16
+ VPERM(vr8,vr0,vr3,vr16)
err3; lvx vr2,r4,r9
- vperm vr9,vr3,vr2,vr16
+ VPERM(vr9,vr3,vr2,vr16)
err3; lvx vr1,r4,r10
- vperm vr10,vr2,vr1,vr16
+ VPERM(vr10,vr2,vr1,vr16)
err3; lvx vr0,r4,r11
- vperm vr11,vr1,vr0,vr16
+ VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
@@ -665,9 +673,9 @@ err3; stvx vr11,r3,r11
9: bf cr7*4+2,10f
err3; lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
err3; lvx vr0,r4,r9
- vperm vr9,vr1,vr0,vr16
+ VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
err3; stvx vr8,r0,r3
err3; stvx vr9,r3,r9
@@ -675,7 +683,7 @@ err3; stvx vr9,r3,r9
10: bf cr7*4+3,11f
err3; lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
err3; stvx vr8,r0,r3
addi r3,r3,16
@@ -709,5 +717,5 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_usercopy /* tail call optimise */
+ b exit_vmx_usercopy /* tail call optimise */
#endif /* CONFiG_ALTIVEC */
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index b2c68ce139a..a5b30c71a8d 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -231,6 +231,87 @@ _GLOBAL(_rest32gpr_31_x)
mr 1,11
blr
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+_GLOBAL(_savevr_20)
+ li r11,-192
+ stvx vr20,r11,r0
+_GLOBAL(_savevr_21)
+ li r11,-176
+ stvx vr21,r11,r0
+_GLOBAL(_savevr_22)
+ li r11,-160
+ stvx vr22,r11,r0
+_GLOBAL(_savevr_23)
+ li r11,-144
+ stvx vr23,r11,r0
+_GLOBAL(_savevr_24)
+ li r11,-128
+ stvx vr24,r11,r0
+_GLOBAL(_savevr_25)
+ li r11,-112
+ stvx vr25,r11,r0
+_GLOBAL(_savevr_26)
+ li r11,-96
+ stvx vr26,r11,r0
+_GLOBAL(_savevr_27)
+ li r11,-80
+ stvx vr27,r11,r0
+_GLOBAL(_savevr_28)
+ li r11,-64
+ stvx vr28,r11,r0
+_GLOBAL(_savevr_29)
+ li r11,-48
+ stvx vr29,r11,r0
+_GLOBAL(_savevr_30)
+ li r11,-32
+ stvx vr30,r11,r0
+_GLOBAL(_savevr_31)
+ li r11,-16
+ stvx vr31,r11,r0
+ blr
+
+_GLOBAL(_restvr_20)
+ li r11,-192
+ lvx vr20,r11,r0
+_GLOBAL(_restvr_21)
+ li r11,-176
+ lvx vr21,r11,r0
+_GLOBAL(_restvr_22)
+ li r11,-160
+ lvx vr22,r11,r0
+_GLOBAL(_restvr_23)
+ li r11,-144
+ lvx vr23,r11,r0
+_GLOBAL(_restvr_24)
+ li r11,-128
+ lvx vr24,r11,r0
+_GLOBAL(_restvr_25)
+ li r11,-112
+ lvx vr25,r11,r0
+_GLOBAL(_restvr_26)
+ li r11,-96
+ lvx vr26,r11,r0
+_GLOBAL(_restvr_27)
+ li r11,-80
+ lvx vr27,r11,r0
+_GLOBAL(_restvr_28)
+ li r11,-64
+ lvx vr28,r11,r0
+_GLOBAL(_restvr_29)
+ li r11,-48
+ lvx vr29,r11,r0
+_GLOBAL(_restvr_30)
+ li r11,-32
+ lvx vr30,r11,r0
+_GLOBAL(_restvr_31)
+ li r11,-16
+ lvx vr31,r11,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
#else /* CONFIG_PPC64 */
.section ".text.save.restore","ax",@progbits
@@ -356,6 +437,111 @@ _restgpr0_31:
mtlr r0
blr
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+.globl _savevr_20
+_savevr_20:
+ li r12,-192
+ stvx vr20,r12,r0
+.globl _savevr_21
+_savevr_21:
+ li r12,-176
+ stvx vr21,r12,r0
+.globl _savevr_22
+_savevr_22:
+ li r12,-160
+ stvx vr22,r12,r0
+.globl _savevr_23
+_savevr_23:
+ li r12,-144
+ stvx vr23,r12,r0
+.globl _savevr_24
+_savevr_24:
+ li r12,-128
+ stvx vr24,r12,r0
+.globl _savevr_25
+_savevr_25:
+ li r12,-112
+ stvx vr25,r12,r0
+.globl _savevr_26
+_savevr_26:
+ li r12,-96
+ stvx vr26,r12,r0
+.globl _savevr_27
+_savevr_27:
+ li r12,-80
+ stvx vr27,r12,r0
+.globl _savevr_28
+_savevr_28:
+ li r12,-64
+ stvx vr28,r12,r0
+.globl _savevr_29
+_savevr_29:
+ li r12,-48
+ stvx vr29,r12,r0
+.globl _savevr_30
+_savevr_30:
+ li r12,-32
+ stvx vr30,r12,r0
+.globl _savevr_31
+_savevr_31:
+ li r12,-16
+ stvx vr31,r12,r0
+ blr
+
+.globl _restvr_20
+_restvr_20:
+ li r12,-192
+ lvx vr20,r12,r0
+.globl _restvr_21
+_restvr_21:
+ li r12,-176
+ lvx vr21,r12,r0
+.globl _restvr_22
+_restvr_22:
+ li r12,-160
+ lvx vr22,r12,r0
+.globl _restvr_23
+_restvr_23:
+ li r12,-144
+ lvx vr23,r12,r0
+.globl _restvr_24
+_restvr_24:
+ li r12,-128
+ lvx vr24,r12,r0
+.globl _restvr_25
+_restvr_25:
+ li r12,-112
+ lvx vr25,r12,r0
+.globl _restvr_26
+_restvr_26:
+ li r12,-96
+ lvx vr26,r12,r0
+.globl _restvr_27
+_restvr_27:
+ li r12,-80
+ lvx vr27,r12,r0
+.globl _restvr_28
+_restvr_28:
+ li r12,-64
+ lvx vr28,r12,r0
+.globl _restvr_29
+_restvr_29:
+ li r12,-48
+ lvx vr29,r12,r0
+.globl _restvr_30
+_restvr_30:
+ li r12,-32
+ lvx vr30,r12,r0
+.globl _restvr_31
+_restvr_31:
+ li r12,-16
+ lvx vr31,r12,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
#endif /* CONFIG_PPC64 */
#endif
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 9b96ff2ecd4..19e66001a4f 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -24,7 +24,7 @@
_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
- b .__sw_hweight8
+ b __sw_hweight8
nop
nop
FTR_SECTION_ELSE
@@ -35,7 +35,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
- b .__sw_hweight16
+ b __sw_hweight16
nop
nop
nop
@@ -57,7 +57,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
- b .__sw_hweight32
+ b __sw_hweight32
nop
nop
nop
@@ -82,7 +82,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
- b .__sw_hweight64
+ b __sw_hweight64
nop
nop
nop
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index f4fcb0bc656..43435c6892f 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -77,10 +77,10 @@ _GLOBAL(memset)
stb r4,0(r6)
blr
-_GLOBAL(memmove)
+_GLOBAL_TOC(memmove)
cmplw 0,r3,r4
- bgt .backwards_memcpy
- b .memcpy
+ bgt backwards_memcpy
+ b memcpy
_GLOBAL(backwards_memcpy)
rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index d2bbbc8d7dc..32a06ec395d 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -10,12 +10,29 @@
#include <asm/ppc_asm.h>
.align 7
-_GLOBAL(memcpy)
+_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
- std r3,48(r1) /* save destination pointer for return value */
+#ifdef __LITTLE_ENDIAN__
+ cmpdi cr7,r5,0
+#else
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
+#endif
FTR_SECTION_ELSE
+#ifndef SELFTEST
b memcpy_power7
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+ /* dumb little-endian memcpy that will get replaced at runtime */
+ addi r9,r3,-1
+ addi r4,r4,-1
+ beqlr cr7
+ mtctr r5
+1: lbzu r10,1(r4)
+ stbu r10,1(r9)
+ bdnz 1b
+ blr
+#else
PPC_MTOCRF(0x01,r5)
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
@@ -71,7 +88,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
2: bf cr7*4+3,3f
lbz r9,8(r4)
stb r9,0(r3)
-3: ld r3,48(r1) /* return dest pointer */
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
.Lsrc_unaligned:
@@ -154,7 +171,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
2: bf cr7*4+3,3f
rotldi r9,r9,8
stb r9,0(r3)
-3: ld r3,48(r1) /* return dest pointer */
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
.Ldst_unaligned:
@@ -199,5 +216,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
3: bf cr7*4+3,4f
lbz r0,0(r4)
stb r0,0(r3)
-4: ld r3,48(r1) /* return dest pointer */
+4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
+#endif
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 0663630baf3..2ff5c142f87 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -20,18 +20,27 @@
#include <asm/ppc_asm.h>
_GLOBAL(memcpy_power7)
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
#ifdef CONFIG_ALTIVEC
cmpldi r5,16
cmpldi cr1,r5,4096
- std r3,48(r1)
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blt .Lshort_copy
bgt cr1,.Lvmx_copy
#else
cmpldi r5,16
- std r3,48(r1)
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blt .Lshort_copy
#endif
@@ -207,7 +216,7 @@ _GLOBAL(memcpy_power7)
lbz r0,0(r4)
stb r0,0(r3)
-15: ld r3,48(r1)
+15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
blr
.Lunwind_stack_nonvmx_copy:
@@ -217,16 +226,16 @@ _GLOBAL(memcpy_power7)
#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
mflr r0
- std r4,56(r1)
- std r5,64(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl .enter_vmx_copy
+ bl enter_vmx_copy
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
- ld r3,STACKFRAMESIZE+48(r1)
- ld r4,STACKFRAMESIZE+56(r1)
- ld r5,STACKFRAMESIZE+64(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
mtlr r0
/*
@@ -438,8 +447,8 @@ _GLOBAL(memcpy_power7)
stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- ld r3,48(r1)
- b .exit_vmx_copy /* tail call optimise */
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b exit_vmx_copy /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)
li r10,32
li r11,48
- lvsl vr16,0,r4 /* Setup permute control vector */
+ LVS(vr16,0,r4) /* Setup permute control vector */
lvx vr0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
@@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)
5: bf cr7*4+2,6f
lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
- vperm vr9,vr1,vr0,vr16
+ VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
@@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)
6: bf cr7*4+1,7f
lvx vr3,r0,r4
- vperm vr8,vr0,vr3,vr16
+ VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
- vperm vr9,vr3,vr2,vr16
+ VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
- vperm vr10,vr2,vr1,vr16
+ VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
- vperm vr11,vr1,vr0,vr16
+ VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
@@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)
.align 5
8:
lvx vr7,r0,r4
- vperm vr8,vr0,vr7,vr16
+ VPERM(vr8,vr0,vr7,vr16)
lvx vr6,r4,r9
- vperm vr9,vr7,vr6,vr16
+ VPERM(vr9,vr7,vr6,vr16)
lvx vr5,r4,r10
- vperm vr10,vr6,vr5,vr16
+ VPERM(vr10,vr6,vr5,vr16)
lvx vr4,r4,r11
- vperm vr11,vr5,vr4,vr16
+ VPERM(vr11,vr5,vr4,vr16)
lvx vr3,r4,r12
- vperm vr12,vr4,vr3,vr16
+ VPERM(vr12,vr4,vr3,vr16)
lvx vr2,r4,r14
- vperm vr13,vr3,vr2,vr16
+ VPERM(vr13,vr3,vr2,vr16)
lvx vr1,r4,r15
- vperm vr14,vr2,vr1,vr16
+ VPERM(vr14,vr2,vr1,vr16)
lvx vr0,r4,r16
- vperm vr15,vr1,vr0,vr16
+ VPERM(vr15,vr1,vr0,vr16)
addi r4,r4,128
stvx vr8,r0,r3
stvx vr9,r3,r9
@@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)
bf cr7*4+1,9f
lvx vr3,r0,r4
- vperm vr8,vr0,vr3,vr16
+ VPERM(vr8,vr0,vr3,vr16)
lvx vr2,r4,r9
- vperm vr9,vr3,vr2,vr16
+ VPERM(vr9,vr3,vr2,vr16)
lvx vr1,r4,r10
- vperm vr10,vr2,vr1,vr16
+ VPERM(vr10,vr2,vr1,vr16)
lvx vr0,r4,r11
- vperm vr11,vr1,vr0,vr16
+ VPERM(vr11,vr1,vr0,vr16)
addi r4,r4,64
stvx vr8,r0,r3
stvx vr9,r3,r9
@@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)
9: bf cr7*4+2,10f
lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
lvx vr0,r4,r9
- vperm vr9,vr1,vr0,vr16
+ VPERM(vr9,vr1,vr0,vr16)
addi r4,r4,32
stvx vr8,r0,r3
stvx vr9,r3,r9
@@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)
10: bf cr7*4+3,11f
lvx vr1,r0,r4
- vperm vr8,vr0,vr1,vr16
+ VPERM(vr8,vr0,vr1,vr16)
addi r4,r4,16
stvx vr8,r0,r3
addi r3,r3,16
@@ -642,6 +651,6 @@ _GLOBAL(memcpy_power7)
stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- ld r3,48(r1)
- b .exit_vmx_copy /* tail call optimise */
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b exit_vmx_copy /* tail call optimise */
#endif /* CONFiG_ALTIVEC */
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a7ee978fb86..5c09f365c84 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -212,11 +212,19 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
{
int err;
unsigned long x, b, c;
+#ifdef __LITTLE_ENDIAN__
+ int len = nb; /* save a copy of the length for byte reversal */
+#endif
/* unaligned, do this in pieces */
x = 0;
for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+ c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
c = max_align(ea);
+#endif
if (c > nb)
c = max_align(nb);
err = read_mem_aligned(&b, ea, c);
@@ -225,7 +233,24 @@ static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
x = (x << (8 * c)) + b;
ea += c;
}
+#ifdef __LITTLE_ENDIAN__
+ switch (len) {
+ case 2:
+ *dest = byterev_2(x);
+ break;
+ case 4:
+ *dest = byterev_4(x);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *dest = byterev_8(x);
+ break;
+#endif
+ }
+#endif
+#ifdef __BIG_ENDIAN__
*dest = x;
+#endif
return 0;
}
@@ -273,9 +298,29 @@ static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,
int err;
unsigned long c;
+#ifdef __LITTLE_ENDIAN__
+ switch (nb) {
+ case 2:
+ val = byterev_2(val);
+ break;
+ case 4:
+ val = byterev_4(val);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ val = byterev_8(val);
+ break;
+#endif
+ }
+#endif
/* unaligned or little-endian, do this in pieces */
for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+ c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
c = max_align(ea);
+#endif
if (c > nb)
c = max_align(nb);
err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
@@ -310,22 +355,36 @@ static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
struct pt_regs *regs)
{
int err;
- unsigned long val[sizeof(double) / sizeof(long)];
+ union {
+ double dbl;
+ unsigned long ul[2];
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned _pad_;
+ unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+ unsigned word;
+ unsigned _pad_;
+#endif
+ } single;
+ } data;
unsigned long ptr;
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & 3) == 0)
return (*func)(rn, ea);
- ptr = (unsigned long) &val[0];
+ ptr = (unsigned long) &data.ul;
if (sizeof(unsigned long) == 8 || nb == 4) {
- err = read_mem_unaligned(&val[0], ea, nb, regs);
- ptr += sizeof(unsigned long) - nb;
+ err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
+ if (nb == 4)
+ ptr = (unsigned long)&(data.single.word);
} else {
/* reading a double on 32-bit */
- err = read_mem_unaligned(&val[0], ea, 4, regs);
+ err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
if (!err)
- err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
+ err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
}
if (err)
return err;
@@ -337,28 +396,42 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
struct pt_regs *regs)
{
int err;
- unsigned long val[sizeof(double) / sizeof(long)];
+ union {
+ double dbl;
+ unsigned long ul[2];
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned _pad_;
+ unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+ unsigned word;
+ unsigned _pad_;
+#endif
+ } single;
+ } data;
unsigned long ptr;
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & 3) == 0)
return (*func)(rn, ea);
- ptr = (unsigned long) &val[0];
+ ptr = (unsigned long) &data.ul[0];
if (sizeof(unsigned long) == 8 || nb == 4) {
- ptr += sizeof(unsigned long) - nb;
+ if (nb == 4)
+ ptr = (unsigned long)&(data.single.word);
err = (*func)(rn, ptr);
if (err)
return err;
- err = write_mem_unaligned(val[0], ea, nb, regs);
+ err = write_mem_unaligned(data.ul[0], ea, nb, regs);
} else {
/* writing a double on 32-bit */
err = (*func)(rn, ptr);
if (err)
return err;
- err = write_mem_unaligned(val[0], ea, 4, regs);
+ err = write_mem_unaligned(data.ul[0], ea, 4, regs);
if (!err)
- err = write_mem_unaligned(val[1], ea + 4, 4, regs);
+ err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
}
return err;
}
@@ -1125,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
sh = regs->gpr[rb] & 0x3f;
ival = (signed int) regs->gpr[rd];
regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
- if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0))
+ if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
regs->xer |= XER_CA;
else
regs->xer &= ~XER_CA;
@@ -1135,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
sh = rb;
ival = (signed int) regs->gpr[rd];
regs->gpr[ra] = ival >> sh;
- if (ival < 0 && (ival & ((1 << sh) - 1)) != 0)
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
regs->xer |= XER_CA;
else
regs->xer &= ~XER_CA;
@@ -1143,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
#ifdef __powerpc64__
case 27: /* sld */
- sh = regs->gpr[rd] & 0x7f;
+ sh = regs->gpr[rb] & 0x7f;
if (sh < 64)
regs->gpr[ra] = regs->gpr[rd] << sh;
else
@@ -1162,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
sh = regs->gpr[rb] & 0x7f;
ival = (signed long int) regs->gpr[rd];
regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
- if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0))
+ if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
regs->xer |= XER_CA;
else
regs->xer &= ~XER_CA;
@@ -1173,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
sh = rb | ((instr & 2) << 4);
ival = (signed long int) regs->gpr[rd];
regs->gpr[ra] = ival >> sh;
- if (ival < 0 && (ival & ((1 << sh) - 1)) != 0)
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
regs->xer |= XER_CA;
else
regs->xer &= ~XER_CA;
@@ -1397,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
regs->gpr[rd] = byterev_4(val);
goto ldst_done;
-#ifdef CONFIG_PPC_CPU
+#ifdef CONFIG_PPC_FPU
case 535: /* lfsx */
case 567: /* lfsux */
if (!(regs->msr & MSR_FP))
@@ -1505,6 +1578,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
*/
if ((ra == 1) && !(regs->msr & MSR_PR) \
&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
/*
* Check if we will touch kernel sack overflow
*/
@@ -1513,7 +1587,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
err = -EINVAL;
break;
}
-
+#endif /* CONFIG_PPC32 */
/*
* Check if we already set since that means we'll
* lose the previous value.
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 3b1e48049fa..7bd9549a90a 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -77,7 +77,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
-_GLOBAL(__clear_user)
+_GLOBAL_TOC(__clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
new file mode 100644
index 00000000000..e905f7c2ea7
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -0,0 +1,177 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <altivec.h>
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V) \
+ unative_t *V = (unative_t *)V##_in; \
+ unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V) \
+ do { \
+ V##_0 = V[0]; \
+ V##_1 = V[1]; \
+ V##_2 = V[2]; \
+ V##_3 = V[3]; \
+ } while (0)
+
+#define STORE(V) \
+ do { \
+ V[0] = V##_0; \
+ V[1] = V##_1; \
+ V[2] = V##_2; \
+ V[3] = V##_3; \
+ } while (0)
+
+#define XOR(V1, V2) \
+ do { \
+ V1##_0 = vec_xor(V1##_0, V2##_0); \
+ V1##_1 = vec_xor(V1##_1, V2##_1); \
+ V1##_2 = vec_xor(V1##_2, V2##_2); \
+ V1##_3 = vec_xor(V1##_3, V2##_3); \
+ } while (0)
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ XOR(v1, v2);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ XOR(v1, v2);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ DEFINE(v5);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ LOAD(v5);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v5);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ v5 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);