Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/Makefile                |   37
-rw-r--r--  arch/powerpc/lib/alloc.c                 |   10
-rw-r--r--  arch/powerpc/lib/checksum_64.S           |  505
-rw-r--r--  arch/powerpc/lib/checksum_wrappers_64.c  |  102
-rw-r--r--  arch/powerpc/lib/code-patching.c         |  470
-rw-r--r--  arch/powerpc/lib/copy_32.S               |   26
-rw-r--r--  arch/powerpc/lib/copypage_64.S           |  205
-rw-r--r--  arch/powerpc/lib/copypage_power7.S       |  168
-rw-r--r--  arch/powerpc/lib/copyuser_64.S           |  196
-rw-r--r--  arch/powerpc/lib/copyuser_power7.S       |  721
-rw-r--r--  arch/powerpc/lib/crtsavres.S             |  547
-rw-r--r--  arch/powerpc/lib/devres.c                |   43
-rw-r--r--  arch/powerpc/lib/dma-noncoherent.c       |  420
-rw-r--r--  arch/powerpc/lib/feature-fixups-test.S   |  761
-rw-r--r--  arch/powerpc/lib/feature-fixups.c        |  376
-rw-r--r--  arch/powerpc/lib/hweight_64.S            |  110
-rw-r--r--  arch/powerpc/lib/ldstfp.S                |  379
-rw-r--r--  arch/powerpc/lib/locks.c                 |   38
-rw-r--r--  arch/powerpc/lib/mem_64.S                |   12
-rw-r--r--  arch/powerpc/lib/memcpy_64.S             |   78
-rw-r--r--  arch/powerpc/lib/memcpy_power7.S         |  656
-rw-r--r--  arch/powerpc/lib/rheap.c                 |    7
-rw-r--r--  arch/powerpc/lib/sstep.c                 | 1652
-rw-r--r--  arch/powerpc/lib/string.S                |   89
-rw-r--r--  arch/powerpc/lib/string_64.S             |  202
-rw-r--r--  arch/powerpc/lib/vmx-helper.c            |   74
-rw-r--r--  arch/powerpc/lib/xor_vmx.c               |  177
27 files changed, 7147 insertions(+), 914 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4bb023f4c86..59fa2de9546 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -2,24 +2,41 @@
# Makefile for ppc-specific library files..
#
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS += -mno-minimal-toc
-endif
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+CFLAGS_REMOVE_code-patching.o = -pg
+CFLAGS_REMOVE_feature-fixups.o = -pg
-ifeq ($(CONFIG_PPC_MERGE),y)
obj-y := string.o alloc.o \
- checksum_$(CONFIG_WORD_SIZE).o
+ crtsavres.o
obj-$(CONFIG_PPC32) += div64.o copy_32.o
-endif
+obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
- memcpy_64.o usercopy_64.o mem_64.o string.o
-obj-$(CONFIG_XMON) += sstep.o
-obj-$(CONFIG_KPROBES) += sstep.o
-obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
+ usercopy_64.o mem_64.o string.o \
+ hweight_64.o \
+ copyuser_power7.o string_64.o copypage_power7.o
+ifeq ($(CONFIG_GENERIC_CSUM),)
+obj-y += checksum_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
+endif
+
+obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
+
+obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
+obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
+
+obj-y += code-patching.o
+obj-y += feature-fixups.o
+obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o
+CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac..da22c84a8fe 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -3,16 +3,8 @@
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/string.h>
+#include <asm/setup.h>
-#include <asm/system.h>
-
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- if (mem_init_done)
- return kmalloc(size, mask);
- else
- return alloc_bootmem(size);
-}
void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index ef96c6c58ef..57a07206505 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -69,161 +69,412 @@ _GLOBAL(csum_tcpudp_magic)
* Computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit).
*
- * This code assumes at least halfword alignment, though the length
- * can be any number of bytes. The sum is accumulated in r5.
- *
* csum_partial(r3=buff, r4=len, r5=sum)
*/
_GLOBAL(csum_partial)
- subi r3,r3,8 /* we'll offset by 8 for the loads */
- srdi. r6,r4,3 /* divide by 8 for doubleword count */
- addic r5,r5,0 /* clear carry */
- beq 3f /* if we're doing < 8 bytes */
- andi. r0,r3,2 /* aligned on a word boundary already? */
- beq+ 1f
- lhz r6,8(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
- subi r4,r4,2
- addc r5,r5,r6
- srdi. r6,r4,3 /* recompute number of doublewords */
- beq 3f /* any left? */
-1: mtctr r6
-2: ldu r6,8(r3) /* main sum loop */
- adde r5,r5,r6
- bdnz 2b
- andi. r4,r4,7 /* compute bytes left to sum after doublewords */
-3: cmpwi 0,r4,4 /* is at least a full word left? */
- blt 4f
- lwz r6,8(r3) /* sum this word */
+ addic r0,r5,0 /* clear carry */
+
+ srdi. r6,r4,3 /* less than 8 bytes? */
+ beq .Lcsum_tail_word
+
+ /*
+ * If only halfword aligned, align to a double word. Since odd
+ * aligned addresses should be rare and they would require more
+ * work to calculate the correct checksum, we ignore that case
+ * and take the potential slowdown of unaligned loads.
+ */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ beq .Lcsum_aligned
+
+ li r7,4
+ sub r6,r7,r6
+ mtctr r6
+
+1:
+ lhz r6,0(r3) /* align to doubleword */
+ subi r4,r4,2
+ addi r3,r3,2
+ adde r0,r0,r6
+ bdnz 1b
+
+.Lcsum_aligned:
+ /*
+ * We unroll the loop such that each iteration is 64 bytes with an
+ * entry and exit limb of 64 bytes, meaning a minimum size of
+ * 128 bytes.
+ */
+ srdi. r6,r4,7
+ beq .Lcsum_tail_doublewords /* len < 128 */
+
+ srdi r6,r4,6
+ subi r6,r6,1
+ mtctr r6
+
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ ld r6,0(r3)
+ ld r9,8(r3)
+
+ ld r10,16(r3)
+ ld r11,24(r3)
+
+ /*
+ * On POWER6 and POWER7 back to back addes take 2 cycles because of
+ * the XER dependency. This means the fastest this loop can go is
+ * 16 cycles per iteration. The scheduling of the loop below has
+ * been shown to hit this on both POWER6 and POWER7.
+ */
+ .align 5
+2:
+ adde r0,r0,r6
+ ld r12,32(r3)
+ ld r14,40(r3)
+
+ adde r0,r0,r9
+ ld r15,48(r3)
+ ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+
+ adde r0,r0,r11
+
+ adde r0,r0,r12
+
+ adde r0,r0,r14
+
+ adde r0,r0,r15
+ ld r6,0(r3)
+ ld r9,8(r3)
+
+ adde r0,r0,r16
+ ld r10,16(r3)
+ ld r11,24(r3)
+ bdnz 2b
+
+
+ adde r0,r0,r6
+ ld r12,32(r3)
+ ld r14,40(r3)
+
+ adde r0,r0,r9
+ ld r15,48(r3)
+ ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+ adde r0,r0,r11
+ adde r0,r0,r12
+ adde r0,r0,r14
+ adde r0,r0,r15
+ adde r0,r0,r16
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ andi. r4,r4,63
+
+.Lcsum_tail_doublewords: /* Up to 127 bytes to go */
+ srdi. r6,r4,3
+ beq .Lcsum_tail_word
+
+ mtctr r6
+3:
+ ld r6,0(r3)
+ addi r3,r3,8
+ adde r0,r0,r6
+ bdnz 3b
+
+ andi. r4,r4,7
+
+.Lcsum_tail_word: /* Up to 7 bytes to go */
+ srdi. r6,r4,2
+ beq .Lcsum_tail_halfword
+
+ lwz r6,0(r3)
addi r3,r3,4
+ adde r0,r0,r6
subi r4,r4,4
- adde r5,r5,r6
-4: cmpwi 0,r4,2 /* is at least a halfword left? */
- blt+ 5f
- lhz r6,8(r3) /* sum this halfword */
- addi r3,r3,2
- subi r4,r4,2
- adde r5,r5,r6
-5: cmpwi 0,r4,1 /* is at least a byte left? */
- bne+ 6f
- lbz r6,8(r3) /* sum this byte */
- slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */
- adde r5,r5,r6
-6: addze r5,r5 /* add in final carry */
- rldicl r4,r5,32,0 /* fold two 32-bit halves together */
- add r3,r4,r5
- srdi r3,r3,32
- blr
+
+.Lcsum_tail_halfword: /* Up to 3 bytes to go */
+ srdi. r6,r4,1
+ beq .Lcsum_tail_byte
+
+ lhz r6,0(r3)
+ addi r3,r3,2
+ adde r0,r0,r6
+ subi r4,r4,2
+
+.Lcsum_tail_byte: /* Up to 1 byte to go */
+ andi. r6,r4,1
+ beq .Lcsum_finish
+
+ lbz r6,0(r3)
+ sldi r9,r6,8 /* Pad the byte out to 16 bits */
+ adde r0,r0,r9
+
+.Lcsum_finish:
+ addze r0,r0 /* add in final carry */
+ rldicl r4,r0,32,0 /* fold two 32 bit halves together */
+ add r3,r4,r0
+ srdi r3,r3,32
+ blr
+
+
+ .macro srcnr
+100:
+ .section __ex_table,"a"
+ .align 3
+ .llong 100b,.Lsrc_error_nr
+ .previous
+ .endm
+
+ .macro source
+150:
+ .section __ex_table,"a"
+ .align 3
+ .llong 150b,.Lsrc_error
+ .previous
+ .endm
+
+ .macro dstnr
+200:
+ .section __ex_table,"a"
+ .align 3
+ .llong 200b,.Ldest_error_nr
+ .previous
+ .endm
+
+ .macro dest
+250:
+ .section __ex_table,"a"
+ .align 3
+ .llong 250b,.Ldest_error
+ .previous
+ .endm
/*
* Computes the checksum of a memory block at src, length len,
* and adds in "sum" (32-bit), while copying the block to dst.
* If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky. For now we'll leave it until we instrument it somehow.
+ * to *src_err or *dst_err respectively. The caller must take any action
+ * required in this case (zeroing memory, recalculating partial checksum etc).
*
* csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
*/
_GLOBAL(csum_partial_copy_generic)
- addic r0,r6,0
- subi r3,r3,4
- subi r4,r4,4
- srwi. r6,r5,2
- beq 3f /* if we're doing < 4 bytes */
- andi. r9,r4,2 /* Align dst to longword boundary */
- beq+ 1f
-81: lhz r6,4(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
+ addic r0,r6,0 /* clear carry */
+
+ srdi. r6,r5,3 /* less than 8 bytes? */
+ beq .Lcopy_tail_word
+
+ /*
+ * If only halfword aligned, align to a double word. Since odd
+ * aligned addresses should be rare and they would require more
+ * work to calculate the correct checksum, we ignore that case
+ * and take the potential slowdown of unaligned loads.
+ *
+ * If the source and destination are relatively unaligned we only
+ * align the source. This keeps things simple.
+ */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ beq .Lcopy_aligned
+
+ li r9,4
+ sub r6,r9,r6
+ mtctr r6
+
+1:
+srcnr; lhz r6,0(r3) /* align to doubleword */
subi r5,r5,2
-91: sth r6,4(r4)
- addi r4,r4,2
- addc r0,r0,r6
- srwi. r6,r5,2 /* # words to do */
- beq 3f
-1: mtctr r6
-82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */
-92: stwu r6,4(r4) /* be unnecessary to unroll this loop */
- adde r0,r0,r6
- bdnz 82b
- andi. r5,r5,3
-3: cmpwi 0,r5,2
- blt+ 4f
-83: lhz r6,4(r3)
addi r3,r3,2
- subi r5,r5,2
-93: sth r6,4(r4)
+ adde r0,r0,r6
+dstnr; sth r6,0(r4)
addi r4,r4,2
+ bdnz 1b
+
+.Lcopy_aligned:
+ /*
+ * We unroll the loop such that each iteration is 64 bytes with an
+ * entry and exit limb of 64 bytes, meaning a minimum size of
+ * 128 bytes.
+ */
+ srdi. r6,r5,7
+ beq .Lcopy_tail_doublewords /* len < 128 */
+
+ srdi r6,r5,6
+ subi r6,r6,1
+ mtctr r6
+
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+source; ld r6,0(r3)
+source; ld r9,8(r3)
+
+source; ld r10,16(r3)
+source; ld r11,24(r3)
+
+ /*
+ * On POWER6 and POWER7 back to back addes take 2 cycles because of
+ * the XER dependency. This means the fastest this loop can go is
+ * 16 cycles per iteration. The scheduling of the loop below has
+ * been shown to hit this on both POWER6 and POWER7.
+ */
+ .align 5
+2:
adde r0,r0,r6
-4: cmpwi 0,r5,1
- bne+ 5f
-84: lbz r6,4(r3)
-94: stb r6,4(r4)
- slwi r6,r6,8 /* Upper byte of word */
+source; ld r12,32(r3)
+source; ld r14,40(r3)
+
+ adde r0,r0,r9
+source; ld r15,48(r3)
+source; ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+dest; std r6,0(r4)
+dest; std r9,8(r4)
+
+ adde r0,r0,r11
+dest; std r10,16(r4)
+dest; std r11,24(r4)
+
+ adde r0,r0,r12
+dest; std r12,32(r4)
+dest; std r14,40(r4)
+
+ adde r0,r0,r14
+dest; std r15,48(r4)
+dest; std r16,56(r4)
+ addi r4,r4,64
+
+ adde r0,r0,r15
+source; ld r6,0(r3)
+source; ld r9,8(r3)
+
+ adde r0,r0,r16
+source; ld r10,16(r3)
+source; ld r11,24(r3)
+ bdnz 2b
+
+
adde r0,r0,r6
-5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */
- rldicl r4,r3,32,0 /* fold 64 bit value */
- add r3,r4,r3
- srdi r3,r3,32
- blr
+source; ld r12,32(r3)
+source; ld r14,40(r3)
-/* These shouldn't go in the fixup section, since that would
- cause the ex_table addresses to get out of order. */
+ adde r0,r0,r9
+source; ld r15,48(r3)
+source; ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+dest; std r6,0(r4)
+dest; std r9,8(r4)
+
+ adde r0,r0,r11
+dest; std r10,16(r4)
+dest; std r11,24(r4)
+
+ adde r0,r0,r12
+dest; std r12,32(r4)
+dest; std r14,40(r4)
+
+ adde r0,r0,r14
+dest; std r15,48(r4)
+dest; std r16,56(r4)
+ addi r4,r4,64
+
+ adde r0,r0,r15
+ adde r0,r0,r16
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ andi. r5,r5,63
+
+.Lcopy_tail_doublewords: /* Up to 127 bytes to go */
+ srdi. r6,r5,3
+ beq .Lcopy_tail_word
- .globl src_error_1
-src_error_1:
- li r6,0
- subi r5,r5,2
-95: sth r6,4(r4)
- addi r4,r4,2
- srwi. r6,r5,2
- beq 3f
mtctr r6
- .globl src_error_2
-src_error_2:
- li r6,0
-96: stwu r6,4(r4)
- bdnz 96b
-3: andi. r5,r5,3
- beq src_error
- .globl src_error_3
-src_error_3:
- li r6,0
- mtctr r5
- addi r4,r4,3
-97: stbu r6,1(r4)
- bdnz 97b
- .globl src_error
-src_error:
+3:
+srcnr; ld r6,0(r3)
+ addi r3,r3,8
+ adde r0,r0,r6
+dstnr; std r6,0(r4)
+ addi r4,r4,8
+ bdnz 3b
+
+ andi. r5,r5,7
+
+.Lcopy_tail_word: /* Up to 7 bytes to go */
+ srdi. r6,r5,2
+ beq .Lcopy_tail_halfword
+
+srcnr; lwz r6,0(r3)
+ addi r3,r3,4
+ adde r0,r0,r6
+dstnr; stw r6,0(r4)
+ addi r4,r4,4
+ subi r5,r5,4
+
+.Lcopy_tail_halfword: /* Up to 3 bytes to go */
+ srdi. r6,r5,1
+ beq .Lcopy_tail_byte
+
+srcnr; lhz r6,0(r3)
+ addi r3,r3,2
+ adde r0,r0,r6
+dstnr; sth r6,0(r4)
+ addi r4,r4,2
+ subi r5,r5,2
+
+.Lcopy_tail_byte: /* Up to 1 byte to go */
+ andi. r6,r5,1
+ beq .Lcopy_finish
+
+srcnr; lbz r6,0(r3)
+ sldi r9,r6,8 /* Pad the byte out to 16 bits */
+ adde r0,r0,r9
+dstnr; stb r6,0(r4)
+
+.Lcopy_finish:
+ addze r0,r0 /* add in final carry */
+ rldicl r4,r0,32,0 /* fold two 32 bit halves together */
+ add r3,r4,r0
+ srdi r3,r3,32
+ blr
+
+.Lsrc_error:
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Lsrc_error_nr:
cmpdi 0,r7,0
- beq 1f
+ beqlr
li r6,-EFAULT
stw r6,0(r7)
-1: addze r3,r0
blr
- .globl dst_error
-dst_error:
+.Ldest_error:
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldest_error_nr:
cmpdi 0,r8,0
- beq 1f
+ beqlr
li r6,-EFAULT
stw r6,0(r8)
-1: addze r3,r0
blr
-
-.section __ex_table,"a"
- .align 3
- .llong 81b,src_error_1
- .llong 91b,dst_error
- .llong 82b,src_error_2
- .llong 92b,dst_error
- .llong 83b,src_error_3
- .llong 93b,dst_error
- .llong 84b,src_error_3
- .llong 94b,dst_error
- .llong 95b,dst_error
- .llong 96b,dst_error
- .llong 97b,dst_error
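
The fold at .Lcsum_finish sums the two 32-bit halves of the 64-bit accumulator,
including the carry between them. A minimal C sketch of that fold, assuming
nothing beyond the instruction semantics shown above:

    #include <stdint.h>

    /* Rotate the 64-bit accumulator by 32 bits, add, and keep the top
     * 32 bits: low half and high half are summed together with any
     * carry out of the low word. Mirrors
     *   rldicl r4,r0,32,0 ; add r3,r4,r0 ; srdi r3,r3,32            */
    static uint32_t csum_fold64(uint64_t acc)
    {
        uint64_t rot = (acc << 32) | (acc >> 32);
        return (uint32_t)((rot + acc) >> 32);
    }
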
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
new file mode 100644
index 00000000000..08e3a3356c4
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers_64.c
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+ int len, __wsum sum, int *err_ptr)
+{
+ unsigned int csum;
+
+ might_sleep();
+
+ *err_ptr = 0;
+
+ if (!len) {
+ csum = 0;
+ goto out;
+ }
+
+ if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
+ *err_ptr = -EFAULT;
+ csum = (__force unsigned int)sum;
+ goto out;
+ }
+
+ csum = csum_partial_copy_generic((void __force *)src, dst,
+ len, sum, err_ptr, NULL);
+
+ if (unlikely(*err_ptr)) {
+ int missing = __copy_from_user(dst, src, len);
+
+ if (missing) {
+ memset(dst + len - missing, 0, missing);
+ *err_ptr = -EFAULT;
+ } else {
+ *err_ptr = 0;
+ }
+
+ csum = csum_partial(dst, len, sum);
+ }
+
+out:
+ return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_from_user);
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
+ __wsum sum, int *err_ptr)
+{
+ unsigned int csum;
+
+ might_sleep();
+
+ *err_ptr = 0;
+
+ if (!len) {
+ csum = 0;
+ goto out;
+ }
+
+ if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
+ *err_ptr = -EFAULT;
+ csum = -1; /* invalid checksum */
+ goto out;
+ }
+
+ csum = csum_partial_copy_generic(src, (void __force *)dst,
+ len, sum, NULL, err_ptr);
+
+ if (unlikely(*err_ptr)) {
+ csum = csum_partial(src, len, sum);
+
+ if (copy_to_user(dst, src, len)) {
+ *err_ptr = -EFAULT;
+ csum = -1; /* invalid checksum */
+ }
+ }
+
+out:
+ return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_to_user);
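
The error path in csum_and_copy_from_user() above relies on __copy_from_user()
returning the number of bytes that could not be copied, so the wrapper
zero-fills exactly that tail before checksumming the destination. A minimal
standalone C sketch of that fixup step (fixup_partial_copy is a hypothetical
helper modelling the wrapper's logic, not kernel code):

    #include <errno.h>
    #include <string.h>
    #include <stddef.h>

    /* 'missing' models the __copy_from_user() return value: the number
     * of bytes NOT copied. Zero-fill the uncopied tail so the follow-up
     * csum_partial(dst, len, sum) checksums well-defined bytes. */
    static void fixup_partial_copy(unsigned char *dst, size_t len,
                                   size_t missing, int *err)
    {
        if (missing) {
            memset(dst + len - missing, 0, missing);
            *err = -EFAULT;
        } else {
            *err = 0;
        }
    }
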
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
new file mode 100644
index 00000000000..d5edbeb8eb8
--- /dev/null
+++ b/arch/powerpc/lib/code-patching.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/code-patching.h>
+#include <asm/uaccess.h>
+
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ int err;
+
+ __put_user_size(instr, addr, 4, err);
+ if (err)
+ return err;
+ asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+ return 0;
+}
+
+int patch_branch(unsigned int *addr, unsigned long target, int flags)
+{
+ return patch_instruction(addr, create_branch(addr, target, flags));
+}
+
+unsigned int create_branch(const unsigned int *addr,
+ unsigned long target, int flags)
+{
+ unsigned int instruction;
+ long offset;
+
+ offset = target;
+ if (! (flags & BRANCH_ABSOLUTE))
+ offset = offset - (unsigned long)addr;
+
+ /* Check we can represent the target in the instruction format */
+ if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
+ return 0;
+
+ /* Mask out the flags and target, so they don't step on each other. */
+ instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC);
+
+ return instruction;
+}
+
+unsigned int create_cond_branch(const unsigned int *addr,
+ unsigned long target, int flags)
+{
+ unsigned int instruction;
+ long offset;
+
+ offset = target;
+ if (! (flags & BRANCH_ABSOLUTE))
+ offset = offset - (unsigned long)addr;
+
+ /* Check we can represent the target in the instruction format */
+ if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3)
+ return 0;
+
+ /* Mask out the flags and target, so they don't step on each other. */
+ instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC);
+
+ return instruction;
+}
+
+static unsigned int branch_opcode(unsigned int instr)
+{
+ return (instr >> 26) & 0x3F;
+}
+
+static int instr_is_branch_iform(unsigned int instr)
+{
+ return branch_opcode(instr) == 18;
+}
+
+static int instr_is_branch_bform(unsigned int instr)
+{
+ return branch_opcode(instr) == 16;
+}
+
+int instr_is_relative_branch(unsigned int instr)
+{
+ if (instr & BRANCH_ABSOLUTE)
+ return 0;
+
+ return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
+}
+
+static unsigned long branch_iform_target(const unsigned int *instr)
+{
+ signed long imm;
+
+ imm = *instr & 0x3FFFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x2000000)
+ imm -= 0x4000000;
+
+ if ((*instr & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+static unsigned long branch_bform_target(const unsigned int *instr)
+{
+ signed long imm;
+
+ imm = *instr & 0xFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x8000)
+ imm -= 0x10000;
+
+ if ((*instr & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+unsigned long branch_target(const unsigned int *instr)
+{
+ if (instr_is_branch_iform(*instr))
+ return branch_iform_target(instr);
+ else if (instr_is_branch_bform(*instr))
+ return branch_bform_target(instr);
+
+ return 0;
+}
+
+int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
+unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
+{
+ unsigned long target;
+
+ target = branch_target(src);
+
+ if (instr_is_branch_iform(*src))
+ return create_branch(dest, target, *src);
+ else if (instr_is_branch_bform(*src))
+ return create_cond_branch(dest, target, *src);
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC_BOOK3E_64
+void __patch_exception(int exc, unsigned long addr)
+{
+ extern unsigned int interrupt_base_book3e;
+ unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Our exception vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
+#endif
+
+#ifdef CONFIG_CODE_PATCHING_SELFTEST
+
+static void __init test_trampoline(void)
+{
+ asm ("nop;\n");
+}
+
+#define check(x) \
+ if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__);
+
+static void __init test_branch_iform(void)
+{
+ unsigned int instr;
+ unsigned long addr;
+
+ addr = (unsigned long)&instr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_iform(0x48000000));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_iform(0x4bffffff));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_iform(0xcbffffff));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_iform(0x7bffffff));
+
+ /* Simplest case, branch to self with link */
+ check(instr_is_branch_iform(0x48000001));
+ /* All bits of targets set */
+ check(instr_is_branch_iform(0x4bfffffd));
+ /* Some bits of targets set */
+ check(instr_is_branch_iform(0x4bff00fd));
+ /* Must be a valid branch to start with */
+ check(!instr_is_branch_iform(0x7bfffffd));
+
+ /* Absolute branch to 0x100 */
+ instr = 0x48000103;
+ check(instr_is_branch_to_addr(&instr, 0x100));
+ /* Absolute branch to 0x420fc */
+ instr = 0x480420ff;
+ check(instr_is_branch_to_addr(&instr, 0x420fc));
+ /* Maximum positive relative branch, + 20MB - 4B */
+ instr = 0x49fffffc;
+ check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
+ /* Smallest negative relative branch, - 4B */
+ instr = 0x4bfffffc;
+ check(instr_is_branch_to_addr(&instr, addr - 4));
+ /* Largest negative relative branch, - 32 MB */
+ instr = 0x4a000000;
+ check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+
+ /* Branch to self, with link */
+ instr = create_branch(&instr, addr, BRANCH_SET_LINK);
+ check(instr_is_branch_to_addr(&instr, addr));
+
+ /* Branch to self - 0x100, with link */
+ instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK);
+ check(instr_is_branch_to_addr(&instr, addr - 0x100));
+
+ /* Branch to self + 0x100, no link */
+ instr = create_branch(&instr, addr + 0x100, 0);
+ check(instr_is_branch_to_addr(&instr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 MB */
+ instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK);
+ check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
+
+	/* Out of range relative negative offset, - 32 MB + 4 */
+ instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK);
+ check(instr == 0);
+
+ /* Out of range relative positive offset, + 32 MB */
+ instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK);
+ check(instr == 0);
+
+ /* Unaligned target */
+ instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK);
+ check(instr == 0);
+
+ /* Check flags are masked correctly */
+ instr = create_branch(&instr, addr, 0xFFFFFFFC);
+ check(instr_is_branch_to_addr(&instr, addr));
+ check(instr == 0x48000000);
+}
+
+static void __init test_create_function_call(void)
+{
+ unsigned int *iptr;
+ unsigned long dest;
+
+ /* Check we can create a function call */
+ iptr = (unsigned int *)ppc_function_entry(test_trampoline);
+ dest = ppc_function_entry(test_create_function_call);
+ patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK));
+ check(instr_is_branch_to_addr(iptr, dest));
+}
+
+static void __init test_branch_bform(void)
+{
+ unsigned long addr;
+ unsigned int *iptr, instr, flags;
+
+ iptr = &instr;
+ addr = (unsigned long)iptr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_bform(0x40000000));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_bform(0x43ffffff));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_bform(0xc3ffffff));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_bform(0x7bffffff));
+
+ /* Absolute conditional branch to 0x100 */
+ instr = 0x43ff0103;
+ check(instr_is_branch_to_addr(&instr, 0x100));
+ /* Absolute conditional branch to 0x20fc */
+ instr = 0x43ff20ff;
+ check(instr_is_branch_to_addr(&instr, 0x20fc));
+ /* Maximum positive relative conditional branch, + 32 KB - 4B */
+ instr = 0x43ff7ffc;
+ check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
+ /* Smallest negative relative conditional branch, - 4B */
+ instr = 0x43fffffc;
+ check(instr_is_branch_to_addr(&instr, addr - 4));
+ /* Largest negative relative conditional branch, - 32 KB */
+ instr = 0x43ff8000;
+ check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+
+ /* All condition code bits set & link */
+ flags = 0x3ff000 | BRANCH_SET_LINK;
+
+ /* Branch to self */
+ instr = create_cond_branch(iptr, addr, flags);
+ check(instr_is_branch_to_addr(&instr, addr));
+
+ /* Branch to self - 0x100 */
+ instr = create_cond_branch(iptr, addr - 0x100, flags);
+ check(instr_is_branch_to_addr(&instr, addr - 0x100));
+
+ /* Branch to self + 0x100 */
+ instr = create_cond_branch(iptr, addr + 0x100, flags);
+ check(instr_is_branch_to_addr(&instr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 KB */
+ instr = create_cond_branch(iptr, addr - 0x8000, flags);
+ check(instr_is_branch_to_addr(&instr, addr - 0x8000));
+
+	/* Out of range relative negative offset, - 32 KB + 4 */
+ instr = create_cond_branch(iptr, addr - 0x8004, flags);
+ check(instr == 0);
+
+ /* Out of range relative positive offset, + 32 KB */
+ instr = create_cond_branch(iptr, addr + 0x8000, flags);
+ check(instr == 0);
+
+ /* Unaligned target */
+ instr = create_cond_branch(iptr, addr + 3, flags);
+ check(instr == 0);
+
+ /* Check flags are masked correctly */
+ instr = create_cond_branch(iptr, addr, 0xFFFFFFFC);
+ check(instr_is_branch_to_addr(&instr, addr));
+ check(instr == 0x43FF0000);
+}
+
+static void __init test_translate_branch(void)
+{
+ unsigned long addr;
+ unsigned int *p, *q;
+ void *buf;
+
+ buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ patch_branch(p, addr, 0);
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 1;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 MB */
+ p = buf;
+ addr = (unsigned long)p;
+ patch_branch(p, addr, 0);
+ q = buf + 0x2000000;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(*q == 0x4a000000);
+
+ /* Maximum positive case, move x to x - 32 MB + 4 */
+ p = buf + 0x2000000;
+ addr = (unsigned long)p;
+ patch_branch(p, addr, 0);
+ q = buf + 4;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(*q == 0x49fffffc);
+
+ /* Jump to x + 16 MB moved to x + 20 MB */
+ p = buf;
+ addr = 0x1000000 + (unsigned long)buf;
+ patch_branch(p, addr, BRANCH_SET_LINK);
+ q = buf + 0x1400000;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 16 MB moved to x - 16 MB + 4 */
+ p = buf + 0x1000000;
+ addr = 0x2000000 + (unsigned long)buf;
+ patch_branch(p, addr, 0);
+ q = buf + 4;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+
+ /* Conditional branch tests */
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ patch_instruction(p, create_cond_branch(p, addr, 0));
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 1;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 KB */
+ p = buf;
+ addr = (unsigned long)p;
+ patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
+ q = buf + 0x8000;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(*q == 0x43ff8000);
+
+ /* Maximum positive case, move x to x - 32 KB + 4 */
+ p = buf + 0x8000;
+ addr = (unsigned long)p;
+ patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
+ q = buf + 4;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(*q == 0x43ff7ffc);
+
+ /* Jump to x + 12 KB moved to x + 20 KB */
+ p = buf;
+ addr = 0x3000 + (unsigned long)buf;
+ patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK));
+ q = buf + 0x5000;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 8 KB moved to x - 8 KB + 4 */
+ p = buf + 0x2000;
+ addr = 0x4000 + (unsigned long)buf;
+ patch_instruction(p, create_cond_branch(p, addr, 0));
+ q = buf + 4;
+ patch_instruction(q, translate_branch(q, p));
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Free the buffer we were using */
+ vfree(buf);
+}
+
+static int __init test_code_patching(void)
+{
+ printk(KERN_DEBUG "Running code patching self-tests ...\n");
+
+ test_branch_iform();
+ test_branch_bform();
+ test_create_function_call();
+ test_translate_branch();
+
+ return 0;
+}
+late_initcall(test_code_patching);
+
+#endif /* CONFIG_CODE_PATCHING_SELFTEST */
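
create_branch() and branch_iform_target() above are inverses over the I-form
encoding: opcode 18 in the top six bits, a 26-bit signed word-aligned
displacement, and the AA/LK flags in the low two bits. A self-contained
round-trip sketch in plain C (a userspace model, not the kernel code itself):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t encode_b(int64_t offset, unsigned flags)
    {
        /* Same range/alignment check as create_branch() */
        if (offset < -0x2000000 || offset > 0x1fffffc || (offset & 3))
            return 0;
        return 0x48000000u | (flags & 0x3) | ((uint32_t)offset & 0x03FFFFFCu);
    }

    static int64_t decode_b_offset(uint32_t instr)
    {
        int64_t imm = instr & 0x3FFFFFC;
        if (imm & 0x2000000)        /* sign-extend the displacement */
            imm -= 0x4000000;
        return imm;
    }

    int main(void)
    {
        uint32_t instr = encode_b(-4, 0);   /* "b .-4" */
        printf("%08x -> %lld\n", instr, (long long)decode_b_offset(instr));
        return 0;                           /* prints 4bfffffc -> -4 */
    }
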
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index c657de59abc..55f19f9fd70 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -62,7 +62,7 @@
.text
.stabs "arch/powerpc/lib/",N_SO,0,0,0f
- .stabs "copy32.S",N_SO,0,0,0f
+ .stabs "copy_32.S",N_SO,0,0,0f
0:
CACHELINE_BYTES = L1_CACHE_BYTES
@@ -98,20 +98,7 @@ _GLOBAL(cacheable_memzero)
bdnz 4b
3: mtctr r9
li r7,4
-#if !defined(CONFIG_8xx)
10: dcbz r7,r6
-#else
-10: stw r4, 4(r6)
- stw r4, 8(r6)
- stw r4, 12(r6)
- stw r4, 16(r6)
-#if CACHE_LINE_SIZE >= 32
- stw r4, 20(r6)
- stw r4, 24(r6)
- stw r4, 28(r6)
- stw r4, 32(r6)
-#endif /* CACHE_LINE_SIZE */
-#endif
addi r6,r6,CACHELINE_BYTES
bdnz 10b
clrlwi r5,r8,32-LG_CACHELINE_BYTES
@@ -200,9 +187,7 @@ _GLOBAL(cacheable_memcpy)
mtctr r0
beq 63f
53:
-#if !defined(CONFIG_8xx)
dcbz r11,r6
-#endif
COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES
@@ -356,14 +341,6 @@ _GLOBAL(__copy_tofrom_user)
li r11,4
beq 63f
-#ifdef CONFIG_8xx
- /* Don't use prefetch on 8xx */
- mtctr r0
- li r0,0
-53: COPY_16_BYTES_WITHEX(0)
- bdnz 53b
-
-#else /* not CONFIG_8xx */
/* Here we decide how far ahead to prefetch the source */
li r3,4
cmpwi r0,1
@@ -416,7 +393,6 @@ _GLOBAL(__copy_tofrom_user)
li r3,4
li r7,0
bne 114b
-#endif /* CONFIG_8xx */
63: srwi. r0,r5,2
mtctr r0
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index f9837f44ac0..a3c4dc4defd 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -1,119 +1,112 @@
/*
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
-_GLOBAL(copy_4K_page)
- std r31,-8(1)
- std r30,-16(1)
- std r29,-24(1)
- std r28,-32(1)
- std r27,-40(1)
- std r26,-48(1)
- std r25,-56(1)
- std r24,-64(1)
- std r23,-72(1)
- std r22,-80(1)
- std r21,-88(1)
- std r20,-96(1)
- li r5,4096/32 - 1
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
+_GLOBAL_TOC(copy_page)
+BEGIN_FTR_SECTION
+ lis r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+ b copypage_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+ ori r5,r5,PAGE_SIZE@l
+BEGIN_FTR_SECTION
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
+ lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */
+ li r9,0
+ srd r8,r5,r11
+
+ mtctr r8
+.Lsetup:
+ dcbt r9,r4
+ dcbz r9,r3
+ add r9,r9,r12
+ bdnz .Lsetup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8
- li r12,5
-0: addi r5,r5,-24
- mtctr r12
- ld r22,640(4)
- ld r21,512(4)
- ld r20,384(4)
- ld r11,256(4)
- ld r9,128(4)
- ld r7,0(4)
- ld r25,648(4)
- ld r24,520(4)
- ld r23,392(4)
- ld r10,264(4)
- ld r8,136(4)
- ldu r6,8(4)
- cmpwi r5,24
-1: std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- ld r28,648(4)
- ld r27,520(4)
- ld r26,392(4)
- ld r31,264(4)
- ld r30,136(4)
- ld r29,8(4)
- std r25,656(3)
- std r24,528(3)
- std r23,400(3)
- std r10,272(3)
- std r8,144(3)
- std r6,16(3)
- ld r22,656(4)
- ld r21,528(4)
- ld r20,400(4)
- ld r11,272(4)
- ld r9,144(4)
- ld r7,16(4)
- std r28,664(3)
- std r27,536(3)
- std r26,408(3)
- std r31,280(3)
- std r30,152(3)
- stdu r29,24(3)
- ld r25,664(4)
- ld r24,536(4)
- ld r23,408(4)
- ld r10,280(4)
- ld r8,152(4)
- ldu r6,24(4)
+ srdi r8,r5,7 /* page is copied in 128 byte strides */
+ addi r8,r8,-1 /* one stride copied outside loop */
+
+ mtctr r8
+
+ ld r5,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ldu r8,24(r4)
+1: std r5,8(r3)
+ std r6,16(r3)
+ ld r9,8(r4)
+ ld r10,16(r4)
+ std r7,24(r3)
+ std r8,32(r3)
+ ld r11,24(r4)
+ ld r12,32(r4)
+ std r9,40(r3)
+ std r10,48(r3)
+ ld r5,40(r4)
+ ld r6,48(r4)
+ std r11,56(r3)
+ std r12,64(r3)
+ ld r7,56(r4)
+ ld r8,64(r4)
+ std r5,72(r3)
+ std r6,80(r3)
+ ld r9,72(r4)
+ ld r10,80(r4)
+ std r7,88(r3)
+ std r8,96(r3)
+ ld r11,88(r4)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ ld r5,104(r4)
+ ld r6,112(r4)
+ std r11,120(r3)
+ stdu r12,128(r3)
+ ld r7,120(r4)
+ ldu r8,128(r4)
bdnz 1b
- std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- addi r4,r4,640
- addi r3,r3,648
- bge 0b
- mtctr r5
- ld r7,0(4)
- ld r8,8(4)
- ldu r9,16(4)
-3: ld r10,8(4)
- std r7,8(3)
- ld r7,16(4)
- std r8,16(3)
- ld r8,24(4)
- std r9,24(3)
- ldu r9,32(4)
- stdu r10,32(3)
- bdnz 3b
-4: ld r10,8(4)
- std r7,8(3)
- std r8,16(3)
- std r9,24(3)
- std r10,32(3)
-9: ld r20,-96(1)
- ld r21,-88(1)
- ld r22,-80(1)
- ld r23,-72(1)
- ld r24,-64(1)
- ld r25,-56(1)
- ld r26,-48(1)
- ld r27,-40(1)
- ld r28,-32(1)
- ld r29,-24(1)
- ld r30,-16(1)
- ld r31,-8(1)
+
+ std r5,8(r3)
+ std r6,16(r3)
+ ld r9,8(r4)
+ ld r10,16(r4)
+ std r7,24(r3)
+ std r8,32(r3)
+ ld r11,24(r4)
+ ld r12,32(r4)
+ std r9,40(r3)
+ std r10,48(r3)
+ ld r5,40(r4)
+ ld r6,48(r4)
+ std r11,56(r3)
+ std r12,64(r3)
+ ld r7,56(r4)
+ ld r8,64(r4)
+ std r5,72(r3)
+ std r6,80(r3)
+ ld r9,72(r4)
+ ld r10,80(r4)
+ std r7,88(r3)
+ std r8,96(r3)
+ ld r11,88(r4)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ std r11,120(r3)
+ std r12,128(r3)
blr
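
The rewritten copy_page above moves the page in 128-byte strides, with one
stride's loads issued before the loop and drained after it, so each iteration
overlaps loads for the next stride with stores for the current one. A plain C
sketch of the stride structure (the software pipelining itself is collapsed,
and PAGE_SZ of 4096 is an assumption for 4K pages):

    #include <stdint.h>
    #include <stddef.h>

    #define PAGE_SZ 4096

    static void copy_page_model(void *dst, const void *src)
    {
        const uint64_t *s = src;
        uint64_t *d = dst;

        /* 16 doubleword loads + 16 stores per 128-byte stride */
        for (size_t i = 0; i < PAGE_SZ / 128; i++) {
            for (int j = 0; j < 16; j++)
                d[j] = s[j];
            s += 16;
            d += 16;
        }
    }
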
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
new file mode 100644
index 00000000000..d7dafb3777a
--- /dev/null
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -0,0 +1,168 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copypage_power7)
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side. Since source and destination are page
+ * aligned we don't need to clear the bottom 7 bits of either
+ * address.
+ */
+ ori r9,r3,1 /* stream=1 => to */
+
+#ifdef CONFIG_PPC_64K_PAGES
+ lis r7,0x0E01 /* depth=7
+ * units/cachelines=512 */
+#else
+ lis r7,0x0E00 /* depth=7 */
+ ori r7,r7,0x1000 /* units/cachelines=32 */
+#endif
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ /* setup read stream 0 */
+ dcbt r0,r4,0b01000 /* addr from */
+ dcbt r0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst r0,r9,0b01000 /* addr to */
+ dcbtst r0,r10,0b01010 /* length and depth to */
+ eieio
+ dcbt r0,r8,0b01010 /* all streams GO */
+.machine pop
+
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl enter_vmx_copy
+ cmpwi r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ mtlr r0
+
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ beq .Lnonvmx_copy
+
+ addi r1,r1,STACKFRAMESIZE
+
+ li r6,16
+ li r7,32
+ li r8,48
+ li r9,64
+ li r10,80
+ li r11,96
+ li r12,112
+
+ .align 5
+1: lvx vr7,r0,r4
+ lvx vr6,r4,r6
+ lvx vr5,r4,r7
+ lvx vr4,r4,r8
+ lvx vr3,r4,r9
+ lvx vr2,r4,r10
+ lvx vr1,r4,r11
+ lvx vr0,r4,r12
+ addi r4,r4,128
+ stvx vr7,r0,r3
+ stvx vr6,r3,r6
+ stvx vr5,r3,r7
+ stvx vr4,r3,r8
+ stvx vr3,r3,r9
+ stvx vr2,r3,r10
+ stvx vr1,r3,r11
+ stvx vr0,r3,r12
+ addi r3,r3,128
+ bdnz 1b
+
+ b exit_vmx_copy /* tail call optimise */
+
+#else
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ stdu r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+
+1: ld r0,0(r4)
+ ld r5,8(r4)
+ ld r6,16(r4)
+ ld r7,24(r4)
+ ld r8,32(r4)
+ ld r9,40(r4)
+ ld r10,48(r4)
+ ld r11,56(r4)
+ ld r12,64(r4)
+ ld r14,72(r4)
+ ld r15,80(r4)
+ ld r16,88(r4)
+ ld r17,96(r4)
+ ld r18,104(r4)
+ ld r19,112(r4)
+ ld r20,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r5,8(r3)
+ std r6,16(r3)
+ std r7,24(r3)
+ std r8,32(r3)
+ std r9,40(r3)
+ std r10,48(r3)
+ std r11,56(r3)
+ std r12,64(r3)
+ std r14,72(r3)
+ std r15,80(r3)
+ std r16,88(r3)
+ std r17,96(r3)
+ std r18,104(r3)
+ std r19,112(r3)
+ std r20,120(r3)
+ addi r3,r3,128
+ bdnz 1b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ blr
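
The stream setup above packs its parameters into the register passed to
dcbt/dcbtst with TH=0b01010. Reading the fields off the inline comments
(depth at bits 27..25, unit/cacheline count at bits 16..7, stream ID in the
low bits; these positions are inferred from the comments, not quoted from the
ISA), the constants can be reproduced as:

    #include <stdint.h>

    static uint64_t stream_desc(unsigned depth, unsigned cachelines,
                                unsigned stream_id)
    {
        return ((uint64_t)depth << 25) |
               ((uint64_t)cachelines << 7) |
               stream_id;
    }
    /* stream_desc(7, 32, 1) == 0x0E001001, matching the
     * lis r7,0x0E00 / ori r7,r7,0x1000 / ori r10,r7,1 sequence;
     * the "all streams GO" word is 0x80000000. */
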
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 25ec5378afa..0860ee46013 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,8 +9,22 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#ifdef __BIG_ENDIAN__
+#define sLd sld /* Shift towards low-numbered address. */
+#define sHd srd /* Shift towards high-numbered address. */
+#else
+#define sLd srd /* Shift towards low-numbered address. */
+#define sHd sld /* Shift towards high-numbered address. */
+#endif
+
.align 7
-_GLOBAL(__copy_tofrom_user)
+_GLOBAL_TOC(__copy_tofrom_user)
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
+ b __copy_tofrom_user_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+_GLOBAL(__copy_tofrom_user_base)
/* first check for a whole page copy on a page boundary */
cmpldi cr1,r5,16
cmpdi cr6,r5,4096
@@ -24,43 +38,75 @@ _GLOBAL(__copy_tofrom_user)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- PPC_MTOCRF 0x01,r5
+ PPC_MTOCRF(0x01,r5)
blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+ CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
- andi. r0,r4,7
addi r3,r3,-16
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
bne .Lsrc_unaligned
- srdi r7,r5,4
-20: ld r9,0(r4)
- addi r4,r4,-8
- mtctr r7
- andi. r5,r5,7
- bf cr7*4+0,22f
- addi r3,r3,8
- addi r4,r4,8
- mr r8,r9
- blt cr1,72f
-21: ld r9,8(r4)
-70: std r8,8(r3)
-22: ldu r8,16(r4)
-71: stdu r9,16(r3)
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
+ blt cr1,.Ldo_tail /* if < 16 bytes to copy */
+ srdi r0,r5,5
+ cmpdi cr1,r0,0
+20: ld r7,0(r4)
+220: ld r6,8(r4)
+ addi r4,r4,16
+ mtctr r0
+ andi. r0,r5,0x10
+ beq 22f
+ addi r3,r3,16
+ addi r4,r4,-16
+ mr r9,r7
+ mr r8,r6
+ beq cr1,72f
+21: ld r7,16(r4)
+221: ld r6,24(r4)
+ addi r4,r4,32
+70: std r9,0(r3)
+270: std r8,8(r3)
+22: ld r9,0(r4)
+222: ld r8,8(r4)
+71: std r7,16(r3)
+271: std r6,24(r3)
+ addi r3,r3,32
bdnz 21b
-72: std r8,8(r3)
+72: std r9,0(r3)
+272: std r8,8(r3)
+ andi. r5,r5,0xf
beq+ 3f
- addi r3,r3,16
-23: ld r9,8(r4)
+ addi r4,r4,16
.Ldo_tail:
- bf cr7*4+1,1f
- rotldi r9,r9,32
+ addi r3,r3,16
+ bf cr7*4+0,246f
+244: ld r9,0(r4)
+ addi r4,r4,8
+245: std r9,0(r3)
+ addi r3,r3,8
+246: bf cr7*4+1,1f
+23: lwz r9,0(r4)
+ addi r4,r4,4
73: stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
- rotldi r9,r9,16
+44: lhz r9,0(r4)
+ addi r4,r4,2
74: sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
- rotldi r9,r9,8
+45: lbz r9,0(r4)
75: stb r9,0(r3)
3: li r3,0
blr
@@ -80,10 +126,10 @@ _GLOBAL(__copy_tofrom_user)
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
25: ld r0,8(r4)
- sld r6,r9,r10
+ sLd r6,r9,r10
26: ldu r9,16(r4)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
27: ld r0,8(r4)
@@ -91,35 +137,35 @@ _GLOBAL(__copy_tofrom_user)
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
29: ldu r9,8(r4)
- sld r8,r0,r10
+ sLd r8,r0,r10
addi r3,r3,-8
blt cr6,5f
30: ld r0,8(r4)
- srd r12,r9,r11
- sld r6,r9,r10
+ sHd r12,r9,r11
+ sLd r6,r9,r10
31: ldu r9,16(r4)
or r12,r8,r12
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
addi r3,r3,16
beq cr6,78f
1: or r7,r7,r6
32: ld r0,8(r4)
76: std r12,8(r3)
-2: srd r12,r9,r11
- sld r6,r9,r10
+2: sHd r12,r9,r11
+ sLd r6,r9,r10
33: ldu r9,16(r4)
or r12,r8,r12
77: stdu r7,16(r3)
- srd r7,r0,r11
- sld r8,r0,r10
+ sHd r7,r0,r11
+ sLd r8,r0,r10
bdnz 1b
78: std r12,8(r3)
or r7,r7,r6
79: std r7,16(r3)
-5: srd r12,r9,r11
+5: sHd r12,r9,r11
or r12,r8,r12
80: std r12,24(r3)
bne 6f
@@ -127,18 +173,46 @@ _GLOBAL(__copy_tofrom_user)
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
- sld r9,r9,r10
- ble cr1,.Ldo_tail
+ sLd r9,r9,r10
+ ble cr1,7f
34: ld r0,8(r4)
- srd r7,r0,r11
+ sHd r7,r0,r11
or r9,r7,r9
- b .Ldo_tail
+7:
+ bf cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,32
+#endif
+94: stw r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,32
+#endif
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,16
+#endif
+95: sth r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,16
+#endif
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,8
+#endif
+96: stb r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,8
+#endif
+3: li r3,0
+ blr
.Ldst_unaligned:
- PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
- cmpldi r1,r5,16
+ cmpldi cr1,r5,16
bf cr7*4+3,1f
35: lbz r0,0(r4)
81: stb r0,0(r3)
@@ -150,7 +224,7 @@ _GLOBAL(__copy_tofrom_user)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: PPC_MTOCRF 0x01,r5
+3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
@@ -193,7 +267,9 @@ _GLOBAL(__copy_tofrom_user)
131:
addi r3,r3,8
120:
+320:
122:
+322:
124:
125:
126:
@@ -202,10 +278,11 @@ _GLOBAL(__copy_tofrom_user)
129:
133:
addi r3,r3,8
-121:
132:
addi r3,r3,8
-123:
+121:
+321:
+344:
134:
135:
138:
@@ -213,6 +290,9 @@ _GLOBAL(__copy_tofrom_user)
140:
141:
142:
+123:
+144:
+145:
/*
* here we have had a fault on a load and r3 points to the first
@@ -274,18 +354,22 @@ _GLOBAL(__copy_tofrom_user)
183:
add r3,r3,r7
b 1f
+371:
180:
addi r3,r3,8
171:
177:
addi r3,r3,8
-170:
-172:
+370:
+372:
176:
178:
addi r3,r3,4
185:
addi r3,r3,4
+170:
+172:
+345:
173:
174:
175:
@@ -296,6 +380,9 @@ _GLOBAL(__copy_tofrom_user)
187:
188:
189:
+194:
+195:
+196:
1:
ld r6,-24(r1)
ld r5,-8(r1)
@@ -309,14 +396,24 @@ _GLOBAL(__copy_tofrom_user)
.section __ex_table,"a"
.align 3
.llong 20b,120b
+ .llong 220b,320b
.llong 21b,121b
+ .llong 221b,321b
.llong 70b,170b
+ .llong 270b,370b
.llong 22b,122b
+ .llong 222b,322b
.llong 71b,171b
+ .llong 271b,371b
.llong 72b,172b
+ .llong 272b,372b
+ .llong 244b,344b
+ .llong 245b,345b
.llong 23b,123b
.llong 73b,173b
+ .llong 44b,144b
.llong 74b,174b
+ .llong 45b,145b
.llong 75b,175b
.llong 24b,124b
.llong 25b,125b
@@ -334,6 +431,9 @@ _GLOBAL(__copy_tofrom_user)
.llong 79b,179b
.llong 80b,180b
.llong 34b,134b
+ .llong 94b,194b
+ .llong 95b,195b
+ .llong 96b,196b
.llong 35b,135b
.llong 81b,181b
.llong 36b,136b
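
The sLd/sHd macros at the top of this file capture the core of the
unaligned-source path: two adjacent aligned doublewords are each shifted and
OR-ed so the wanted bytes line up, and "towards the low-numbered address" maps
to opposite shift directions on big- and little-endian. A C sketch of one
merge step (byte_off must be 1..7 — the aligned case takes the other path, so
a shift count of 64 never occurs):

    #include <stdint.h>

    static uint64_t merge_unaligned(uint64_t lo_dw, uint64_t hi_dw,
                                    unsigned byte_off, int big_endian)
    {
        unsigned sL = 8 * byte_off;     /* r10 in the asm */
        unsigned sH = 64 - sL;          /* r11 in the asm */

        if (big_endian)
            return (lo_dw << sL) | (hi_dw >> sH);   /* sld / srd */
        else
            return (lo_dw >> sL) | (hi_dw << sH);   /* srd / sld */
    }
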
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
new file mode 100644
index 00000000000..c46c876ac96
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -0,0 +1,721 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
+ .macro err1
+100:
+ .section __ex_table,"a"
+ .align 3
+ .llong 100b,.Ldo_err1
+ .previous
+ .endm
+
+ .macro err2
+200:
+ .section __ex_table,"a"
+ .align 3
+ .llong 200b,.Ldo_err2
+ .previous
+ .endm
+
+#ifdef CONFIG_ALTIVEC
+ .macro err3
+300:
+ .section __ex_table,"a"
+ .align 3
+ .llong 300b,.Ldo_err3
+ .previous
+ .endm
+
+ .macro err4
+400:
+ .section __ex_table,"a"
+ .align 3
+ .llong 400b,.Ldo_err4
+ .previous
+ .endm
+
+
+.Ldo_err4:
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+.Ldo_err3:
+ bl exit_vmx_usercopy
+ ld r0,STACKFRAMESIZE+16(r1)
+ mtlr r0
+ b .Lexit
+#endif /* CONFIG_ALTIVEC */
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+.Lexit:
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+ b __copy_tofrom_user_base
+
+
+_GLOBAL(__copy_tofrom_user_power7)
+#ifdef CONFIG_ALTIVEC
+ cmpldi r5,16
+ cmpldi cr1,r5,4096
+
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+
+ blt .Lshort_copy
+ bgt cr1,.Lvmx_copy
+#else
+ cmpldi r5,16
+
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+
+ blt .Lshort_copy
+#endif
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r7,16(r4)
+err2; ld r8,24(r4)
+err2; ld r9,32(r4)
+err2; ld r10,40(r4)
+err2; ld r11,48(r4)
+err2; ld r12,56(r4)
+err2; ld r14,64(r4)
+err2; ld r15,72(r4)
+err2; ld r16,80(r4)
+err2; ld r17,88(r4)
+err2; ld r18,96(r4)
+err2; ld r19,104(r4)
+err2; ld r20,112(r4)
+err2; ld r21,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r7,16(r3)
+err2; std r8,24(r3)
+err2; std r9,32(r3)
+err2; std r10,40(r3)
+err2; std r11,48(r3)
+err2; std r12,56(r3)
+err2; std r14,64(r3)
+err2; std r15,72(r3)
+err2; std r16,80(r3)
+err2; std r17,88(r3)
+err2; std r18,96(r3)
+err2; std r19,104(r3)
+err2; std r20,112(r3)
+err2; std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r7,16(r4)
+err1; ld r8,24(r4)
+err1; ld r9,32(r4)
+err1; ld r10,40(r4)
+err1; ld r11,48(r4)
+err1; ld r12,56(r4)
+ addi r4,r4,64
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r7,16(r3)
+err1; std r8,24(r3)
+err1; std r9,32(r3)
+err1; std r10,40(r3)
+err1; std r11,48(r3)
+err1; std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r7,16(r4)
+err1; ld r8,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r7,16(r3)
+err1; std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
+#ifdef CONFIG_ALTIVEC
+.Lvmx_copy:
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl enter_vmx_usercopy
+ cmpwi cr1,r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
+ mtlr r0
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ /* setup read stream 0 */
+ dcbt r0,r6,0b01000 /* addr from */
+ dcbt r0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst r0,r9,0b01000 /* addr to */
+ dcbtst r0,r10,0b01010 /* length and depth to */
+ eieio
+ dcbt r0,r8,0b01010 /* all streams GO */
+.machine pop
+
+ beq cr1,.Lunwind_stack_nonvmx_copy
+
+ /*
+ * If source and destination are not relatively aligned we use a
+ * slower permute loop.
+ */
+ xor r6,r4,r3
+ rldicl. r6,r6,0,(64-4)
+ bne .Lvmx_unaligned_copy
+
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+err3; ld r0,0(r4)
+ addi r4,r4,8
+err3; std r0,0(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ bf cr7*4+3,5f
+err3; lvx vr1,r0,r4
+ addi r4,r4,16
+err3; stvx vr1,r0,r3
+ addi r3,r3,16
+
+5: bf cr7*4+2,6f
+err3; lvx vr1,r0,r4
+err3; lvx vr0,r4,r9
+ addi r4,r4,32
+err3; stvx vr1,r0,r3
+err3; stvx vr0,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+err3; lvx vr3,r0,r4
+err3; lvx vr2,r4,r9
+err3; lvx vr1,r4,r10
+err3; lvx vr0,r4,r11
+ addi r4,r4,64
+err3; stvx vr3,r0,r3
+err3; stvx vr2,r3,r9
+err3; stvx vr1,r3,r10
+err3; stvx vr0,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+err4; lvx vr7,r0,r4
+err4; lvx vr6,r4,r9
+err4; lvx vr5,r4,r10
+err4; lvx vr4,r4,r11
+err4; lvx vr3,r4,r12
+err4; lvx vr2,r4,r14
+err4; lvx vr1,r4,r15
+err4; lvx vr0,r4,r16
+ addi r4,r4,128
+err4; stvx vr7,r0,r3
+err4; stvx vr6,r3,r9
+err4; stvx vr5,r3,r10
+err4; stvx vr4,r3,r11
+err4; stvx vr3,r3,r12
+err4; stvx vr2,r3,r14
+err4; stvx vr1,r3,r15
+err4; stvx vr0,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+err3; lvx vr3,r0,r4
+err3; lvx vr2,r4,r9
+err3; lvx vr1,r4,r10
+err3; lvx vr0,r4,r11
+ addi r4,r4,64
+err3; stvx vr3,r0,r3
+err3; stvx vr2,r3,r9
+err3; stvx vr1,r3,r10
+err3; stvx vr0,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+err3; lvx vr1,r0,r4
+err3; lvx vr0,r4,r9
+ addi r4,r4,32
+err3; stvx vr1,r0,r3
+err3; stvx vr0,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+err3; lvx vr1,r0,r4
+ addi r4,r4,16
+err3; stvx vr1,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err3; ld r0,0(r4)
+ addi r4,r4,8
+err3; std r0,0(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err3; lbz r0,0(r4)
+err3; stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ b exit_vmx_usercopy /* tail call optimise */
+
+.Lvmx_unaligned_copy:
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err3; lwz r7,4(r4)
+ addi r4,r4,8
+err3; stw r0,0(r3)
+err3; stw r7,4(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ LVS(vr16,0,r4) /* Setup permute control vector */
+err3; lvx vr0,0,r4
+ addi r4,r4,16
+
+ bf cr7*4+3,5f
+err3; lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ addi r4,r4,16
+err3; stvx vr8,r0,r3
+ addi r3,r3,16
+ vor vr0,vr1,vr1
+
+5: bf cr7*4+2,6f
+err3; lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+err3; lvx vr0,r4,r9
+ VPERM(vr9,vr1,vr0,vr16)
+ addi r4,r4,32
+err3; stvx vr8,r0,r3
+err3; stvx vr9,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+err3; lvx vr3,r0,r4
+ VPERM(vr8,vr0,vr3,vr16)
+err3; lvx vr2,r4,r9
+ VPERM(vr9,vr3,vr2,vr16)
+err3; lvx vr1,r4,r10
+ VPERM(vr10,vr2,vr1,vr16)
+err3; lvx vr0,r4,r11
+ VPERM(vr11,vr1,vr0,vr16)
+ addi r4,r4,64
+err3; stvx vr8,r0,r3
+err3; stvx vr9,r3,r9
+err3; stvx vr10,r3,r10
+err3; stvx vr11,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+err4; lvx vr7,r0,r4
+ VPERM(vr8,vr0,vr7,vr16)
+err4; lvx vr6,r4,r9
+ VPERM(vr9,vr7,vr6,vr16)
+err4; lvx vr5,r4,r10
+ VPERM(vr10,vr6,vr5,vr16)
+err4; lvx vr4,r4,r11
+ VPERM(vr11,vr5,vr4,vr16)
+err4; lvx vr3,r4,r12
+ VPERM(vr12,vr4,vr3,vr16)
+err4; lvx vr2,r4,r14
+ VPERM(vr13,vr3,vr2,vr16)
+err4; lvx vr1,r4,r15
+ VPERM(vr14,vr2,vr1,vr16)
+err4; lvx vr0,r4,r16
+ VPERM(vr15,vr1,vr0,vr16)
+ addi r4,r4,128
+err4; stvx vr8,r0,r3
+err4; stvx vr9,r3,r9
+err4; stvx vr10,r3,r10
+err4; stvx vr11,r3,r11
+err4; stvx vr12,r3,r12
+err4; stvx vr13,r3,r14
+err4; stvx vr14,r3,r15
+err4; stvx vr15,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+err3; lvx vr3,r0,r4
+ VPERM(vr8,vr0,vr3,vr16)
+err3; lvx vr2,r4,r9
+ VPERM(vr9,vr3,vr2,vr16)
+err3; lvx vr1,r4,r10
+ VPERM(vr10,vr2,vr1,vr16)
+err3; lvx vr0,r4,r11
+ VPERM(vr11,vr1,vr0,vr16)
+ addi r4,r4,64
+err3; stvx vr8,r0,r3
+err3; stvx vr9,r3,r9
+err3; stvx vr10,r3,r10
+err3; stvx vr11,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+err3; lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+err3; lvx vr0,r4,r9
+ VPERM(vr9,vr1,vr0,vr16)
+ addi r4,r4,32
+err3; stvx vr8,r0,r3
+err3; stvx vr9,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+err3; lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ addi r4,r4,16
+err3; stvx vr8,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ addi r4,r4,-16 /* Unwind the +16 load offset */
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err3; lwz r6,4(r4)
+ addi r4,r4,8
+err3; stw r0,0(r3)
+err3; stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err3; lbz r0,0(r4)
+err3; stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ b exit_vmx_usercopy /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
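
The stream setup near the top of the VMX copy builds the dcbt/dcbtst operands by hand. A minimal C sketch of that packing, assuming only the field layout spelled out in the comments (128B cachelines, a 10-bit unit count, depth 7, GO bit 0x80000000) rather than any header definition:

static unsigned long stream_len_depth(unsigned long nbytes, unsigned long stream)
{
	unsigned long cachelines = nbytes >> 7;	/* length in 128B lines */

	if (cachelines > 0x3FF)			/* unit count field is 10 bits */
		cachelines = 0x3FF;

	/* 0x0E000000 is depth 7, as built by "lis r0,0x0E00" above */
	return 0x0E000000UL | (cachelines << 7) | stream;
}

With this, r7 corresponds to stream_len_depth(len, 0) and r10 to stream_len_depth(len, 1); the dcbt/dcbtst pairs with the 0b01010 form program each stream, and the final touch of 0x80000000 (GO) starts them all.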
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
new file mode 100644
index 00000000000..a5b30c71a8d
--- /dev/null
+++ b/arch/powerpc/lib/crtsavres.S
@@ -0,0 +1,547 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ * Written By Michael Meissner
+ *
+ * Based on gcc/config/rs6000/crtsavres.asm from gcc
+ * 64 bit additions from reading the PPC elf64abi document.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#include <asm/ppc_asm.h>
+
+ .file "crtsavres.S"
+
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+
+#ifndef CONFIG_PPC64
+
+ .section ".text"
+
+/* Routines for saving integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area. */
+
+_GLOBAL(_savegpr_14)
+_GLOBAL(_save32gpr_14)
+ stw 14,-72(11) /* save gp registers */
+_GLOBAL(_savegpr_15)
+_GLOBAL(_save32gpr_15)
+ stw 15,-68(11)
+_GLOBAL(_savegpr_16)
+_GLOBAL(_save32gpr_16)
+ stw 16,-64(11)
+_GLOBAL(_savegpr_17)
+_GLOBAL(_save32gpr_17)
+ stw 17,-60(11)
+_GLOBAL(_savegpr_18)
+_GLOBAL(_save32gpr_18)
+ stw 18,-56(11)
+_GLOBAL(_savegpr_19)
+_GLOBAL(_save32gpr_19)
+ stw 19,-52(11)
+_GLOBAL(_savegpr_20)
+_GLOBAL(_save32gpr_20)
+ stw 20,-48(11)
+_GLOBAL(_savegpr_21)
+_GLOBAL(_save32gpr_21)
+ stw 21,-44(11)
+_GLOBAL(_savegpr_22)
+_GLOBAL(_save32gpr_22)
+ stw 22,-40(11)
+_GLOBAL(_savegpr_23)
+_GLOBAL(_save32gpr_23)
+ stw 23,-36(11)
+_GLOBAL(_savegpr_24)
+_GLOBAL(_save32gpr_24)
+ stw 24,-32(11)
+_GLOBAL(_savegpr_25)
+_GLOBAL(_save32gpr_25)
+ stw 25,-28(11)
+_GLOBAL(_savegpr_26)
+_GLOBAL(_save32gpr_26)
+ stw 26,-24(11)
+_GLOBAL(_savegpr_27)
+_GLOBAL(_save32gpr_27)
+ stw 27,-20(11)
+_GLOBAL(_savegpr_28)
+_GLOBAL(_save32gpr_28)
+ stw 28,-16(11)
+_GLOBAL(_savegpr_29)
+_GLOBAL(_save32gpr_29)
+ stw 29,-12(11)
+_GLOBAL(_savegpr_30)
+_GLOBAL(_save32gpr_30)
+ stw 30,-8(11)
+_GLOBAL(_savegpr_31)
+_GLOBAL(_save32gpr_31)
+ stw 31,-4(11)
+ blr
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+
+_GLOBAL(_restgpr_14)
+_GLOBAL(_rest32gpr_14)
+ lwz 14,-72(11) /* restore gp registers */
+_GLOBAL(_restgpr_15)
+_GLOBAL(_rest32gpr_15)
+ lwz 15,-68(11)
+_GLOBAL(_restgpr_16)
+_GLOBAL(_rest32gpr_16)
+ lwz 16,-64(11)
+_GLOBAL(_restgpr_17)
+_GLOBAL(_rest32gpr_17)
+ lwz 17,-60(11)
+_GLOBAL(_restgpr_18)
+_GLOBAL(_rest32gpr_18)
+ lwz 18,-56(11)
+_GLOBAL(_restgpr_19)
+_GLOBAL(_rest32gpr_19)
+ lwz 19,-52(11)
+_GLOBAL(_restgpr_20)
+_GLOBAL(_rest32gpr_20)
+ lwz 20,-48(11)
+_GLOBAL(_restgpr_21)
+_GLOBAL(_rest32gpr_21)
+ lwz 21,-44(11)
+_GLOBAL(_restgpr_22)
+_GLOBAL(_rest32gpr_22)
+ lwz 22,-40(11)
+_GLOBAL(_restgpr_23)
+_GLOBAL(_rest32gpr_23)
+ lwz 23,-36(11)
+_GLOBAL(_restgpr_24)
+_GLOBAL(_rest32gpr_24)
+ lwz 24,-32(11)
+_GLOBAL(_restgpr_25)
+_GLOBAL(_rest32gpr_25)
+ lwz 25,-28(11)
+_GLOBAL(_restgpr_26)
+_GLOBAL(_rest32gpr_26)
+ lwz 26,-24(11)
+_GLOBAL(_restgpr_27)
+_GLOBAL(_rest32gpr_27)
+ lwz 27,-20(11)
+_GLOBAL(_restgpr_28)
+_GLOBAL(_rest32gpr_28)
+ lwz 28,-16(11)
+_GLOBAL(_restgpr_29)
+_GLOBAL(_rest32gpr_29)
+ lwz 29,-12(11)
+_GLOBAL(_restgpr_30)
+_GLOBAL(_rest32gpr_30)
+ lwz 30,-8(11)
+_GLOBAL(_restgpr_31)
+_GLOBAL(_rest32gpr_31)
+ lwz 31,-4(11)
+ blr
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+
+_GLOBAL(_restgpr_14_x)
+_GLOBAL(_rest32gpr_14_x)
+ lwz 14,-72(11) /* restore gp registers */
+_GLOBAL(_restgpr_15_x)
+_GLOBAL(_rest32gpr_15_x)
+ lwz 15,-68(11)
+_GLOBAL(_restgpr_16_x)
+_GLOBAL(_rest32gpr_16_x)
+ lwz 16,-64(11)
+_GLOBAL(_restgpr_17_x)
+_GLOBAL(_rest32gpr_17_x)
+ lwz 17,-60(11)
+_GLOBAL(_restgpr_18_x)
+_GLOBAL(_rest32gpr_18_x)
+ lwz 18,-56(11)
+_GLOBAL(_restgpr_19_x)
+_GLOBAL(_rest32gpr_19_x)
+ lwz 19,-52(11)
+_GLOBAL(_restgpr_20_x)
+_GLOBAL(_rest32gpr_20_x)
+ lwz 20,-48(11)
+_GLOBAL(_restgpr_21_x)
+_GLOBAL(_rest32gpr_21_x)
+ lwz 21,-44(11)
+_GLOBAL(_restgpr_22_x)
+_GLOBAL(_rest32gpr_22_x)
+ lwz 22,-40(11)
+_GLOBAL(_restgpr_23_x)
+_GLOBAL(_rest32gpr_23_x)
+ lwz 23,-36(11)
+_GLOBAL(_restgpr_24_x)
+_GLOBAL(_rest32gpr_24_x)
+ lwz 24,-32(11)
+_GLOBAL(_restgpr_25_x)
+_GLOBAL(_rest32gpr_25_x)
+ lwz 25,-28(11)
+_GLOBAL(_restgpr_26_x)
+_GLOBAL(_rest32gpr_26_x)
+ lwz 26,-24(11)
+_GLOBAL(_restgpr_27_x)
+_GLOBAL(_rest32gpr_27_x)
+ lwz 27,-20(11)
+_GLOBAL(_restgpr_28_x)
+_GLOBAL(_rest32gpr_28_x)
+ lwz 28,-16(11)
+_GLOBAL(_restgpr_29_x)
+_GLOBAL(_rest32gpr_29_x)
+ lwz 29,-12(11)
+_GLOBAL(_restgpr_30_x)
+_GLOBAL(_rest32gpr_30_x)
+ lwz 30,-8(11)
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+ lwz 0,4(11)
+ lwz 31,-4(11)
+ mtlr 0
+ mr 1,11
+ blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+_GLOBAL(_savevr_20)
+ li r11,-192
+ stvx vr20,r11,r0
+_GLOBAL(_savevr_21)
+ li r11,-176
+ stvx vr21,r11,r0
+_GLOBAL(_savevr_22)
+ li r11,-160
+ stvx vr22,r11,r0
+_GLOBAL(_savevr_23)
+ li r11,-144
+ stvx vr23,r11,r0
+_GLOBAL(_savevr_24)
+ li r11,-128
+ stvx vr24,r11,r0
+_GLOBAL(_savevr_25)
+ li r11,-112
+ stvx vr25,r11,r0
+_GLOBAL(_savevr_26)
+ li r11,-96
+ stvx vr26,r11,r0
+_GLOBAL(_savevr_27)
+ li r11,-80
+ stvx vr27,r11,r0
+_GLOBAL(_savevr_28)
+ li r11,-64
+ stvx vr28,r11,r0
+_GLOBAL(_savevr_29)
+ li r11,-48
+ stvx vr29,r11,r0
+_GLOBAL(_savevr_30)
+ li r11,-32
+ stvx vr30,r11,r0
+_GLOBAL(_savevr_31)
+ li r11,-16
+ stvx vr31,r11,r0
+ blr
+
+_GLOBAL(_restvr_20)
+ li r11,-192
+ lvx vr20,r11,r0
+_GLOBAL(_restvr_21)
+ li r11,-176
+ lvx vr21,r11,r0
+_GLOBAL(_restvr_22)
+ li r11,-160
+ lvx vr22,r11,r0
+_GLOBAL(_restvr_23)
+ li r11,-144
+ lvx vr23,r11,r0
+_GLOBAL(_restvr_24)
+ li r11,-128
+ lvx vr24,r11,r0
+_GLOBAL(_restvr_25)
+ li r11,-112
+ lvx vr25,r11,r0
+_GLOBAL(_restvr_26)
+ li r11,-96
+ lvx vr26,r11,r0
+_GLOBAL(_restvr_27)
+ li r11,-80
+ lvx vr27,r11,r0
+_GLOBAL(_restvr_28)
+ li r11,-64
+ lvx vr28,r11,r0
+_GLOBAL(_restvr_29)
+ li r11,-48
+ lvx vr29,r11,r0
+_GLOBAL(_restvr_30)
+ li r11,-32
+ lvx vr30,r11,r0
+_GLOBAL(_restvr_31)
+ li r11,-16
+ lvx vr31,r11,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
+#else /* CONFIG_PPC64 */
+
+ .section ".text.save.restore","ax",@progbits
+
+.globl _savegpr0_14
+_savegpr0_14:
+ std r14,-144(r1)
+.globl _savegpr0_15
+_savegpr0_15:
+ std r15,-136(r1)
+.globl _savegpr0_16
+_savegpr0_16:
+ std r16,-128(r1)
+.globl _savegpr0_17
+_savegpr0_17:
+ std r17,-120(r1)
+.globl _savegpr0_18
+_savegpr0_18:
+ std r18,-112(r1)
+.globl _savegpr0_19
+_savegpr0_19:
+ std r19,-104(r1)
+.globl _savegpr0_20
+_savegpr0_20:
+ std r20,-96(r1)
+.globl _savegpr0_21
+_savegpr0_21:
+ std r21,-88(r1)
+.globl _savegpr0_22
+_savegpr0_22:
+ std r22,-80(r1)
+.globl _savegpr0_23
+_savegpr0_23:
+ std r23,-72(r1)
+.globl _savegpr0_24
+_savegpr0_24:
+ std r24,-64(r1)
+.globl _savegpr0_25
+_savegpr0_25:
+ std r25,-56(r1)
+.globl _savegpr0_26
+_savegpr0_26:
+ std r26,-48(r1)
+.globl _savegpr0_27
+_savegpr0_27:
+ std r27,-40(r1)
+.globl _savegpr0_28
+_savegpr0_28:
+ std r28,-32(r1)
+.globl _savegpr0_29
+_savegpr0_29:
+ std r29,-24(r1)
+.globl _savegpr0_30
+_savegpr0_30:
+ std r30,-16(r1)
+.globl _savegpr0_31
+_savegpr0_31:
+ std r31,-8(r1)
+ std r0,16(r1)
+ blr
+
+.globl _restgpr0_14
+_restgpr0_14:
+ ld r14,-144(r1)
+.globl _restgpr0_15
+_restgpr0_15:
+ ld r15,-136(r1)
+.globl _restgpr0_16
+_restgpr0_16:
+ ld r16,-128(r1)
+.globl _restgpr0_17
+_restgpr0_17:
+ ld r17,-120(r1)
+.globl _restgpr0_18
+_restgpr0_18:
+ ld r18,-112(r1)
+.globl _restgpr0_19
+_restgpr0_19:
+ ld r19,-104(r1)
+.globl _restgpr0_20
+_restgpr0_20:
+ ld r20,-96(r1)
+.globl _restgpr0_21
+_restgpr0_21:
+ ld r21,-88(r1)
+.globl _restgpr0_22
+_restgpr0_22:
+ ld r22,-80(r1)
+.globl _restgpr0_23
+_restgpr0_23:
+ ld r23,-72(r1)
+.globl _restgpr0_24
+_restgpr0_24:
+ ld r24,-64(r1)
+.globl _restgpr0_25
+_restgpr0_25:
+ ld r25,-56(r1)
+.globl _restgpr0_26
+_restgpr0_26:
+ ld r26,-48(r1)
+.globl _restgpr0_27
+_restgpr0_27:
+ ld r27,-40(r1)
+.globl _restgpr0_28
+_restgpr0_28:
+ ld r28,-32(r1)
+.globl _restgpr0_29
+_restgpr0_29:
+ ld r0,16(r1)
+ ld r29,-24(r1)
+ mtlr r0
+ ld r30,-16(r1)
+ ld r31,-8(r1)
+ blr
+
+.globl _restgpr0_30
+_restgpr0_30:
+ ld r30,-16(r1)
+.globl _restgpr0_31
+_restgpr0_31:
+ ld r0,16(r1)
+ ld r31,-8(r1)
+ mtlr r0
+ blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+.globl _savevr_20
+_savevr_20:
+ li r12,-192
+ stvx vr20,r12,r0
+.globl _savevr_21
+_savevr_21:
+ li r12,-176
+ stvx vr21,r12,r0
+.globl _savevr_22
+_savevr_22:
+ li r12,-160
+ stvx vr22,r12,r0
+.globl _savevr_23
+_savevr_23:
+ li r12,-144
+ stvx vr23,r12,r0
+.globl _savevr_24
+_savevr_24:
+ li r12,-128
+ stvx vr24,r12,r0
+.globl _savevr_25
+_savevr_25:
+ li r12,-112
+ stvx vr25,r12,r0
+.globl _savevr_26
+_savevr_26:
+ li r12,-96
+ stvx vr26,r12,r0
+.globl _savevr_27
+_savevr_27:
+ li r12,-80
+ stvx vr27,r12,r0
+.globl _savevr_28
+_savevr_28:
+ li r12,-64
+ stvx vr28,r12,r0
+.globl _savevr_29
+_savevr_29:
+ li r12,-48
+ stvx vr29,r12,r0
+.globl _savevr_30
+_savevr_30:
+ li r12,-32
+ stvx vr30,r12,r0
+.globl _savevr_31
+_savevr_31:
+ li r12,-16
+ stvx vr31,r12,r0
+ blr
+
+.globl _restvr_20
+_restvr_20:
+ li r12,-192
+ lvx vr20,r12,r0
+.globl _restvr_21
+_restvr_21:
+ li r12,-176
+ lvx vr21,r12,r0
+.globl _restvr_22
+_restvr_22:
+ li r12,-160
+ lvx vr22,r12,r0
+.globl _restvr_23
+_restvr_23:
+ li r12,-144
+ lvx vr23,r12,r0
+.globl _restvr_24
+_restvr_24:
+ li r12,-128
+ lvx vr24,r12,r0
+.globl _restvr_25
+_restvr_25:
+ li r12,-112
+ lvx vr25,r12,r0
+.globl _restvr_26
+_restvr_26:
+ li r12,-96
+ lvx vr26,r12,r0
+.globl _restvr_27
+_restvr_27:
+ li r12,-80
+ lvx vr27,r12,r0
+.globl _restvr_28
+_restvr_28:
+ li r12,-64
+ lvx vr28,r12,r0
+.globl _restvr_29
+_restvr_29:
+ li r12,-48
+ lvx vr29,r12,r0
+.globl _restvr_30
+_restvr_30:
+ li r12,-32
+ lvx vr30,r12,r0
+.globl _restvr_31
+_restvr_31:
+ li r12,-16
+ lvx vr31,r12,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
+#endif /* CONFIG_PPC64 */
+
+#endif
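
The out-of-line save areas above follow a fixed formula: GPR n is saved at -8*(32-n) bytes in the 64-bit area, at -4*(32-n) in the 32-bit one, and VR n at -16*(32-n). A small sketch of that arithmetic (the function names are invented for illustration):

static int gpr64_slot(int n) { return -8 * (32 - n); }	/* r14 -> -144(r1) */
static int gpr32_slot(int n) { return -4 * (32 - n); }	/* r14 -> -72(11) */
static int vr_slot(int n)    { return -16 * (32 - n); }	/* vr20 -> -192 */

which matches, for example, "std r14,-144(r1)" and the "li r11,-192; stvx vr20,r11,r0" pair above.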
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
new file mode 100644
index 00000000000..8df55fc3aad
--- /dev/null
+++ b/arch/powerpc/lib/devres.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/device.h> /* devres_*(), devm_ioremap_release() */
+#include <linux/gfp.h>
+#include <linux/io.h> /* ioremap_prot() */
+#include <linux/export.h> /* EXPORT_SYMBOL() */
+
+/**
+ * devm_ioremap_prot - Managed ioremap_prot()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ * @flags: Page flags
+ *
+ * Managed ioremap_prot(). Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ void __iomem **ptr, *addr;
+
+ ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = ioremap_prot(offset, size, flags);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_prot);
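
A hypothetical caller, to show the intended use; the foo_* names, the resource handling and the _PAGE_NO_CACHE flag are assumptions for the sketch, not part of this patch:

static int foo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *regs;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/* mapped with the caller-chosen page flags */
	regs = devm_ioremap_prot(&pdev->dev, res->start,
				 resource_size(res), _PAGE_NO_CACHE);
	if (!regs)
		return -ENOMEM;

	/* no iounmap() needed: the mapping is released on driver detach */
	return 0;
}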
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
deleted file mode 100644
index 6656d47841d..00000000000
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * PowerPC version derived from arch/arm/mm/consistent.c
- * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- *
- * Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/highmem.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/tlbflush.h>
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE (CONFIG_CONSISTENT_START)
-#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte;
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
-};
-
-static struct vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct vm_region *c, *new;
-
- new = kmalloc(sizeof(struct vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
-{
- struct vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
-{
- struct page *page;
- struct vm_region *c;
- unsigned long order;
- u64 mask = 0x00ffffff, limit; /* ISA default */
-
- if (!consistent_pte) {
- printk(KERN_ERR "%s: not initialised\n", __func__);
- dump_stack();
- return NULL;
- }
-
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
- size, mask);
- return NULL;
- }
-
- order = get_order(size);
-
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- flush_dcache_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- unsigned long vaddr = c->vm_start;
- pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
- struct page *end = page + (1 << order);
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *handle = page_to_bus(page);
-
- do {
- BUG_ON(!pte_none(*pte));
-
- SetPageReserved(page);
- set_pte_at(&init_mm, vaddr,
- pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
- page++;
- pte++;
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- return NULL;
-}
-EXPORT_SYMBOL(__dma_alloc_coherent);
-
-/*
- * free a page as defined by the above mapping.
- */
-void __dma_free_coherent(size_t size, void *vaddr)
-{
- struct vm_region *c;
- unsigned long flags, addr;
- pte_t *ptep;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- c = vm_region_find(&consistent_head, (unsigned long)vaddr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
- addr = c->vm_start;
- do {
- pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
- unsigned long pfn;
-
- ptep++;
- addr += PAGE_SIZE;
-
- if (!pte_none(pte) && pte_present(pte)) {
- pfn = pte_pfn(pte);
-
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- ClearPageReserved(page);
-
- __free_page(page);
- continue;
- }
- }
-
- printk(KERN_CRIT "%s: bad page in kernel page table\n",
- __func__);
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, vaddr);
- dump_stack();
-}
-EXPORT_SYMBOL(__dma_free_coherent);
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init dma_alloc_init(void)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int ret = 0;
-
- do {
- pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
- pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE);
- pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE);
- if (!pmd) {
- printk(KERN_ERR "%s: no pmd tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
- WARN_ON(!pmd_none(*pmd));
-
- pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
- if (!pte) {
- printk(KERN_ERR "%s: no pte tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
-
- consistent_pte = pte;
- } while (0);
-
- return ret;
-}
-
-core_initcall(dma_alloc_init);
-
-/*
- * make an area consistent.
- */
-void __dma_sync(void *vaddr, size_t size, int direction)
-{
- unsigned long start = (unsigned long)vaddr;
- unsigned long end = start + size;
-
- switch (direction) {
- case DMA_NONE:
- BUG();
- case DMA_FROM_DEVICE: /* invalidate only */
- invalidate_dcache_range(start, end);
- break;
- case DMA_TO_DEVICE: /* writeback only */
- clean_dcache_range(start, end);
- break;
- case DMA_BIDIRECTIONAL: /* writeback and invalidate */
- flush_dcache_range(start, end);
- break;
- }
-}
-EXPORT_SYMBOL(__dma_sync);
-
-#ifdef CONFIG_HIGHMEM
-/*
- * __dma_sync_page() implementation for systems using highmem.
- * In this case, each page of a buffer must be kmapped/kunmapped
- * in order to have a virtual address for __dma_sync(). This must
- * not sleep so kmap_atomic()/kunmap_atomic() are used.
- *
- * Note: yes, it is possible and correct to have a buffer extend
- * beyond the first page.
- */
-static inline void __dma_sync_page_highmem(struct page *page,
- unsigned long offset, size_t size, int direction)
-{
- size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);
- size_t cur_size = seg_size;
- unsigned long flags, start, seg_offset = offset;
- int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;
- int seg_nr = 0;
-
- local_irq_save(flags);
-
- do {
- start = (unsigned long)kmap_atomic(page + seg_nr,
- KM_PPC_SYNC_PAGE) + seg_offset;
-
- /* Sync this buffer segment */
- __dma_sync((void *)start, seg_size, direction);
- kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE);
- seg_nr++;
-
- /* Calculate next buffer segment size */
- seg_size = min((size_t)PAGE_SIZE, size - cur_size);
-
- /* Add the segment size to our running total */
- cur_size += seg_size;
- seg_offset = 0;
- } while (seg_nr < nr_segs);
-
- local_irq_restore(flags);
-}
-#endif /* CONFIG_HIGHMEM */
-
-/*
- * __dma_sync_page makes memory consistent. identical to __dma_sync, but
- * takes a struct page instead of a virtual address
- */
-void __dma_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction)
-{
-#ifdef CONFIG_HIGHMEM
- __dma_sync_page_highmem(page, offset, size, direction);
-#else
- unsigned long start = (unsigned long)page_address(page) + offset;
- __dma_sync((void *)start, size, direction);
-#endif
-}
-EXPORT_SYMBOL(__dma_sync_page);
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
new file mode 100644
index 00000000000..f4613118132
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -0,0 +1,761 @@
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/feature-fixups.h>
+#include <asm/ppc_asm.h>
+#include <asm/synch.h>
+
+ .text
+
+#define globl(x) \
+ .globl x; \
+x:
+
+globl(ftr_fixup_test1)
+ or 1,1,1
+ or 2,2,2 /* fixup will nop out this instruction */
+ or 3,3,3
+
+globl(end_ftr_fixup_test1)
+
+globl(ftr_fixup_test1_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test1_expected)
+ or 1,1,1
+ nop
+ or 3,3,3
+
+globl(ftr_fixup_test2)
+ or 1,1,1
+ or 2,2,2 /* fixup will replace this with ftr_fixup_test2_alt */
+ or 3,3,3
+
+globl(end_ftr_fixup_test2)
+
+globl(ftr_fixup_test2_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test2_alt)
+ or 31,31,31
+
+globl(ftr_fixup_test2_expected)
+ or 1,1,1
+ or 31,31,31
+ or 3,3,3
+
+globl(ftr_fixup_test3)
+ or 1,1,1
+ or 2,2,2 /* fixup will fail to replace this */
+ or 3,3,3
+
+globl(end_ftr_fixup_test3)
+
+globl(ftr_fixup_test3_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test3_alt)
+ or 31,31,31
+ or 31,31,31
+
+globl(ftr_fixup_test4)
+ or 1,1,1
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 3,3,3
+
+globl(end_ftr_fixup_test4)
+
+globl(ftr_fixup_test4_expected)
+ or 1,1,1
+ or 31,31,31
+ or 31,31,31
+ nop
+ nop
+ or 3,3,3
+
+globl(ftr_fixup_test4_orig)
+ or 1,1,1
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test4_alt)
+ or 31,31,31
+ or 31,31,31
+
+
+globl(ftr_fixup_test5)
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+1: bdnz 3b
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+
+globl(end_ftr_fixup_test5)
+
+globl(ftr_fixup_test5_expected)
+ or 1,1,1
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+1: bdnz 3b
+ or 1,1,1
+
+globl(ftr_fixup_test6)
+1: or 1,1,1
+BEGIN_FTR_SECTION
+ or 5,5,5
+2: PPC_LCMPI r3,0
+ beq 4f
+ blt 2b
+ b 1b
+ b 4f
+FTR_SECTION_ELSE
+2: or 2,2,2
+ PPC_LCMPI r3,1
+ beq 3f
+ blt 2b
+ b 3f
+ b 1b
+ALT_FTR_SECTION_END(0, 1)
+3: or 1,1,1
+ or 2,2,2
+4: or 3,3,3
+
+globl(end_ftr_fixup_test6)
+
+globl(ftr_fixup_test6_expected)
+1: or 1,1,1
+2: or 2,2,2
+ PPC_LCMPI r3,1
+ beq 3f
+ blt 2b
+ b 3f
+ b 1b
+2: or 1,1,1
+ or 2,2,2
+3: or 3,3,3
+
+
+#if 0
+/* Test that if we have a larger else case the assembler spots it and
+ * reports an error. #if 0'ed so as not to break the build normally.
+ */
+ftr_fixup_test7:
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+ or 3,3,3
+ or 3,3,3
+ or 3,3,3
+ or 3,3,3
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+#endif
+
+#define MAKE_MACRO_TEST(TYPE) \
+globl(ftr_fixup_test_ ##TYPE##_macros) \
+ or 1,1,1; \
+ /* Basic test, this section should all be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic test, this section should NOT be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, inner section should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, whole section should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 0, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, none should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 0, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, default case should be taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, else case should be taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+ or 31,31,31; \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt with smaller else case, should be padded with nops */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in default case */ \
+ /* Default case should be taken, with nop'ed inner section */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 2,2,2; \
+ or 2,2,2; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, else taken & nop */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, all nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else can have large else case */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+ or 5,5,5; \
+ or 5,5,5; \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1;
+
+#define MAKE_MACRO_TEST_EXPECTED(TYPE) \
+globl(ftr_fixup_test_ ##TYPE##_macros_expected) \
+ or 1,1,1; \
+ /* Basic test, this section should all be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic test, this section should NOT be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, inner section should be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 80) */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, whole section should be nop'ed */ \
+ /* NB. inner section is not nop'ed, but then entire outer is */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+ nop; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, none should be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ or 3,3,3; \
+ or 3,3,3; \
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, default case should be taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, else case should be taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+ or 31,31,31; \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt with smaller else case, should be padded with nops */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+ nop; \
+ nop; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in default case */ \
+ /* Default case should be taken, with nop'ed inner section */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 5,5,5; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 3,3,3; */ \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, else taken & nop */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 5,5,5; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ or 1,1,1; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, all nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ or 1,1,1; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ or 2,2,2; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 31,31,31; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ /* or 31,31,31; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ or 2,2,2; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 31,31,31; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ /* or 31,31,31; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ or 5,5,5; \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ or 1,1,1; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else can have large else case */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1;
+
+MAKE_MACRO_TEST(FTR);
+MAKE_MACRO_TEST_EXPECTED(FTR);
+
+#ifdef CONFIG_PPC64
+MAKE_MACRO_TEST(FW_FTR);
+MAKE_MACRO_TEST_EXPECTED(FW_FTR);
+#endif
+
+globl(lwsync_fixup_test)
+1: or 1,1,1
+ LWSYNC
+globl(end_lwsync_fixup_test)
+
+globl(lwsync_fixup_test_expected_LWSYNC)
+1: or 1,1,1
+ lwsync
+
+globl(lwsync_fixup_test_expected_SYNC)
+1: or 1,1,1
+ sync
+
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
new file mode 100644
index 00000000000..7a8a7487cee
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <asm/cputable.h>
+#include <asm/code-patching.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+
+
+struct fixup_entry {
+ unsigned long mask;
+ unsigned long value;
+ long start_off;
+ long end_off;
+ long alt_start_off;
+ long alt_end_off;
+};
+
+static unsigned int *calc_addr(struct fixup_entry *fcur, long offset)
+{
+ /*
+ * We store the offset to the code as a negative offset from
+ * the start of the alt_entry, to support the VDSO. This
+ * routine converts that back into an actual address.
+ */
+ return (unsigned int *)((unsigned long)fcur + offset);
+}
+
+static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
+ unsigned int *alt_start, unsigned int *alt_end)
+{
+ unsigned int instr;
+
+ instr = *src;
+
+ if (instr_is_relative_branch(*src)) {
+ unsigned int *target = (unsigned int *)branch_target(src);
+
+ /* Branch within the section doesn't need translating */
+ if (target < alt_start || target >= alt_end) {
+ instr = translate_branch(dest, src);
+ if (!instr)
+ return 1;
+ }
+ }
+
+ patch_instruction(dest, instr);
+
+ return 0;
+}
+
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+ unsigned int *start, *end, *alt_start, *alt_end, *src, *dest;
+
+ start = calc_addr(fcur, fcur->start_off);
+ end = calc_addr(fcur, fcur->end_off);
+ alt_start = calc_addr(fcur, fcur->alt_start_off);
+ alt_end = calc_addr(fcur, fcur->alt_end_off);
+
+ if ((alt_end - alt_start) > (end - start))
+ return 1;
+
+ if ((value & fcur->mask) == fcur->value)
+ return 0;
+
+ src = alt_start;
+ dest = start;
+
+ for (; src < alt_end; src++, dest++) {
+ if (patch_alt_instruction(src, dest, alt_start, alt_end))
+ return 1;
+ }
+
+ for (; dest < end; dest++)
+ patch_instruction(dest, PPC_INST_NOP);
+
+ return 0;
+}
+
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ struct fixup_entry *fcur, *fend;
+
+ fcur = fixup_start;
+ fend = fixup_end;
+
+ for (; fcur < fend; fcur++) {
+ if (patch_feature_section(value, fcur)) {
+ WARN_ON(1);
+			printk("Unable to patch feature section at %p - %p"
+ " with %p - %p\n",
+ calc_addr(fcur, fcur->start_off),
+ calc_addr(fcur, fcur->end_off),
+ calc_addr(fcur, fcur->alt_start_off),
+ calc_addr(fcur, fcur->alt_end_off));
+ }
+ }
+}
+
+void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ long *start, *end;
+ unsigned int *dest;
+
+ if (!(value & CPU_FTR_LWSYNC))
+		return;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ for (; start < end; start++) {
+ dest = (void *)start + *start;
+ patch_instruction(dest, PPC_INST_LWSYNC);
+ }
+}
+
+void do_final_fixups(void)
+{
+#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
+ int *src, *dest;
+ unsigned long length;
+
+ if (PHYSICAL_START == 0)
+ return;
+
+ src = (int *)(KERNELBASE + PHYSICAL_START);
+ dest = (int *)KERNELBASE;
+ length = (__end_interrupts - _stext) / sizeof(int);
+
+ while (length--) {
+ patch_instruction(dest, *src);
+ src++;
+ dest++;
+ }
+#endif
+}
+
+#ifdef CONFIG_FTR_FIXUP_SELFTEST
+
+#define check(x) \
+ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+
+/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
+static struct fixup_entry fixup;
+
+static long calc_offset(struct fixup_entry *entry, unsigned int *p)
+{
+ return (unsigned long)p - (unsigned long)entry;
+}
+
+static void test_basic_patching(void)
+{
+ extern unsigned int ftr_fixup_test1;
+ extern unsigned int end_ftr_fixup_test1;
+ extern unsigned int ftr_fixup_test1_orig;
+ extern unsigned int ftr_fixup_test1_expected;
+ int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
+ fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+ fixup.alt_start_off = fixup.alt_end_off = 0;
+
+ /* Sanity check */
+ check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(8, &fixup);
+ check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
+ check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ patch_feature_section(~8, &fixup);
+ check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+}
+
+static void test_alternative_patching(void)
+{
+ extern unsigned int ftr_fixup_test2;
+ extern unsigned int end_ftr_fixup_test2;
+ extern unsigned int ftr_fixup_test2_orig;
+ extern unsigned int ftr_fixup_test2_alt;
+ extern unsigned int ftr_fixup_test2_expected;
+ int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+
+ fixup.value = fixup.mask = 0xF;
+ fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
+ fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+
+ /* Sanity check */
+ check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(0xF, &fixup);
+ check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
+ check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ patch_feature_section(~0xF, &fixup);
+ check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+}
+
+static void test_alternative_case_too_big(void)
+{
+ extern unsigned int ftr_fixup_test3;
+ extern unsigned int end_ftr_fixup_test3;
+ extern unsigned int ftr_fixup_test3_orig;
+ extern unsigned int ftr_fixup_test3_alt;
+ int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+
+ fixup.value = fixup.mask = 0xC;
+ fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
+ fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+
+ /* Sanity check */
+ check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+
+ /* Expect nothing to be patched, and the error returned to us */
+ check(patch_feature_section(0xF, &fixup) == 1);
+ check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(patch_feature_section(0, &fixup) == 1);
+ check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(patch_feature_section(~0xF, &fixup) == 1);
+ check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+}
+
+static void test_alternative_case_too_small(void)
+{
+ extern unsigned int ftr_fixup_test4;
+ extern unsigned int end_ftr_fixup_test4;
+ extern unsigned int ftr_fixup_test4_orig;
+ extern unsigned int ftr_fixup_test4_alt;
+ extern unsigned int ftr_fixup_test4_expected;
+ int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+ unsigned long flag;
+
+ /* Check a high-bit flag */
+ flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
+ fixup.value = fixup.mask = flag;
+ fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
+ fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
+ fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
+ fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+
+ /* Sanity check */
+ check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(flag, &fixup);
+ check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
+ check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ patch_feature_section(~flag, &fixup);
+ check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+}
+
+static void test_alternative_case_with_branch(void)
+{
+ extern unsigned int ftr_fixup_test5;
+ extern unsigned int end_ftr_fixup_test5;
+ extern unsigned int ftr_fixup_test5_expected;
+ int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+
+ check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+}
+
+static void test_alternative_case_with_external_branch(void)
+{
+ extern unsigned int ftr_fixup_test6;
+ extern unsigned int end_ftr_fixup_test6;
+ extern unsigned int ftr_fixup_test6_expected;
+ int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+
+ check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+}
+
+static void test_cpu_macros(void)
+{
+ extern u8 ftr_fixup_test_FTR_macros;
+ extern u8 ftr_fixup_test_FTR_macros_expected;
+ unsigned long size = &ftr_fixup_test_FTR_macros_expected -
+ &ftr_fixup_test_FTR_macros;
+
+ /* The fixups have already been done for us during boot */
+ check(memcmp(&ftr_fixup_test_FTR_macros,
+ &ftr_fixup_test_FTR_macros_expected, size) == 0);
+}
+
+static void test_fw_macros(void)
+{
+#ifdef CONFIG_PPC64
+ extern u8 ftr_fixup_test_FW_FTR_macros;
+ extern u8 ftr_fixup_test_FW_FTR_macros_expected;
+ unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
+ &ftr_fixup_test_FW_FTR_macros;
+
+ /* The fixups have already been done for us during boot */
+ check(memcmp(&ftr_fixup_test_FW_FTR_macros,
+ &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+#endif
+}
+
+static void test_lwsync_macros(void)
+{
+ extern u8 lwsync_fixup_test;
+ extern u8 end_lwsync_fixup_test;
+ extern u8 lwsync_fixup_test_expected_LWSYNC;
+ extern u8 lwsync_fixup_test_expected_SYNC;
+ unsigned long size = &end_lwsync_fixup_test -
+ &lwsync_fixup_test;
+
+ /* The fixups have already been done for us during boot */
+ if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
+ check(memcmp(&lwsync_fixup_test,
+ &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+ } else {
+ check(memcmp(&lwsync_fixup_test,
+ &lwsync_fixup_test_expected_SYNC, size) == 0);
+ }
+}
+
+static int __init test_feature_fixups(void)
+{
+ printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
+
+ test_basic_patching();
+ test_alternative_patching();
+ test_alternative_case_too_big();
+ test_alternative_case_too_small();
+ test_alternative_case_with_branch();
+ test_alternative_case_with_external_branch();
+ test_cpu_macros();
+ test_fw_macros();
+ test_lwsync_macros();
+
+ return 0;
+}
+late_initcall(test_feature_fixups);
+
+#endif /* CONFIG_FTR_FIXUP_SELFTEST */
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
new file mode 100644
index 00000000000..19e66001a4f
--- /dev/null
+++ b/arch/powerpc/lib/hweight_64.S
@@ -0,0 +1,110 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+/* Note: This code relies on -mminimal-toc */
+
+_GLOBAL(__arch_hweight8)
+BEGIN_FTR_SECTION
+ b __sw_hweight8
+ nop
+ nop
+FTR_SECTION_ELSE
+ PPC_POPCNTB(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight16)
+BEGIN_FTR_SECTION
+ b __sw_hweight16
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(50)
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(50)
+ clrlwi r3,r3,16
+ PPC_POPCNTW(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight32)
+BEGIN_FTR_SECTION
+ b __sw_hweight32
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(51)
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(51)
+ PPC_POPCNTW(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight64)
+BEGIN_FTR_SECTION
+ b __sw_hweight64
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(52)
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,32
+ add r3,r4,r3
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(52)
+ PPC_POPCNTD(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
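A rough C model (illustrative only, not kernel code; the function name is
made up) of the fold __arch_hweight64 performs above: popcntb leaves each
byte of r3 holding that byte's population count, and the srdi/add pairs sum
the eight byte counts into the low byte.

    #include <stdint.h>

    static unsigned int hweight64_popcntb_model(uint64_t x)
    {
            uint64_t t = 0;
            int i;

            /* per-byte population counts, as PPC_POPCNTB produces */
            for (i = 0; i < 8; i++)
                    t |= (uint64_t)__builtin_popcountll((x >> (8 * i)) & 0xff)
                            << (8 * i);
            t += t >> 32;    /* srdi r4,r3,32; add r3,r4,r3 */
            t += t >> 16;    /* srdi r4,r3,16; add r3,r4,r3 */
            t += t >> 8;     /* srdi r4,r3,8;  add r3,r4,r3 */
            return t & 0xff; /* clrldi r3,r3,64-8 */
    }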
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
new file mode 100644
index 00000000000..85aec08ab23
--- /dev/null
+++ b/arch/powerpc/lib/ldstfp.S
@@ -0,0 +1,379 @@
+/*
+ * Floating-point, VMX/Altivec and VSX loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+#ifdef CONFIG_PPC_FPU
+
+#define STKFRM (PPC_MIN_STKFRM + 16)
+
+ .macro extab instr,handler
+ .section __ex_table,"a"
+ PPC_LONG \instr,\handler
+ .previous
+ .endm
+
+ .macro inst32 op
+reg = 0
+ .rept 32
+20: \op reg,0,r4
+ b 3f
+ extab 20b,99f
+reg = reg + 1
+ .endr
+ .endm
+
+/* Get the contents of frN into fr0; N is in r3. */
+_GLOBAL(get_fpr)
+ mflr r0
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+ blr /* fr0 is already in fr0 */
+ nop
+reg = 1
+ .rept 31
+ fmr fr0,reg
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Put the contents of fr0 into frN; N is in r3. */
+_GLOBAL(put_fpr)
+ mflr r0
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+ blr /* fr0 is already in fr0 */
+ nop
+reg = 1
+ .rept 31
+ fmr reg,fr0
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
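+
+/*
+ * Note on get_fpr/put_fpr above: each table entry is two 4-byte
+ * instructions (fmr + blr), so "rlwinm r3,r3,3,0xf8" scales the
+ * register number by 8, and the bcl 20,31 / mflr pair yields the
+ * address of entry 0 so the computed branch is position-independent.
+ */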
+
+/* Load FP reg N from float at *p. N is in r3, p in r4. */
+_GLOBAL(do_lfs)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ ori r7,r6,MSR_FP
+ cmpwi cr7,r3,0
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stfd fr0,STKFRM-16(r1)
+1: li r9,-EFAULT
+2: lfs fr0,0(r4)
+ li r9,0
+3: bl put_fpr
+ beq cr7,4f
+ lfd fr0,STKFRM-16(r1)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
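+
+/*
+ * The same pattern recurs in each accessor here: enable MSR_FP (or
+ * MSR_VEC/MSR_VSX), save fr0 on the stack unless register 0 itself is
+ * the target (cr7 tracks that), do the access under an exception
+ * table entry so a fault returns -EFAULT in r9, then restore fr0 and
+ * the original MSR.
+ */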
+
+/* Load FP reg N from double at *p. N is in r3, p in r4. */
+_GLOBAL(do_lfd)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ ori r7,r6,MSR_FP
+ cmpwi cr7,r3,0
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stfd fr0,STKFRM-16(r1)
+1: li r9,-EFAULT
+2: lfd fr0,0(r4)
+ li r9,0
+3: beq cr7,4f
+ bl put_fpr
+ lfd fr0,STKFRM-16(r1)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+/* Store FP reg N to float at *p. N is in r3, p in r4. */
+_GLOBAL(do_stfs)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ ori r7,r6,MSR_FP
+ cmpwi cr7,r3,0
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stfd fr0,STKFRM-16(r1)
+ bl get_fpr
+1: li r9,-EFAULT
+2: stfs fr0,0(r4)
+ li r9,0
+3: beq cr7,4f
+ lfd fr0,STKFRM-16(r1)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+/* Store FP reg N to double at *p. N is in r3, p in r4. */
+_GLOBAL(do_stfd)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ ori r7,r6,MSR_FP
+ cmpwi cr7,r3,0
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stfd fr0,STKFRM-16(r1)
+ bl get_fpr
+1: li r9,-EFAULT
+2: stfd fr0,0(r4)
+ li r9,0
+3: beq cr7,4f
+ lfd fr0,STKFRM-16(r1)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into vr0; N is in r3. */
+_GLOBAL(get_vr)
+ mflr r0
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+ blr /* vr0 is already in vr0 */
+ nop
+reg = 1
+ .rept 31
+ vor vr0,reg,reg /* assembler doesn't know vmr? */
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Put the contents of vr0 into vrN; N is in r3. */
+_GLOBAL(put_vr)
+ mflr r0
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+ blr /* vr0 is already in vr0 */
+ nop
+reg = 1
+ .rept 31
+ vor reg,vr0,vr0
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Load vector reg N from *p. N is in r3, p in r4. */
+_GLOBAL(do_lvx)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VEC@h
+ cmpwi cr7,r3,0
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stvx vr0,r1,r8
+1: li r9,-EFAULT
+2: lvx vr0,0,r4
+ li r9,0
+3: beq cr7,4f
+ bl put_vr
+ lvx vr0,r1,r8
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+/* Store vector reg N to *p. N is in r3, p in r4. */
+_GLOBAL(do_stvx)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VEC@h
+ cmpwi cr7,r3,0
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ stvx vr0,r1,r8
+ bl get_vr
+1: li r9,-EFAULT
+2: stvx vr0,0,r4
+ li r9,0
+3: beq cr7,4f
+ lvx vr0,r1,r8
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/* Get the contents of vsrN into vsr0; N is in r3. */
+_GLOBAL(get_vsr)
+ mflr r0
+ rlwinm r3,r3,3,0x1f8
+ bcl 20,31,1f
+ blr /* vsr0 is already in vsr0 */
+ nop
+reg = 1
+ .rept 63
+ XXLOR(0,reg,reg)
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Put the contents of vsr0 into vsrN; N is in r3. */
+_GLOBAL(put_vsr)
+ mflr r0
+ rlwinm r3,r3,3,0x1f8
+ bcl 20,31,1f
+ blr /* vsr0 is already in vsr0 */
+ nop
+reg = 1
+ .rept 63
+ XXLOR(reg,0,0)
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */
+_GLOBAL(do_lxvd2x)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VSX@h
+ cmpwi cr7,r3,0
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ STXVD2X(0,R1,R8)
+1: li r9,-EFAULT
+2: LXVD2X(0,R0,R4)
+ li r9,0
+3: beq cr7,4f
+ bl put_vsr
+ LXVD2X(0,R1,R8)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+/* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */
+_GLOBAL(do_stxvd2x)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VSX@h
+ cmpwi cr7,r3,0
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ STXVD2X(0,R1,R8)
+ bl get_vsr
+1: li r9,-EFAULT
+2: STXVD2X(0,R0,R4)
+ li r9,0
+3: beq cr7,4f
+ LXVD2X(0,R1,R8)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+ extab 2b,3b
+
+#endif /* CONFIG_VSX */
+
+#endif /* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 79d0fa3a470..0c9c8d7d073 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -14,18 +14,16 @@
#include <linux/kernel.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/stringify.h>
#include <linux/smp.h>
/* waiting for a spinlock... */
-#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
+#if defined(CONFIG_PPC_SPLPAR)
#include <asm/hvcall.h>
-#include <asm/iseries/hv_call.h>
#include <asm/smp.h>
-#include <asm/firmware.h>
-void __spin_yield(raw_spinlock_t *lock)
+void __spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -34,20 +32,14 @@ void __spin_yield(raw_spinlock_t *lock)
return;
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = lppaca[holder_cpu].yield_count;
+ yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (lock->slock != lock_value)
return; /* something has changed */
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
- ((u64)holder_cpu << 32) | yield_count);
-#ifdef CONFIG_PPC_SPLPAR
- else
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
-#endif
+ plpar_hcall_norets(H_CONFER,
+ get_hard_smp_processor_id(holder_cpu), yield_count);
}
/*
@@ -55,7 +47,7 @@ void __spin_yield(raw_spinlock_t *lock)
* This turns out to be the same for read and write locks, since
* we only know the holder if it is write-locked.
*/
-void __rw_yield(raw_rwlock_t *rw)
+void __rw_yield(arch_rwlock_t *rw)
{
int lock_value;
unsigned int holder_cpu, yield_count;
@@ -65,24 +57,18 @@ void __rw_yield(raw_rwlock_t *rw)
return; /* no write lock at present */
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = lppaca[holder_cpu].yield_count;
+ yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (rw->lock != lock_value)
return; /* something has changed */
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
- ((u64)holder_cpu << 32) | yield_count);
-#ifdef CONFIG_PPC_SPLPAR
- else
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
-#endif
+ plpar_hcall_norets(H_CONFER,
+ get_hard_smp_processor_id(holder_cpu), yield_count);
}
#endif
-void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
while (lock->slock) {
HMT_low();
@@ -92,4 +78,4 @@ void __raw_spin_unlock_wait(raw_spinlock_t *lock)
HMT_medium();
}
-EXPORT_SYMBOL(__raw_spin_unlock_wait);
+EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 11ce045e21f..43435c6892f 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- PPC_MTOCRF 1,r0
+ PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
bdnz 4b
5: srwi. r0,r5,3
clrlwi r5,r5,29
- PPC_MTOCRF 1,r0
+ PPC_MTOCRF(1,r0)
beq 8f
bf 29,6f
std r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
std r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
- PPC_MTOCRF 1,r5
+ PPC_MTOCRF(1,r5)
beqlr+
bf 29,9f
stw r4,0(r6)
@@ -77,10 +77,10 @@ _GLOBAL(memset)
stb r4,0(r6)
blr
-_GLOBAL(memmove)
+_GLOBAL_TOC(memmove)
cmplw 0,r3,r4
- bgt .backwards_memcpy
- b .memcpy
+ bgt backwards_memcpy
+ b memcpy
_GLOBAL(backwards_memcpy)
rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 3f131129d1c..32a06ec395d 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -10,19 +10,52 @@
#include <asm/ppc_asm.h>
.align 7
-_GLOBAL(memcpy)
- std r3,48(r1) /* save destination pointer for return value */
- PPC_MTOCRF 0x01,r5
+_GLOBAL_TOC(memcpy)
+BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+ cmpdi cr7,r5,0
+#else
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
+#endif
+FTR_SECTION_ELSE
+#ifndef SELFTEST
+ b memcpy_power7
+#endif
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+ /* dumb little-endian memcpy that will get replaced at runtime */
+ addi r9,r3,-1
+ addi r4,r4,-1
+ beqlr cr7
+ mtctr r5
+1: lbzu r10,1(r4)
+ stbu r10,1(r9)
+ bdnz 1b
+ blr
+#else
+ PPC_MTOCRF(0x01,r5)
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
dcbt 0,r4
blt cr1,.Lshort_copy
+/*
+ * Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared. At the time of writing the only CPU with this combination
+ * is POWER6.
+ */
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+ CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
- andi. r0,r4,7
addi r3,r3,-16
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
srdi r7,r5,4
ld r9,0(r4)
addi r4,r4,-8
@@ -41,20 +74,21 @@ _GLOBAL(memcpy)
3: std r8,8(r3)
beq 3f
addi r3,r3,16
- ld r9,8(r4)
.Ldo_tail:
bf cr7*4+1,1f
- rotldi r9,r9,32
+ lwz r9,8(r4)
+ addi r4,r4,4
stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
- rotldi r9,r9,16
+ lhz r9,8(r4)
+ addi r4,r4,2
sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
- rotldi r9,r9,8
+ lbz r9,8(r4)
stb r9,0(r3)
-3: ld r3,48(r1) /* return dest pointer */
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
.Lsrc_unaligned:
@@ -121,17 +155,30 @@ _GLOBAL(memcpy)
cmpwi cr1,r5,8
addi r3,r3,32
sld r9,r9,r10
- ble cr1,.Ldo_tail
+ ble cr1,6f
ld r0,8(r4)
srd r7,r0,r11
or r9,r7,r9
- b .Ldo_tail
+6:
+ bf cr7*4+1,1f
+ rotldi r9,r9,32
+ stw r9,0(r3)
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+ rotldi r9,r9,16
+ sth r9,0(r3)
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+ rotldi r9,r9,8
+ stb r9,0(r3)
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
+ blr
.Ldst_unaligned:
- PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
+ PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
- cmpldi r1,r5,16
+ cmpldi cr1,r5,16
bf cr7*4+3,1f
lbz r0,0(r4)
stb r0,0(r3)
@@ -143,7 +190,7 @@ _GLOBAL(memcpy)
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
-3: PPC_MTOCRF 0x01,r5
+3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
@@ -169,5 +216,6 @@ _GLOBAL(memcpy)
3: bf cr7*4+3,4f
lbz r0,0(r4)
stb r0,0(r3)
-4: ld r3,48(r1) /* return dest pointer */
+4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
+#endif
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
new file mode 100644
index 00000000000..2ff5c142f87
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -0,0 +1,656 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+_GLOBAL(memcpy_power7)
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
+#ifdef CONFIG_ALTIVEC
+ cmpldi r5,16
+ cmpldi cr1,r5,4096
+
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+
+ blt .Lshort_copy
+ bgt cr1,.Lvmx_copy
+#else
+ cmpldi r5,16
+
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+
+ blt .Lshort_copy
+#endif
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ ld r14,64(r4)
+ ld r15,72(r4)
+ ld r16,80(r4)
+ ld r17,88(r4)
+ ld r18,96(r4)
+ ld r19,104(r4)
+ ld r20,112(r4)
+ ld r21,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ std r14,64(r3)
+ std r15,72(r3)
+ std r16,80(r3)
+ std r17,88(r3)
+ std r18,96(r3)
+ std r19,104(r3)
+ std r20,112(r3)
+ std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ addi r4,r4,64
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ addi r4,r4,32
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ addi r4,r4,16
+ std r0,0(r3)
+ std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
+#ifdef CONFIG_ALTIVEC
+.Lvmx_copy:
+ mflr r0
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl enter_vmx_copy
+ cmpwi cr1,r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
+ mtlr r0
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r6,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
+ beq cr1,.Lunwind_stack_nonvmx_copy
+
+ /*
+ * If source and destination are not relatively aligned we use a
+ * slower permute loop.
+ */
+ xor r6,r4,r3
+ rldicl. r6,r6,0,(64-4)
+ bne .Lvmx_unaligned_copy
+
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+ /* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ bf cr7*4+3,5f
+ lvx vr1,r0,r4
+ addi r4,r4,16
+ stvx vr1,r0,r3
+ addi r3,r3,16
+
+5: bf cr7*4+2,6f
+ lvx vr1,r0,r4
+ lvx vr0,r4,r9
+ addi r4,r4,32
+ stvx vr1,r0,r3
+ stvx vr0,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx vr3,r0,r4
+ lvx vr2,r4,r9
+ lvx vr1,r4,r10
+ lvx vr0,r4,r11
+ addi r4,r4,64
+ stvx vr3,r0,r3
+ stvx vr2,r3,r9
+ stvx vr1,r3,r10
+ stvx vr0,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx vr7,r0,r4
+ lvx vr6,r4,r9
+ lvx vr5,r4,r10
+ lvx vr4,r4,r11
+ lvx vr3,r4,r12
+ lvx vr2,r4,r14
+ lvx vr1,r4,r15
+ lvx vr0,r4,r16
+ addi r4,r4,128
+ stvx vr7,r0,r3
+ stvx vr6,r3,r9
+ stvx vr5,r3,r10
+ stvx vr4,r3,r11
+ stvx vr3,r3,r12
+ stvx vr2,r3,r14
+ stvx vr1,r3,r15
+ stvx vr0,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx vr3,r0,r4
+ lvx vr2,r4,r9
+ lvx vr1,r4,r10
+ lvx vr0,r4,r11
+ addi r4,r4,64
+ stvx vr3,r0,r3
+ stvx vr2,r3,r9
+ stvx vr1,r3,r10
+ stvx vr0,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx vr1,r0,r4
+ lvx vr0,r4,r9
+ addi r4,r4,32
+ stvx vr1,r0,r3
+ stvx vr0,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx vr1,r0,r4
+ addi r4,r4,16
+ stvx vr1,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b exit_vmx_copy /* tail call optimise */
+
+.Lvmx_unaligned_copy:
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r7,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r7,4(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+ /* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ LVS(vr16,0,r4) /* Setup permute control vector */
+ lvx vr0,0,r4
+ addi r4,r4,16
+
+ bf cr7*4+3,5f
+ lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ addi r4,r4,16
+ stvx vr8,r0,r3
+ addi r3,r3,16
+ vor vr0,vr1,vr1
+
+5: bf cr7*4+2,6f
+ lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ lvx vr0,r4,r9
+ VPERM(vr9,vr1,vr0,vr16)
+ addi r4,r4,32
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx vr3,r0,r4
+ VPERM(vr8,vr0,vr3,vr16)
+ lvx vr2,r4,r9
+ VPERM(vr9,vr3,vr2,vr16)
+ lvx vr1,r4,r10
+ VPERM(vr10,vr2,vr1,vr16)
+ lvx vr0,r4,r11
+ VPERM(vr11,vr1,vr0,vr16)
+ addi r4,r4,64
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx vr7,r0,r4
+ VPERM(vr8,vr0,vr7,vr16)
+ lvx vr6,r4,r9
+ VPERM(vr9,vr7,vr6,vr16)
+ lvx vr5,r4,r10
+ VPERM(vr10,vr6,vr5,vr16)
+ lvx vr4,r4,r11
+ VPERM(vr11,vr5,vr4,vr16)
+ lvx vr3,r4,r12
+ VPERM(vr12,vr4,vr3,vr16)
+ lvx vr2,r4,r14
+ VPERM(vr13,vr3,vr2,vr16)
+ lvx vr1,r4,r15
+ VPERM(vr14,vr2,vr1,vr16)
+ lvx vr0,r4,r16
+ VPERM(vr15,vr1,vr0,vr16)
+ addi r4,r4,128
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ stvx vr12,r3,r12
+ stvx vr13,r3,r14
+ stvx vr14,r3,r15
+ stvx vr15,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx vr3,r0,r4
+ VPERM(vr8,vr0,vr3,vr16)
+ lvx vr2,r4,r9
+ VPERM(vr9,vr3,vr2,vr16)
+ lvx vr1,r4,r10
+ VPERM(vr10,vr2,vr1,vr16)
+ lvx vr0,r4,r11
+ VPERM(vr11,vr1,vr0,vr16)
+ addi r4,r4,64
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ stvx vr10,r3,r10
+ stvx vr11,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ lvx vr0,r4,r9
+ VPERM(vr9,vr1,vr0,vr16)
+ addi r4,r4,32
+ stvx vr8,r0,r3
+ stvx vr9,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx vr1,r0,r4
+ VPERM(vr8,vr0,vr1,vr16)
+ addi r4,r4,16
+ stvx vr8,r0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ addi r4,r4,-16 /* Unwind the +16 load offset */
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b exit_vmx_copy /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
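The unaligned path above merges pairs of aligned 16-byte loads with vperm,
driven by a control vector from lvsl (lvsr on little-endian). A byte-level
sketch of that merge (illustrative only; the function and parameter names
are made up):

    #include <stdint.h>

    /*
     * Produce the unaligned 16 bytes that start "off" bytes into prev,
     * spilling over into cur -- what VPERM(vr8,vr0,vr1,vr16) computes
     * when vr16 was built by lvsl from a source address with offset off
     * (big-endian case; little-endian uses lvsr with swapped operands
     * to the same effect).
     */
    static void vperm_merge_model(uint8_t out[16], const uint8_t prev[16],
                                  const uint8_t cur[16], unsigned int off)
    {
            int i;

            for (i = 0; i < 16; i++)
                    out[i] = (off + i < 16) ? prev[off + i]
                                            : cur[off + i - 16];
    }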
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
index 22c3b4f53de..a1060a868e6 100644
--- a/arch/powerpc/lib/rheap.c
+++ b/arch/powerpc/lib/rheap.c
@@ -15,7 +15,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -54,7 +54,7 @@ static int grow(rh_info_t * info, int max_blocks)
new_blocks = max_blocks - info->max_blocks;
- block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL);
+ block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_ATOMIC);
if (block == NULL)
return -ENOMEM;
@@ -258,7 +258,7 @@ rh_info_t *rh_create(unsigned int alignment)
if ((alignment & (alignment - 1)) != 0)
return ERR_PTR(-EINVAL);
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kmalloc(sizeof(*info), GFP_ATOMIC);
if (info == NULL)
return ERR_PTR(-ENOMEM);
@@ -556,6 +556,7 @@ unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, co
be = blk->start + blk->size;
if (s >= bs && e <= be)
break;
+ blk = NULL;
}
if (blk == NULL)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 4aae0c38764..5c09f365c84 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,8 +11,11 @@
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
+#include <linux/prefetch.h>
#include <asm/sstep.h>
#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/cputable.h>
extern char system_call_common[];
@@ -23,6 +26,37 @@ extern char system_call_common[];
#define MSR_MASK 0x87c0ffff
#endif
+/* Bits in XER */
+#define XER_SO 0x80000000U
+#define XER_OV 0x40000000U
+#define XER_CA 0x20000000U
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Functions in ldstfp.S
+ */
+extern int do_lfs(int rn, unsigned long ea);
+extern int do_lfd(int rn, unsigned long ea);
+extern int do_stfs(int rn, unsigned long ea);
+extern int do_stfd(int rn, unsigned long ea);
+extern int do_lvx(int rn, unsigned long ea);
+extern int do_stvx(int rn, unsigned long ea);
+extern int do_lxvd2x(int rn, unsigned long ea);
+extern int do_stxvd2x(int rn, unsigned long ea);
+#endif
+
+/*
+ * Emulate the truncation of 64-bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+ if ((msr & MSR_64BIT) == 0)
+ val &= 0xffffffffUL;
+#endif
+ return val;
+}
+
/*
* Determine whether a conditional branch instruction would branch.
*/
@@ -46,16 +80,569 @@ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
return 1;
}
+
+static long __kprobes address_ok(struct pt_regs *regs, unsigned long ea, int nb)
+{
+ if (!user_mode(regs))
+ return 1;
+ return __access_ok(ea, nb, USER_DS);
+}
+
+/*
+ * Calculate effective address for a D-form instruction
+ */
+static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) instr; /* sign-extend */
+ if (ra) {
+ ea += regs->gpr[ra];
+ if (instr & 0x04000000) { /* update forms */
+ if ((instr>>26) != 47) /* stmw is not an update form */
+ regs->gpr[ra] = ea;
+ }
+ }
+
+ return truncate_if_32bit(regs->msr, ea);
+}
+
+#ifdef __powerpc64__
+/*
+ * Calculate effective address for a DS-form instruction
+ */
+static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) (instr & ~3); /* sign-extend */
+ if (ra) {
+ ea += regs->gpr[ra];
+ if ((instr & 3) == 1) /* update forms */
+ regs->gpr[ra] = ea;
+ }
+
+ return truncate_if_32bit(regs->msr, ea);
+}
+#endif /* __powerpc64__ */
+
+/*
+ * Calculate effective address for an X-form instruction
+ */
+static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs,
+ int do_update)
+{
+ int ra, rb;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ ea = regs->gpr[rb];
+ if (ra) {
+ ea += regs->gpr[ra];
+ if (do_update) /* update forms */
+ regs->gpr[ra] = ea;
+ }
+
+ return truncate_if_32bit(regs->msr, ea);
+}
+
+/*
+ * Return the largest power of 2, not greater than sizeof(unsigned long),
+ * such that x is a multiple of it.
+ */
+static inline unsigned long max_align(unsigned long x)
+{
+ x |= sizeof(unsigned long);
+ return x & -x; /* isolates rightmost bit */
+}
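+
+/*
+ * For example, on a 64-bit build max_align(12) = 4 (12 is a multiple
+ * of 4 but not of 8) and max_align(16) = 8, the OR with
+ * sizeof(unsigned long) capping the result.
+ */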
+
+static inline unsigned long byterev_2(unsigned long x)
+{
+ return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
+}
+
+static inline unsigned long byterev_4(unsigned long x)
+{
+ return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
+ ((x & 0xff00) << 8) | ((x & 0xff) << 24);
+}
+
+#ifdef __powerpc64__
+static inline unsigned long byterev_8(unsigned long x)
+{
+ return (byterev_4(x) << 32) | byterev_4(x >> 32);
+}
+#endif
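+
+/* e.g. byterev_2(0x1234) == 0x3412, byterev_4(0x12345678) == 0x78563412 */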
+
+static int __kprobes read_mem_aligned(unsigned long *dest, unsigned long ea,
+ int nb)
+{
+ int err = 0;
+ unsigned long x = 0;
+
+ switch (nb) {
+ case 1:
+ err = __get_user(x, (unsigned char __user *) ea);
+ break;
+ case 2:
+ err = __get_user(x, (unsigned short __user *) ea);
+ break;
+ case 4:
+ err = __get_user(x, (unsigned int __user *) ea);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ err = __get_user(x, (unsigned long __user *) ea);
+ break;
+#endif
+ }
+ if (!err)
+ *dest = x;
+ return err;
+}
+
+static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
+ int nb, struct pt_regs *regs)
+{
+ int err;
+ unsigned long x, b, c;
+#ifdef __LITTLE_ENDIAN__
+ int len = nb; /* save a copy of the length for byte reversal */
+#endif
+
+ /* unaligned, do this in pieces */
+ x = 0;
+ for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+ c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
+ c = max_align(ea);
+#endif
+ if (c > nb)
+ c = max_align(nb);
+ err = read_mem_aligned(&b, ea, c);
+ if (err)
+ return err;
+ x = (x << (8 * c)) + b;
+ ea += c;
+ }
+#ifdef __LITTLE_ENDIAN__
+ switch (len) {
+ case 2:
+ *dest = byterev_2(x);
+ break;
+ case 4:
+ *dest = byterev_4(x);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *dest = byterev_8(x);
+ break;
+#endif
+ }
+#endif
+#ifdef __BIG_ENDIAN__
+ *dest = x;
+#endif
+ return 0;
+}
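+
+/*
+ * Example: a 4-byte big-endian read at ea 0x1002 becomes a 2-byte read
+ * at 0x1002 followed by a 2-byte read at 0x1004, accumulated as
+ * x = (hi << 16) + lo; little-endian goes byte by byte and
+ * byte-reverses the result at the end.
+ */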
+
/*
- * Emulate instructions that cause a transfer of control.
+ * Read memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred.
+ */
+static int __kprobes read_mem(unsigned long *dest, unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & (nb - 1)) == 0)
+ return read_mem_aligned(dest, ea, nb);
+ return read_mem_unaligned(dest, ea, nb, regs);
+}
+
+static int __kprobes write_mem_aligned(unsigned long val, unsigned long ea,
+ int nb)
+{
+ int err = 0;
+
+ switch (nb) {
+ case 1:
+ err = __put_user(val, (unsigned char __user *) ea);
+ break;
+ case 2:
+ err = __put_user(val, (unsigned short __user *) ea);
+ break;
+ case 4:
+ err = __put_user(val, (unsigned int __user *) ea);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ err = __put_user(val, (unsigned long __user *) ea);
+ break;
+#endif
+ }
+ return err;
+}
+
+static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,
+ int nb, struct pt_regs *regs)
+{
+ int err;
+ unsigned long c;
+
+#ifdef __LITTLE_ENDIAN__
+ switch (nb) {
+ case 2:
+ val = byterev_2(val);
+ break;
+ case 4:
+ val = byterev_4(val);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ val = byterev_8(val);
+ break;
+#endif
+ }
+#endif
+ /* unaligned or little-endian, do this in pieces */
+ for (; nb > 0; nb -= c) {
+#ifdef __LITTLE_ENDIAN__
+ c = 1;
+#endif
+#ifdef __BIG_ENDIAN__
+ c = max_align(ea);
+#endif
+ if (c > nb)
+ c = max_align(nb);
+ err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
+ if (err)
+ return err;
+ ea += c;
+ }
+ return 0;
+}
+
+/*
+ * Write memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred.
+ */
+static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & (nb - 1)) == 0)
+ return write_mem_aligned(val, ea, nb);
+ return write_mem_unaligned(val, ea, nb, regs);
+}
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Check the address and alignment, and call func to do the actual
+ * load or store.
+ */
+static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ int err;
+ union {
+ double dbl;
+ unsigned long ul[2];
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned _pad_;
+ unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+ unsigned word;
+ unsigned _pad_;
+#endif
+ } single;
+ } data;
+ unsigned long ptr;
+
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & 3) == 0)
+ return (*func)(rn, ea);
+ ptr = (unsigned long) &data.ul;
+ if (sizeof(unsigned long) == 8 || nb == 4) {
+ err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
+ if (nb == 4)
+ ptr = (unsigned long)&(data.single.word);
+ } else {
+ /* reading a double on 32-bit */
+ err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
+ if (!err)
+ err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
+ }
+ if (err)
+ return err;
+ return (*func)(rn, ptr);
+}
+
+static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ int err;
+ union {
+ double dbl;
+ unsigned long ul[2];
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned _pad_;
+ unsigned word;
+#endif
+#ifdef __LITTLE_ENDIAN__
+ unsigned word;
+ unsigned _pad_;
+#endif
+ } single;
+ } data;
+ unsigned long ptr;
+
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & 3) == 0)
+ return (*func)(rn, ea);
+ ptr = (unsigned long) &data.ul[0];
+ if (sizeof(unsigned long) == 8 || nb == 4) {
+ if (nb == 4)
+ ptr = (unsigned long)&(data.single.word);
+ err = (*func)(rn, ptr);
+ if (err)
+ return err;
+ err = write_mem_unaligned(data.ul[0], ea, nb, regs);
+ } else {
+ /* writing a double on 32-bit */
+ err = (*func)(rn, ptr);
+ if (err)
+ return err;
+ err = write_mem_unaligned(data.ul[0], ea, 4, regs);
+ if (!err)
+ err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
+ }
+ return err;
+}
+#endif
+
+#ifdef CONFIG_ALTIVEC
+/* For Altivec/VMX, no need to worry about alignment */
+static int __kprobes do_vec_load(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea & ~0xfUL, 16))
+ return -EFAULT;
+ return (*func)(rn, ea);
+}
+
+static int __kprobes do_vec_store(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea & ~0xfUL, 16))
+ return -EFAULT;
+ return (*func)(rn, ea);
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static int __kprobes do_vsx_load(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, struct pt_regs *regs)
+{
+ int err;
+ unsigned long val[2];
+
+ if (!address_ok(regs, ea, 16))
+ return -EFAULT;
+ if ((ea & 3) == 0)
+ return (*func)(rn, ea);
+ err = read_mem_unaligned(&val[0], ea, 8, regs);
+ if (!err)
+ err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
+ if (!err)
+ err = (*func)(rn, (unsigned long) &val[0]);
+ return err;
+}
+
+static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long),
+ unsigned long ea, struct pt_regs *regs)
+{
+ int err;
+ unsigned long val[2];
+
+ if (!address_ok(regs, ea, 16))
+ return -EFAULT;
+ if ((ea & 3) == 0)
+ return (*func)(rn, ea);
+ err = (*func)(rn, (unsigned long) &val[0]);
+ if (err)
+ return err;
+ err = write_mem_unaligned(val[0], ea, 8, regs);
+ if (!err)
+ err = write_mem_unaligned(val[1], ea + 8, 8, regs);
+ return err;
+}
+#endif /* CONFIG_VSX */
+
+#define __put_user_asmx(x, addr, err, op, cr) \
+ __asm__ __volatile__( \
+ "1: " op " %2,0,%3\n" \
+ " mfcr %1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%4\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ PPC_LONG_ALIGN "\n" \
+ PPC_LONG "1b,3b\n" \
+ ".previous" \
+ : "=r" (err), "=r" (cr) \
+ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __get_user_asmx(x, addr, err, op) \
+ __asm__ __volatile__( \
+ "1: "op" %1,0,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ PPC_LONG_ALIGN "\n" \
+ PPC_LONG "1b,3b\n" \
+ ".previous" \
+ : "=r" (err), "=r" (x) \
+ : "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __cacheop_user_asmx(addr, err, op) \
+ __asm__ __volatile__( \
+ "1: "op" 0,%1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ PPC_LONG_ALIGN "\n" \
+ PPC_LONG "1b,3b\n" \
+ ".previous" \
+ : "=r" (err) \
+ : "r" (addr), "i" (-EFAULT), "0" (err))
+
+static void __kprobes set_cr0(struct pt_regs *regs, int rd)
+{
+ long val = regs->gpr[rd];
+
+ regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
+#ifdef __powerpc64__
+ if (!(regs->msr & MSR_64BIT))
+ val = (int) val;
+#endif
+ if (val < 0)
+ regs->ccr |= 0x80000000;
+ else if (val > 0)
+ regs->ccr |= 0x40000000;
+ else
+ regs->ccr |= 0x20000000;
+}
+
+static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
+ unsigned long val1, unsigned long val2,
+ unsigned long carry_in)
+{
+ unsigned long val = val1 + val2;
+
+ if (carry_in)
+ ++val;
+ regs->gpr[rd] = val;
+#ifdef __powerpc64__
+ if (!(regs->msr & MSR_64BIT)) {
+ val = (unsigned int) val;
+ val1 = (unsigned int) val1;
+ }
+#endif
+ if (val < val1 || (carry_in && val == val1))
+ regs->xer |= XER_CA;
+ else
+ regs->xer &= ~XER_CA;
+}
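+
+/*
+ * Carry out of the (possibly 32-bit truncated) addition is detected
+ * by wraparound: val < val1, or val == val1 with an incoming carry
+ * (val2 must then have been ~0UL).
+ */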
+
+static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2,
+ int crfld)
+{
+ unsigned int crval, shift;
+
+ crval = (regs->xer >> 31) & 1; /* get SO bit */
+ if (v1 < v2)
+ crval |= 8;
+ else if (v1 > v2)
+ crval |= 4;
+ else
+ crval |= 2;
+ shift = (7 - crfld) * 4;
+ regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
+ unsigned long v2, int crfld)
+{
+ unsigned int crval, shift;
+
+ crval = (regs->xer >> 31) & 1; /* get SO bit */
+ if (v1 < v2)
+ crval |= 8;
+ else if (v1 > v2)
+ crval |= 4;
+ else
+ crval |= 2;
+ shift = (7 - crfld) * 4;
+ regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+/*
+ * Elements of 32-bit rotate and mask instructions.
+ */
+#define MASK32(mb, me) ((0xffffffffUL >> (mb)) + \
+ ((signed long)-0x80000000L >> (me)) + ((me) >= (mb)))
+#ifdef __powerpc64__
+#define MASK64_L(mb) (~0UL >> (mb))
+#define MASK64_R(me) ((signed long)-0x8000000000000000L >> (me))
+#define MASK64(mb, me) (MASK64_L(mb) + MASK64_R(me) + ((me) >= (mb)))
+#define DATA32(x) (((x) & 0xffffffffUL) | (((x) & 0xffffffffUL) << 32))
+#else
+#define DATA32(x) (x)
+#endif
+#define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
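+
+/*
+ * e.g. MASK32(24, 31) == 0x000000ff and MASK32(0, 7) == 0xff000000;
+ * when me < mb the mask wraps around, as the rotate-and-mask
+ * instructions require.
+ */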
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
* Returns 1 if the step was emulated, 0 if not,
* or -1 if the instruction is one that should not be stepped,
* such as an rfid, or a mtmsrd that would clear MSR_RI.
*/
int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
{
- unsigned int opcode, rs, rb, rd, spr;
+ unsigned int opcode, ra, rb, rd, spr, u;
unsigned long int imm;
+ unsigned long int val, val2;
+ unsigned long int ea;
+ unsigned int cr, mb, me, sh;
+ int err;
+ unsigned long old_ra, val3;
+ long ival;
opcode = instr >> 26;
switch (opcode) {
@@ -64,12 +651,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
if ((instr & 2) == 0)
imm += regs->nip;
regs->nip += 4;
- if ((regs->msr & MSR_SF) == 0)
- regs->nip &= 0xffffffffUL;
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip);
if (instr & 1)
regs->link = regs->nip;
if (branch_taken(instr, regs))
- regs->nip = imm;
+ regs->nip = truncate_if_32bit(regs->msr, imm);
return 1;
#ifdef CONFIG_PPC64
case 17: /* sc */
@@ -78,7 +664,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
* entry code works. If that is changed, this will
* need to be changed also.
*/
+ if (regs->gpr[0] == 0x1ebe &&
+ cpu_has_feature(CPU_FTR_REAL_LE)) {
+ regs->msr ^= MSR_LE;
+ goto instr_done;
+ }
regs->gpr[9] = regs->gpr[13];
+ regs->gpr[10] = MSR_KERNEL;
regs->gpr[11] = regs->nip + 4;
regs->gpr[12] = regs->msr & MSR_MASK;
regs->gpr[13] = (unsigned long) get_paca();
@@ -92,54 +684,250 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
imm -= 0x04000000;
if ((instr & 2) == 0)
imm += regs->nip;
- if (instr & 1) {
- regs->link = regs->nip + 4;
- if ((regs->msr & MSR_SF) == 0)
- regs->link &= 0xffffffffUL;
- }
- if ((regs->msr & MSR_SF) == 0)
- imm &= 0xffffffffUL;
+ if (instr & 1)
+ regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+ imm = truncate_if_32bit(regs->msr, imm);
regs->nip = imm;
return 1;
case 19:
- switch (instr & 0x7fe) {
- case 0x20: /* bclr */
- case 0x420: /* bcctr */
+ switch ((instr >> 1) & 0x3ff) {
+ case 16: /* bclr */
+ case 528: /* bcctr */
imm = (instr & 0x400)? regs->ctr: regs->link;
- regs->nip += 4;
- if ((regs->msr & MSR_SF) == 0) {
- regs->nip &= 0xffffffffUL;
- imm &= 0xffffffffUL;
- }
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+ imm = truncate_if_32bit(regs->msr, imm);
if (instr & 1)
regs->link = regs->nip;
if (branch_taken(instr, regs))
regs->nip = imm;
return 1;
- case 0x24: /* rfid, scary */
+
+ case 18: /* rfid, scary */
return -1;
+
+ case 150: /* isync */
+ isync();
+ goto instr_done;
+
+ case 33: /* crnor */
+ case 129: /* crandc */
+ case 193: /* crxor */
+ case 225: /* crnand */
+ case 257: /* crand */
+ case 289: /* creqv */
+ case 417: /* crorc */
+ case 449: /* cror */
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ rd = (instr >> 21) & 0x1f;
+ ra = (regs->ccr >> (31 - ra)) & 1;
+ rb = (regs->ccr >> (31 - rb)) & 1;
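+ /*
+ * Bits 6-9 of these extended opcodes encode the operation's
+ * truth table: instruction bit (6 + 2*a + b) is the result
+ * for CR source bits (a, b); e.g. crand is 257, giving
+ * instruction bits 1 and 9, so only the (1,1) entry is set.
+ */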
+ val = (instr >> (6 + ra * 2 + rb)) & 1;
+ regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) |
+ (val << (31 - rd));
+ goto instr_done;
}
+ break;
case 31:
- rd = (instr >> 21) & 0x1f;
- switch (instr & 0x7fe) {
- case 0xa6: /* mfmsr */
+ switch ((instr >> 1) & 0x3ff) {
+ case 598: /* sync */
+#ifdef __powerpc64__
+ switch ((instr >> 21) & 3) {
+ case 1: /* lwsync */
+ asm volatile("lwsync" : : : "memory");
+ goto instr_done;
+ case 2: /* ptesync */
+ asm volatile("ptesync" : : : "memory");
+ goto instr_done;
+ }
+#endif
+ mb();
+ goto instr_done;
+
+ case 854: /* eieio */
+ eieio();
+ goto instr_done;
+ }
+ break;
+ }
+
+ /* Following cases refer to regs->gpr[], so we need all regs */
+ if (!FULL_REGS(regs))
+ return 0;
+
+ rd = (instr >> 21) & 0x1f;
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+
+ switch (opcode) {
+ case 7: /* mulli */
+ regs->gpr[rd] = regs->gpr[ra] * (short) instr;
+ goto instr_done;
+
+ case 8: /* subfic */
+ imm = (short) instr;
+ add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
+ goto instr_done;
+
+ case 10: /* cmpli */
+ imm = (unsigned short) instr;
+ val = regs->gpr[ra];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0)
+ val = (unsigned int) val;
+#endif
+ do_cmp_unsigned(regs, val, imm, rd >> 2);
+ goto instr_done;
+
+ case 11: /* cmpi */
+ imm = (short) instr;
+ val = regs->gpr[ra];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0)
+ val = (int) val;
+#endif
+ do_cmp_signed(regs, val, imm, rd >> 2);
+ goto instr_done;
+
+ case 12: /* addic */
+ imm = (short) instr;
+ add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+ goto instr_done;
+
+ case 13: /* addic. */
+ imm = (short) instr;
+ add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+ set_cr0(regs, rd);
+ goto instr_done;
+
+ case 14: /* addi */
+ imm = (short) instr;
+ if (ra)
+ imm += regs->gpr[ra];
+ regs->gpr[rd] = imm;
+ goto instr_done;
+
+ case 15: /* addis */
+ imm = ((short) instr) << 16;
+ if (ra)
+ imm += regs->gpr[ra];
+ regs->gpr[rd] = imm;
+ goto instr_done;
+
+ case 20: /* rlwimi */
+ mb = (instr >> 6) & 0x1f;
+ me = (instr >> 1) & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ imm = MASK32(mb, me);
+ regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+ goto logical_done;
+
+ case 21: /* rlwinm */
+ mb = (instr >> 6) & 0x1f;
+ me = (instr >> 1) & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ goto logical_done;
+
+ case 23: /* rlwnm */
+ mb = (instr >> 6) & 0x1f;
+ me = (instr >> 1) & 0x1f;
+ rb = regs->gpr[rb] & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ goto logical_done;
+
+ case 24: /* ori */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] | imm;
+ goto instr_done;
+
+ case 25: /* oris */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] | (imm << 16);
+ goto instr_done;
+
+ case 26: /* xori */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] ^ imm;
+ goto instr_done;
+
+ case 27: /* xoris */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16);
+ goto instr_done;
+
+ case 28: /* andi. */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] & imm;
+ set_cr0(regs, ra);
+ goto instr_done;
+
+ case 29: /* andis. */
+ imm = (unsigned short) instr;
+ regs->gpr[ra] = regs->gpr[rd] & (imm << 16);
+ set_cr0(regs, ra);
+ goto instr_done;
+
+#ifdef __powerpc64__
+ case 30: /* rld* */
+ mb = ((instr >> 6) & 0x1f) | (instr & 0x20);
+ val = regs->gpr[rd];
+ if ((instr & 0x10) == 0) {
+ sh = rb | ((instr & 2) << 4);
+ val = ROTATE(val, sh);
+ switch ((instr >> 2) & 3) {
+ case 0: /* rldicl */
+ regs->gpr[ra] = val & MASK64_L(mb);
+ goto logical_done;
+ case 1: /* rldicr */
+ regs->gpr[ra] = val & MASK64_R(mb);
+ goto logical_done;
+ case 2: /* rldic */
+ regs->gpr[ra] = val & MASK64(mb, 63 - sh);
+ goto logical_done;
+ case 3: /* rldimi */
+ imm = MASK64(mb, 63 - sh);
+ regs->gpr[ra] = (regs->gpr[ra] & ~imm) |
+ (val & imm);
+ goto logical_done;
+ }
+ } else {
+ sh = regs->gpr[rb] & 0x3f;
+ val = ROTATE(val, sh);
+ switch ((instr >> 1) & 7) {
+ case 0: /* rldcl */
+ regs->gpr[ra] = val & MASK64_L(mb);
+ goto logical_done;
+ case 1: /* rldcr */
+ regs->gpr[ra] = val & MASK64_R(mb);
+ goto logical_done;
+ }
+ }
+#endif
+
+ case 31:
+ switch ((instr >> 1) & 0x3ff) {
+ case 83: /* mfmsr */
+ if (regs->msr & MSR_PR)
+ break;
regs->gpr[rd] = regs->msr & MSR_MASK;
- regs->nip += 4;
- if ((regs->msr & MSR_SF) == 0)
- regs->nip &= 0xffffffffUL;
- return 1;
- case 0x124: /* mtmsr */
+ goto instr_done;
+ case 146: /* mtmsr */
+ if (regs->msr & MSR_PR)
+ break;
imm = regs->gpr[rd];
if ((imm & MSR_RI) == 0)
/* can't step mtmsr that would clear MSR_RI */
return -1;
regs->msr = imm;
- regs->nip += 4;
- return 1;
+ goto instr_done;
#ifdef CONFIG_PPC64
- case 0x164: /* mtmsrd */
+ case 178: /* mtmsrd */
/* only MSR_EE and MSR_RI get changed if bit 15 set */
/* mtmsrd doesn't change MSR_HV and MSR_ME */
+ if (regs->msr & MSR_PR)
+ break;
imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
imm = (regs->msr & MSR_MASK & ~imm)
| (regs->gpr[rd] & imm);
@@ -147,55 +935,803 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
/* can't step mtmsrd that would clear MSR_RI */
return -1;
regs->msr = imm;
- regs->nip += 4;
- if ((imm & MSR_SF) == 0)
- regs->nip &= 0xffffffffUL;
- return 1;
+ goto instr_done;
#endif
- case 0x26: /* mfcr */
+ case 19: /* mfcr */
regs->gpr[rd] = regs->ccr;
regs->gpr[rd] &= 0xffffffffUL;
- goto mtspr_out;
- case 0x2a6: /* mfspr */
+ goto instr_done;
+
+ case 144: /* mtcrf */
+ imm = 0xf0000000UL;
+ val = regs->gpr[rd];
+ for (sh = 0; sh < 8; ++sh) {
+ if (instr & (0x80000 >> sh))
+ regs->ccr = (regs->ccr & ~imm) |
+ (val & imm);
+ imm >>= 4;
+ }
+ goto instr_done;
+
+ case 339: /* mfspr */
spr = (instr >> 11) & 0x3ff;
switch (spr) {
case 0x20: /* mfxer */
regs->gpr[rd] = regs->xer;
regs->gpr[rd] &= 0xffffffffUL;
- goto mtspr_out;
+ goto instr_done;
case 0x100: /* mflr */
regs->gpr[rd] = regs->link;
- goto mtspr_out;
+ goto instr_done;
case 0x120: /* mfctr */
regs->gpr[rd] = regs->ctr;
- goto mtspr_out;
- }
- break;
- case 0x378: /* orx */
- rs = (instr >> 21) & 0x1f;
- rb = (instr >> 11) & 0x1f;
- if (rs == rb) { /* mr */
- rd = (instr >> 16) & 0x1f;
- regs->gpr[rd] = regs->gpr[rs];
- goto mtspr_out;
+ goto instr_done;
}
break;
- case 0x3a6: /* mtspr */
+
+ case 467: /* mtspr */
spr = (instr >> 11) & 0x3ff;
switch (spr) {
case 0x20: /* mtxer */
regs->xer = (regs->gpr[rd] & 0xffffffffUL);
- goto mtspr_out;
+ goto instr_done;
case 0x100: /* mtlr */
regs->link = regs->gpr[rd];
- goto mtspr_out;
+ goto instr_done;
case 0x120: /* mtctr */
regs->ctr = regs->gpr[rd];
-mtspr_out:
- regs->nip += 4;
- return 1;
+ goto instr_done;
+ }
+ break;
+
+/*
+ * Compare instructions
+ */
+ case 0: /* cmp */
+ val = regs->gpr[ra];
+ val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0) {
+ /* word (32-bit) compare */
+ val = (int) val;
+ val2 = (int) val2;
+ }
+#endif
+ do_cmp_signed(regs, val, val2, rd >> 2);
+ goto instr_done;
+
+ case 32: /* cmpl */
+ val = regs->gpr[ra];
+ val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0) {
+ /* word (32-bit) compare */
+ val = (unsigned int) val;
+ val2 = (unsigned int) val2;
+ }
+#endif
+ do_cmp_unsigned(regs, val, val2, rd >> 2);
+ goto instr_done;
+
+/*
+ * Arithmetic instructions
+ */
+ case 8: /* subfc */
+ add_with_carry(regs, rd, ~regs->gpr[ra],
+ regs->gpr[rb], 1);
+ goto arith_done;
+#ifdef __powerpc64__
+ case 9: /* mulhdu */
+ asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 10: /* addc */
+ add_with_carry(regs, rd, regs->gpr[ra],
+ regs->gpr[rb], 0);
+ goto arith_done;
+
+ case 11: /* mulhwu */
+ asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+
+ case 40: /* subf */
+ regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 73: /* mulhd */
+ asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 75: /* mulhw */
+ asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+
+ case 104: /* neg */
+ regs->gpr[rd] = -regs->gpr[ra];
+ goto arith_done;
+
+ case 136: /* subfe */
+ add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 138: /* adde */
+ add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 200: /* subfze */
+ add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 202: /* addze */
+ add_with_carry(regs, rd, regs->gpr[ra], 0L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 232: /* subfme */
+ add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
+ regs->xer & XER_CA);
+ goto arith_done;
+#ifdef __powerpc64__
+ case 233: /* mulld */
+ regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 234: /* addme */
+ add_with_carry(regs, rd, regs->gpr[ra], -1L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 235: /* mullw */
+ regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
+ (unsigned int) regs->gpr[rb];
+ goto arith_done;
+
+ case 266: /* add */
+ regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 457: /* divdu */
+ regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 459: /* divwu */
+ regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+ (unsigned int) regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 489: /* divd */
+ regs->gpr[rd] = (long int) regs->gpr[ra] /
+ (long int) regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 491: /* divw */
+ regs->gpr[rd] = (int) regs->gpr[ra] /
+ (int) regs->gpr[rb];
+ goto arith_done;
+
+/*
+ * Logical instructions
+ */
+ case 26: /* cntlzw */
+ asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
+ "r" (regs->gpr[rd]));
+ goto logical_done;
+#ifdef __powerpc64__
+ case 58: /* cntlzd */
+ asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) :
+ "r" (regs->gpr[rd]));
+ goto logical_done;
+#endif
+ case 28: /* and */
+ regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb];
+ goto logical_done;
+
+ case 60: /* andc */
+ regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
+ goto logical_done;
+
+ case 124: /* nor */
+ regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
+ goto logical_done;
+
+	case 284:	/* eqv */
+		regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+ goto logical_done;
+
+ case 316: /* xor */
+ regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
+ goto logical_done;
+
+ case 412: /* orc */
+ regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
+ goto logical_done;
+
+ case 444: /* or */
+ regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb];
+ goto logical_done;
+
+ case 476: /* nand */
+ regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
+ goto logical_done;
+
+ case 922: /* extsh */
+ regs->gpr[ra] = (signed short) regs->gpr[rd];
+ goto logical_done;
+
+ case 954: /* extsb */
+ regs->gpr[ra] = (signed char) regs->gpr[rd];
+ goto logical_done;
+#ifdef __powerpc64__
+ case 986: /* extsw */
+ regs->gpr[ra] = (signed int) regs->gpr[rd];
+ goto logical_done;
+#endif
+
+/*
+ * Shift instructions
+ */
+ case 24: /* slw */
+ sh = regs->gpr[rb] & 0x3f;
+ if (sh < 32)
+ regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL;
+ else
+ regs->gpr[ra] = 0;
+ goto logical_done;
+
+ case 536: /* srw */
+ sh = regs->gpr[rb] & 0x3f;
+ if (sh < 32)
+ regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+ else
+ regs->gpr[ra] = 0;
+ goto logical_done;
+
+ case 792: /* sraw */
+ sh = regs->gpr[rb] & 0x3f;
+ ival = (signed int) regs->gpr[rd];
+ regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
+ if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
+ regs->xer |= XER_CA;
+ else
+ regs->xer &= ~XER_CA;
+ goto logical_done;
+
+ case 824: /* srawi */
+ sh = rb;
+ ival = (signed int) regs->gpr[rd];
+ regs->gpr[ra] = ival >> sh;
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+ regs->xer |= XER_CA;
+ else
+ regs->xer &= ~XER_CA;
+ goto logical_done;
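
The XER[CA] rule for the algebraic shifts above: carry is set only when the source is negative and one-bits were shifted out, which makes result + CA equal to the quotient truncated toward zero. A worked instance of the srawi case:

	/* Worked example of the srawi carry rule above. */
	static void srawi_carry_example(void)
	{
		int ival = -5;			/* 0xfffffffb */
		unsigned int sh = 1;
		long result = ival >> sh;	/* -3: shift rounds toward -infinity */
		int ca = ival < 0 && (ival & ((1ul << sh) - 1)) != 0;	/* 1 */

		/* result + ca == -2 == -5 / 2, the truncated quotient */
		(void)result; (void)ca;
	}
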
+
+#ifdef __powerpc64__
+ case 27: /* sld */
+ sh = regs->gpr[rb] & 0x7f;
+ if (sh < 64)
+ regs->gpr[ra] = regs->gpr[rd] << sh;
+ else
+ regs->gpr[ra] = 0;
+ goto logical_done;
+
+ case 539: /* srd */
+ sh = regs->gpr[rb] & 0x7f;
+ if (sh < 64)
+ regs->gpr[ra] = regs->gpr[rd] >> sh;
+ else
+ regs->gpr[ra] = 0;
+ goto logical_done;
+
+ case 794: /* srad */
+ sh = regs->gpr[rb] & 0x7f;
+ ival = (signed long int) regs->gpr[rd];
+ regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
+ if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
+ regs->xer |= XER_CA;
+ else
+ regs->xer &= ~XER_CA;
+ goto logical_done;
+
+ case 826: /* sradi with sh_5 = 0 */
+ case 827: /* sradi with sh_5 = 1 */
+ sh = rb | ((instr & 2) << 4);
+ ival = (signed long int) regs->gpr[rd];
+ regs->gpr[ra] = ival >> sh;
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+ regs->xer |= XER_CA;
+ else
+ regs->xer &= ~XER_CA;
+ goto logical_done;
+#endif /* __powerpc64__ */
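
sradi needs a 6-bit shift count, which does not fit in the 5-bit RB field, so the ISA stores sh[5] in instruction bit 1; that is why two extended opcodes (826 and 827) appear and why the code ORs (instr & 2) << 4 back in. For example:

	/* sradi rd,rs,35: the 6-bit count 35 = 0b100011 is split in the encoding. */
	static void sradi_sh_example(void)
	{
		unsigned int rb_field = 35 & 0x1f;	/* 0b00011, the RB slot */
		unsigned int instr = 2;			/* bit 1 set: ext opcode 827 */
		unsigned int sh = rb_field | ((instr & 2) << 4);	/* 3 | 32 = 35 */
		(void)sh;
	}
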
+
+/*
+ * Cache instructions
+ */
+ case 54: /* dcbst */
+ ea = xform_ea(instr, regs, 0);
+ if (!address_ok(regs, ea, 8))
+ return 0;
+ err = 0;
+ __cacheop_user_asmx(ea, err, "dcbst");
+ if (err)
+ return 0;
+ goto instr_done;
+
+ case 86: /* dcbf */
+ ea = xform_ea(instr, regs, 0);
+ if (!address_ok(regs, ea, 8))
+ return 0;
+ err = 0;
+ __cacheop_user_asmx(ea, err, "dcbf");
+ if (err)
+ return 0;
+ goto instr_done;
+
+ case 246: /* dcbtst */
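+		/* rd sits in the TH hint field for dcbtst/dcbt; only TH=0 is emulated */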
+ if (rd == 0) {
+ ea = xform_ea(instr, regs, 0);
+ prefetchw((void *) ea);
+ }
+ goto instr_done;
+
+ case 278: /* dcbt */
+ if (rd == 0) {
+ ea = xform_ea(instr, regs, 0);
+ prefetch((void *) ea);
}
+ goto instr_done;
+
}
+ break;
}
- return 0;
+
+ /*
+ * Following cases are for loads and stores, so bail out
+ * if we're in little-endian mode.
+ */
+ if (regs->msr & MSR_LE)
+ return 0;
+
+ /*
+ * Save register RA in case it's an update form load or store
+ * and the access faults.
+ */
+ old_ra = regs->gpr[ra];
+
+ switch (opcode) {
+ case 31:
+ u = instr & 0x40;
+ switch ((instr >> 1) & 0x3ff) {
+ case 20: /* lwarx */
+ ea = xform_ea(instr, regs, 0);
+ if (ea & 3)
+ break; /* can't handle misaligned */
+ err = -EFAULT;
+ if (!address_ok(regs, ea, 4))
+ goto ldst_done;
+ err = 0;
+ __get_user_asmx(val, ea, err, "lwarx");
+ if (!err)
+ regs->gpr[rd] = val;
+ goto ldst_done;
+
+ case 150: /* stwcx. */
+ ea = xform_ea(instr, regs, 0);
+ if (ea & 3)
+ break; /* can't handle misaligned */
+ err = -EFAULT;
+ if (!address_ok(regs, ea, 4))
+ goto ldst_done;
+ err = 0;
+ __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr);
+ if (!err)
+ regs->ccr = (regs->ccr & 0x0fffffff) |
+ (cr & 0xe0000000) |
+ ((regs->xer >> 3) & 0x10000000);
+ goto ldst_done;
+
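The CR0 rebuild in the stwcx./stdcx. cases is doing what the hardware does: keep LT/GT/EQ as produced by the real conditional store (EQ reports whether the reservation still held), and fold XER[SO] (bit 0x80000000 of the XER) down into CR0[SO] at 0x10000000, hence the shift by 3. As a self-contained restatement:

	/* Recompose CR0 after an emulated store-conditional; cr is the CR
	 * value captured by the inline stwcx./stdcx. */
	static unsigned long sketch_stcx_cr0(unsigned long ccr, unsigned long cr,
					     unsigned long xer)
	{
		return (ccr & 0x0fffffff)		/* keep CR1..CR7 */
			| (cr & 0xe0000000)		/* LT/GT/EQ from the store */
			| ((xer >> 3) & 0x10000000);	/* XER[SO] -> CR0[SO] */
	}
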
+#ifdef __powerpc64__
+ case 84: /* ldarx */
+ ea = xform_ea(instr, regs, 0);
+ if (ea & 7)
+ break; /* can't handle misaligned */
+ err = -EFAULT;
+ if (!address_ok(regs, ea, 8))
+ goto ldst_done;
+ err = 0;
+ __get_user_asmx(val, ea, err, "ldarx");
+ if (!err)
+ regs->gpr[rd] = val;
+ goto ldst_done;
+
+ case 214: /* stdcx. */
+ ea = xform_ea(instr, regs, 0);
+ if (ea & 7)
+ break; /* can't handle misaligned */
+ err = -EFAULT;
+ if (!address_ok(regs, ea, 8))
+ goto ldst_done;
+ err = 0;
+ __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr);
+ if (!err)
+ regs->ccr = (regs->ccr & 0x0fffffff) |
+ (cr & 0xe0000000) |
+ ((regs->xer >> 3) & 0x10000000);
+ goto ldst_done;
+
+ case 21: /* ldx */
+ case 53: /* ldux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 8, regs);
+ goto ldst_done;
+#endif
+
+ case 23: /* lwzx */
+ case 55: /* lwzux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 4, regs);
+ goto ldst_done;
+
+ case 87: /* lbzx */
+ case 119: /* lbzux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 1, regs);
+ goto ldst_done;
+
+#ifdef CONFIG_ALTIVEC
+ case 103: /* lvx */
+ case 359: /* lvxl */
+ if (!(regs->msr & MSR_VEC))
+ break;
+ ea = xform_ea(instr, regs, 0);
+ err = do_vec_load(rd, do_lvx, ea, regs);
+ goto ldst_done;
+
+ case 231: /* stvx */
+ case 487: /* stvxl */
+ if (!(regs->msr & MSR_VEC))
+ break;
+ ea = xform_ea(instr, regs, 0);
+ err = do_vec_store(rd, do_stvx, ea, regs);
+ goto ldst_done;
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+ case 149: /* stdx */
+ case 181: /* stdux */
+ val = regs->gpr[rd];
+ err = write_mem(val, xform_ea(instr, regs, u), 8, regs);
+ goto ldst_done;
+#endif
+
+ case 151: /* stwx */
+ case 183: /* stwux */
+ val = regs->gpr[rd];
+ err = write_mem(val, xform_ea(instr, regs, u), 4, regs);
+ goto ldst_done;
+
+ case 215: /* stbx */
+ case 247: /* stbux */
+ val = regs->gpr[rd];
+ err = write_mem(val, xform_ea(instr, regs, u), 1, regs);
+ goto ldst_done;
+
+ case 279: /* lhzx */
+ case 311: /* lhzux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 2, regs);
+ goto ldst_done;
+
+#ifdef __powerpc64__
+ case 341: /* lwax */
+ case 373: /* lwaux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 4, regs);
+ if (!err)
+ regs->gpr[rd] = (signed int) regs->gpr[rd];
+ goto ldst_done;
+#endif
+
+ case 343: /* lhax */
+ case 375: /* lhaux */
+ err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+ 2, regs);
+ if (!err)
+ regs->gpr[rd] = (signed short) regs->gpr[rd];
+ goto ldst_done;
+
+ case 407: /* sthx */
+ case 439: /* sthux */
+ val = regs->gpr[rd];
+ err = write_mem(val, xform_ea(instr, regs, u), 2, regs);
+ goto ldst_done;
+
+#ifdef __powerpc64__
+ case 532: /* ldbrx */
+ err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs);
+ if (!err)
+ regs->gpr[rd] = byterev_8(val);
+ goto ldst_done;
+
+#endif
+
+ case 534: /* lwbrx */
+ err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs);
+ if (!err)
+ regs->gpr[rd] = byterev_4(val);
+ goto ldst_done;
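
The byte-reversed loads are emulated as a plain load plus an explicit swap. The byterev_* helpers are defined earlier in sstep.c; the 32-bit one is presumably the usual pattern, sketched here:

	/* Sketch of the 32-bit byte reversal used by the lwbrx/stwbrx paths. */
	static unsigned long sketch_byterev_4(unsigned long x)
	{
		return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
			((x & 0xff00) << 8) | ((x & 0xff) << 24);
	}
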
+
+#ifdef CONFIG_PPC_FPU
+ case 535: /* lfsx */
+ case 567: /* lfsux */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = xform_ea(instr, regs, u);
+ err = do_fp_load(rd, do_lfs, ea, 4, regs);
+ goto ldst_done;
+
+ case 599: /* lfdx */
+ case 631: /* lfdux */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = xform_ea(instr, regs, u);
+ err = do_fp_load(rd, do_lfd, ea, 8, regs);
+ goto ldst_done;
+
+ case 663: /* stfsx */
+ case 695: /* stfsux */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = xform_ea(instr, regs, u);
+ err = do_fp_store(rd, do_stfs, ea, 4, regs);
+ goto ldst_done;
+
+ case 727: /* stfdx */
+ case 759: /* stfdux */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = xform_ea(instr, regs, u);
+ err = do_fp_store(rd, do_stfd, ea, 8, regs);
+ goto ldst_done;
+#endif
+
+#ifdef __powerpc64__
+ case 660: /* stdbrx */
+ val = byterev_8(regs->gpr[rd]);
+ err = write_mem(val, xform_ea(instr, regs, 0), 8, regs);
+ goto ldst_done;
+
+#endif
+ case 662: /* stwbrx */
+ val = byterev_4(regs->gpr[rd]);
+ err = write_mem(val, xform_ea(instr, regs, 0), 4, regs);
+ goto ldst_done;
+
+ case 790: /* lhbrx */
+ err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs);
+ if (!err)
+ regs->gpr[rd] = byterev_2(val);
+ goto ldst_done;
+
+ case 918: /* sthbrx */
+ val = byterev_2(regs->gpr[rd]);
+ err = write_mem(val, xform_ea(instr, regs, 0), 2, regs);
+ goto ldst_done;
+
+#ifdef CONFIG_VSX
+ case 844: /* lxvd2x */
+ case 876: /* lxvd2ux */
+ if (!(regs->msr & MSR_VSX))
+ break;
+ rd |= (instr & 1) << 5;
+ ea = xform_ea(instr, regs, u);
+ err = do_vsx_load(rd, do_lxvd2x, ea, regs);
+ goto ldst_done;
+
+ case 972: /* stxvd2x */
+ case 1004: /* stxvd2ux */
+ if (!(regs->msr & MSR_VSX))
+ break;
+ rd |= (instr & 1) << 5;
+ ea = xform_ea(instr, regs, u);
+ err = do_vsx_store(rd, do_stxvd2x, ea, regs);
+ goto ldst_done;
+
+#endif /* CONFIG_VSX */
+ }
+ break;
+
+ case 32: /* lwz */
+ case 33: /* lwzu */
+ err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 4, regs);
+ goto ldst_done;
+
+ case 34: /* lbz */
+ case 35: /* lbzu */
+ err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 1, regs);
+ goto ldst_done;
+
+ case 36: /* stw */
+ val = regs->gpr[rd];
+ err = write_mem(val, dform_ea(instr, regs), 4, regs);
+ goto ldst_done;
+
+ case 37: /* stwu */
+ val = regs->gpr[rd];
+ val3 = dform_ea(instr, regs);
+ /*
+	 * On PPC32 the kernel always updates the stack pointer with
+	 * stwu on r1, so emulating this store directly could corrupt
+	 * the exception frame. Instead we only update gpr[1] and set
+	 * TIF_EMULATE_STACK_STORE; the exception return code checks the
+	 * flag and performs the real store safely, using a trampoline
+	 * frame pushed below the kprobed function's stack.
+ */
+	if ((ra == 1) && !(regs->msr & MSR_PR) &&
+	    (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
+ /*
+		 * Check whether the emulated store would overflow the kernel stack
+ */
+ if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
+			printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n");
+ err = -EINVAL;
+ break;
+ }
+#endif /* CONFIG_PPC32 */
+ /*
+		 * Warn if the flag is already set, since that means
+		 * the previously saved value would be lost.
+ */
+ WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
+ set_thread_flag(TIF_EMULATE_STACK_STORE);
+ err = 0;
+ } else
+ err = write_mem(val, val3, 4, regs);
+ goto ldst_done;
+
+ case 38: /* stb */
+ case 39: /* stbu */
+ val = regs->gpr[rd];
+ err = write_mem(val, dform_ea(instr, regs), 1, regs);
+ goto ldst_done;
+
+ case 40: /* lhz */
+ case 41: /* lhzu */
+ err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
+ goto ldst_done;
+
+ case 42: /* lha */
+ case 43: /* lhau */
+ err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
+ if (!err)
+ regs->gpr[rd] = (signed short) regs->gpr[rd];
+ goto ldst_done;
+
+ case 44: /* sth */
+ case 45: /* sthu */
+ val = regs->gpr[rd];
+ err = write_mem(val, dform_ea(instr, regs), 2, regs);
+ goto ldst_done;
+
+ case 46: /* lmw */
+ ra = (instr >> 16) & 0x1f;
+ if (ra >= rd)
+			break;	/* invalid form: ra is among the registers to be loaded */
+ ea = dform_ea(instr, regs);
+ do {
+ err = read_mem(&regs->gpr[rd], ea, 4, regs);
+ if (err)
+ return 0;
+ ea += 4;
+ } while (++rd < 32);
+ goto instr_done;
+
+ case 47: /* stmw */
+ ea = dform_ea(instr, regs);
+ do {
+ err = write_mem(regs->gpr[rd], ea, 4, regs);
+ if (err)
+ return 0;
+ ea += 4;
+ } while (++rd < 32);
+ goto instr_done;
+
+#ifdef CONFIG_PPC_FPU
+ case 48: /* lfs */
+ case 49: /* lfsu */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = dform_ea(instr, regs);
+ err = do_fp_load(rd, do_lfs, ea, 4, regs);
+ goto ldst_done;
+
+ case 50: /* lfd */
+ case 51: /* lfdu */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = dform_ea(instr, regs);
+ err = do_fp_load(rd, do_lfd, ea, 8, regs);
+ goto ldst_done;
+
+ case 52: /* stfs */
+ case 53: /* stfsu */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = dform_ea(instr, regs);
+ err = do_fp_store(rd, do_stfs, ea, 4, regs);
+ goto ldst_done;
+
+ case 54: /* stfd */
+ case 55: /* stfdu */
+ if (!(regs->msr & MSR_FP))
+ break;
+ ea = dform_ea(instr, regs);
+ err = do_fp_store(rd, do_stfd, ea, 8, regs);
+ goto ldst_done;
+#endif
+
+#ifdef __powerpc64__
+ case 58: /* ld[u], lwa */
+ switch (instr & 3) {
+ case 0: /* ld */
+ err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+ 8, regs);
+ goto ldst_done;
+ case 1: /* ldu */
+ err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+ 8, regs);
+ goto ldst_done;
+ case 2: /* lwa */
+ err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+ 4, regs);
+ if (!err)
+ regs->gpr[rd] = (signed int) regs->gpr[rd];
+ goto ldst_done;
+ }
+ break;
+
+ case 62: /* std[u] */
+ val = regs->gpr[rd];
+ switch (instr & 3) {
+ case 0: /* std */
+ err = write_mem(val, dsform_ea(instr, regs), 8, regs);
+ goto ldst_done;
+ case 1: /* stdu */
+ err = write_mem(val, dsform_ea(instr, regs), 8, regs);
+ goto ldst_done;
+ }
+ break;
+#endif /* __powerpc64__ */
+
+ }
+ err = -EINVAL;
+
+ ldst_done:
+ if (err) {
+ regs->gpr[ra] = old_ra;
+ return 0; /* invoke DSI if -EFAULT? */
+ }
+ instr_done:
+ regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+ return 1;
+
+ logical_done:
+ if (instr & 1)
+ set_cr0(regs, ra);
+ goto instr_done;
+
+ arith_done:
+ if (instr & 1)
+ set_cr0(regs, rd);
+ goto instr_done;
}
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index c4c622d8e6a..1b5a0a09d60 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -13,13 +13,7 @@
#include <asm/ppc_asm.h>
.section __ex_table,"a"
-#ifdef CONFIG_PPC64
- .align 3
-#define EXTBL .llong
-#else
- .align 2
-#define EXTBL .long
-#endif
+ PPC_LONG_ALIGN
.text
_GLOBAL(strcpy)
@@ -34,7 +28,7 @@ _GLOBAL(strcpy)
/* This clears out any unused part of the destination buffer,
just as the libc version does. -- paulus */
_GLOBAL(strncpy)
- cmpwi 0,r5,0
+ PPC_LCMPI 0,r5,0
beqlr
mtctr r5
addi r6,r3,-1
@@ -45,7 +39,7 @@ _GLOBAL(strncpy)
bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
bnelr /* if we didn't hit a null char, we're done */
mfctr r5
- cmpwi 0,r5,0 /* any space left in destination buffer? */
+ PPC_LCMPI 0,r5,0 /* any space left in destination buffer? */
beqlr /* we know r0 == 0 here */
2: stbu r0,1(r6) /* clear it out if so */
bdnz 2b
@@ -75,6 +69,22 @@ _GLOBAL(strcmp)
beq 1b
blr
+_GLOBAL(strncmp)
+ PPC_LCMPI 0,r5,0
+ beq- 2f
+ mtctr r5
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r5)
+ cmpwi 1,r3,0
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ beqlr 1
+ bdnzt eq,1b
+ blr
+2: li r3,0
+ blr
+
_GLOBAL(strlen)
addi r4,r3,-1
1: lbzu r0,1(r4)
@@ -84,8 +94,8 @@ _GLOBAL(strlen)
blr
_GLOBAL(memcmp)
- cmpwi 0,r5,0
- ble- 2f
+ PPC_LCMPI 0,r5,0
+ beq- 2f
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
@@ -98,8 +108,8 @@ _GLOBAL(memcmp)
blr
_GLOBAL(memchr)
- cmpwi 0,r5,0
- ble- 2f
+ PPC_LCMPI 0,r5,0
+ beq- 2f
mtctr r5
addi r3,r3,-1
1: lbzu r0,1(r3)
@@ -109,6 +119,7 @@ _GLOBAL(memchr)
2: li r3,0
blr
+#ifdef CONFIG_PPC32
_GLOBAL(__clear_user)
addi r6,r3,-4
li r3,0
@@ -146,52 +157,8 @@ _GLOBAL(__clear_user)
blr
.section __ex_table,"a"
- EXTBL 11b,90b
- EXTBL 1b,91b
- EXTBL 8b,92b
+ PPC_LONG 11b,90b
+ PPC_LONG 1b,91b
+ PPC_LONG 8b,92b
.text
-
-_GLOBAL(__strncpy_from_user)
- addi r6,r3,-1
- addi r4,r4,-1
- cmpwi 0,r5,0
- beq 2f
- mtctr r5
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- beq 3f
-2: addi r6,r6,1
-3: subf r3,r3,r6
- blr
-99: li r3,-EFAULT
- blr
-
- .section __ex_table,"a"
- EXTBL 1b,99b
- .text
-
-/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
-_GLOBAL(__strnlen_user)
- addi r7,r3,-1
- subf r6,r7,r5 /* top+1 - str */
- cmplw 0,r4,r6
- bge 0f
- mr r6,r4
-0: mtctr r6 /* ctr = min(len, top - str) */
-1: lbzu r0,1(r7) /* get next byte */
- cmpwi 0,r0,0
- bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
- addi r7,r7,1
- subf r3,r3,r7 /* number of bytes we have looked at */
- beqlr /* return if we found a 0 byte */
- cmpw 0,r3,r4 /* did we look at all len bytes? */
- blt 99f /* if not, must have hit top */
- addi r3,r4,1 /* return len + 1 to indicate no null found */
- blr
-99: li r3,0 /* bad address, return 0 */
- blr
-
- .section __ex_table,"a"
- EXTBL 1b,99b
+#endif
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
new file mode 100644
index 00000000000..7bd9549a90a
--- /dev/null
+++ b/arch/powerpc/lib/string_64.S
@@ -0,0 +1,202 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to: Destination address, in user space.
+ * @n: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
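+ *
As the comment says, the access_ok() check is the caller's job; the conventional wrapper in the uaccess headers looks along these lines (a sketch for context, not this file's code):

	/* Sketch of the usual clear_user() wrapper around this routine. */
	static inline unsigned long sketch_clear_user(void __user *to,
						      unsigned long n)
	{
		if (likely(access_ok(VERIFY_WRITE, to, n)))
			n = __clear_user(to, n);
		return n;			/* bytes left uncleared */
	}
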
+
+ .macro err1
+100:
+ .section __ex_table,"a"
+ .align 3
+ .llong 100b,.Ldo_err1
+ .previous
+ .endm
+
+ .macro err2
+200:
+ .section __ex_table,"a"
+ .align 3
+ .llong 200b,.Ldo_err2
+ .previous
+ .endm
+
+ .macro err3
+300:
+ .section __ex_table,"a"
+ .align 3
+ .llong 300b,.Ldo_err3
+ .previous
+ .endm
+
+.Ldo_err1:
+ mr r3,r8
+
+.Ldo_err2:
+ mtctr r4
+1:
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ addi r4,r4,-1
+ bdnz 1b
+
+.Ldo_err3:
+ mr r3,r4
+ blr
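
The three fixup labels cooperate: a fault at an err1 site rewinds r3 to the checkpoint kept in r8, .Ldo_err2 then retries the remaining r4 bytes one stb at a time under err3 cover, and a second fault returns through .Ldo_err3 with the count still to go. A C model of that recovery (names hypothetical):

	/* C model of the fault recovery: rewind to the last checkpoint, retry
	 * byte by byte, and report whatever still could not be cleared. */
	static unsigned long sketch_clear_fixup(char __user *checkpoint,
						unsigned long remaining)
	{
		while (remaining) {
			if (__put_user(0, checkpoint))	/* faulted again: stop */
				break;
			checkpoint++;
			remaining--;
		}
		return remaining;	/* __clear_user()'s return value */
	}
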
+
+_GLOBAL_TOC(__clear_user)
+ cmpdi r4,32
+ neg r6,r3
+ li r0,0
+ blt .Lshort_clear
+ mr r8,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ /* Get the destination 8 byte aligned */
+ bf cr7*4+3,1f
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r4,r4,r6
+
+ cmpdi r4,32
+ cmpdi cr1,r4,512
+ blt .Lshort_clear
+ bgt cr1,.Llong_clear
+
+.Lmedium_clear:
+ srdi r6,r4,5
+ mtctr r6
+
+ /* Do 32 byte chunks */
+4:
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+err2; std r0,16(r3)
+err2; std r0,24(r3)
+ addi r3,r3,32
+ addi r4,r4,-32
+ bdnz 4b
+
+.Lshort_clear:
+ /* up to 31 bytes to go */
+ cmpdi r4,16
+ blt 6f
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+ addi r3,r3,16
+ addi r4,r4,-16
+
+ /* Up to 15 bytes to go */
+6: mr r8,r3
+ clrldi r4,r4,(64-4)
+ mtocrf 0x01,r4
+ bf cr7*4+0,7f
+err1; std r0,0(r3)
+ addi r3,r3,8
+
+7: bf cr7*4+1,8f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+8: bf cr7*4+2,9f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+9: bf cr7*4+3,10f
+err1; stb r0,0(r3)
+
+10: li r3,0
+ blr
+
+.Llong_clear:
+ ld r5,PPC64_CACHES@toc(r2)
+
+ bf cr7*4+0,11f
+err2; std r0,0(r3)
+ addi r3,r3,8
+ addi r4,r4,-8
+
+ /* Destination is 16 byte aligned, need to get it cacheline aligned */
+11: lwz r7,DCACHEL1LOGLINESIZE(r5)
+ lwz r9,DCACHEL1LINESIZE(r5)
+
+ /*
+	 * With worst case alignment the long clear loop needs a minimum
+	 * of 1 byte less than 2 cachelines of data to make progress, so
+	 * only take it when at least 4 cachelines (r9 << 2) remain.
+ */
+ sldi r10,r9,2
+ cmpd r4,r10
+ blt .Lmedium_clear
+
+ neg r6,r3
+ addi r10,r9,-1
+ and. r5,r6,r10
+ beq 13f
+
+ srdi r6,r5,4
+ mtctr r6
+ mr r8,r3
+12:
+err1; std r0,0(r3)
+err1; std r0,8(r3)
+ addi r3,r3,16
+ bdnz 12b
+
+ sub r4,r4,r5
+
+13: srd r6,r4,r7
+ mtctr r6
+ mr r8,r3
+14:
+err1; dcbz r0,r3
+ add r3,r3,r9
+ bdnz 14b
+
+ and r4,r4,r10
+
+ cmpdi r4,32
+ blt .Lshort_clear
+ b .Lmedium_clear
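
In C terms the long-clear path has this structure: leading stores to reach cacheline alignment, dcbz for whole lines, then the medium/short paths for the tail. A sketch, eliding the exception fixups and with dcbz modelled as a line-sized memset:

	/* C model of .Llong_clear above. */
	static void sketch_long_clear(char *p, unsigned long n, unsigned long lsize)
	{
		while ((unsigned long)p & (lsize - 1)) {	/* align to a line */
			*p++ = 0;
			n--;
		}
		while (n >= lsize) {				/* dcbz per line */
			memset(p, 0, lsize);
			p += lsize;
			n -= lsize;
		}
		memset(p, 0, n);				/* sub-line tail */
	}
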
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
new file mode 100644
index 00000000000..3cf529ceec5
--- /dev/null
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -0,0 +1,74 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
+ * Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+
+int enter_vmx_usercopy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+	/* This acts as preempt_disable() as well, which is needed before
+	 * calling enable_kernel_altivec(). We must disable page faults
+	 * because they can call schedule() and thus make us lose the VMX
+	 * context. So on a page fault we simply fail, which causes a
+	 * fallback to the normal non-VMX copy.
+ */
+ pagefault_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * This function must return 0: __copy_tofrom_user_power7 tail-calls it
+ * on the success path, so its return value becomes the copy's, and 0
+ * means success there.
+ */
+int exit_vmx_usercopy(void)
+{
+ pagefault_enable();
+ return 0;
+}
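
These two functions bracket the vector path in copyuser_power7.S. A C sketch of the control flow (the real caller is assembly, and fallback_copy here is hypothetical):

	/* C sketch of how __copy_tofrom_user_power7 uses the brackets. */
	static unsigned long sketch_vmx_copy(void __user *to,
					     const void __user *from,
					     unsigned long n)
	{
		if (!enter_vmx_usercopy())		/* in_interrupt(): no VMX */
			return fallback_copy(to, from, n);	/* hypothetical */

		/* ...VMX-accelerated copy loop, with page faults disabled... */

		return exit_vmx_usercopy();	/* 0; the tail call preserves it */
	}
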
+
+int enter_vmx_copy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ preempt_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_copy(void *dest)
+{
+ preempt_enable();
+ return dest;
+}
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
new file mode 100644
index 00000000000..e905f7c2ea7
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -0,0 +1,177 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <altivec.h>
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V) \
+ unative_t *V = (unative_t *)V##_in; \
+ unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V) \
+ do { \
+ V##_0 = V[0]; \
+ V##_1 = V[1]; \
+ V##_2 = V[2]; \
+ V##_3 = V[3]; \
+ } while (0)
+
+#define STORE(V) \
+ do { \
+ V[0] = V##_0; \
+ V[1] = V##_1; \
+ V[2] = V##_2; \
+ V[3] = V##_3; \
+ } while (0)
+
+#define XOR(V1, V2) \
+ do { \
+ V1##_0 = vec_xor(V1##_0, V2##_0); \
+ V1##_1 = vec_xor(V1##_1, V2##_1); \
+ V1##_2 = vec_xor(V1##_2, V2##_2); \
+ V1##_3 = vec_xor(V1##_3, V2##_3); \
+ } while (0)
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ XOR(v1, v2);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ XOR(v1, v2);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ DEFINE(v5);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ preempt_disable();
+ enable_kernel_altivec();
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ LOAD(v5);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v5);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ v5 += 4;
+ } while (--lines > 0);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
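
These four routines plug into the kernel's generic XOR template machinery; the registration, done in the arch xor.h header, looks roughly like this sketch:

	/* Sketch of the xor_block_template wiring for these routines. */
	static struct xor_block_template xor_block_altivec = {
		.name = "altivec",
		.do_2 = xor_altivec_2,
		.do_3 = xor_altivec_3,
		.do_4 = xor_altivec_4,
		.do_5 = xor_altivec_5,
	};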