Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc64/lib
12 files changed, 1872 insertions, 0 deletions
diff --git a/arch/ppc64/lib/Makefile b/arch/ppc64/lib/Makefile
new file mode 100644
index 00000000000..bf7b5bbfc04
--- /dev/null
+++ b/arch/ppc64/lib/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for ppc64-specific library files..
+#
+
+lib-y := checksum.o dec_and_lock.o string.o strcase.o
+lib-y += copypage.o memcpy.o copyuser.o usercopy.o
+
+# Lock primitives are defined as no-ops in include/linux/spinlock.h
+# for non-SMP configs. Don't build the real versions.
+
+lib-$(CONFIG_SMP) += locks.o
+
+# e2a provides EBCDIC to ASCII conversions.
+ifdef CONFIG_PPC_ISERIES
+obj-$(CONFIG_PCI)	+= e2a.o
+endif
+
+lib-$(CONFIG_DEBUG_KERNEL) += sstep.o
diff --git a/arch/ppc64/lib/checksum.S b/arch/ppc64/lib/checksum.S
new file mode 100644
index 00000000000..ef96c6c58ef
--- /dev/null
+++ b/arch/ppc64/lib/checksum.S
@@ -0,0 +1,229 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ *
+ * In practice len == 5, but this is not guaranteed.  So this code does not
+ * attempt to use doubleword instructions.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
+        add     r0,r0,r4
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
+ * No real gain trying to do this specially for 64 bit, but
+ * the 32 bit addition may spill into the upper bits of
+ * the doubleword so we still must fold it down from 64.
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+        rldicl  r4,r0,32,0      /* fold 64 bit value */
+        add     r0,r4,r0
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * This code assumes at least halfword alignment, though the length
+ * can be any number of bytes.  The sum is accumulated in r5.
+ *
+ * csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(csum_partial)
+        subi	r3,r3,8		/* we'll offset by 8 for the loads */
+        srdi.	r6,r4,3         /* divide by 8 for doubleword count */
+        addic   r5,r5,0         /* clear carry */
+        beq	3f              /* if we're doing < 8 bytes */
+        andi.	r0,r3,2         /* aligned on a word boundary already? */
+        beq+	1f
+        lhz     r6,8(r3)        /* do 2 bytes to get aligned */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        addc    r5,r5,r6
+        srdi.   r6,r4,3         /* recompute number of doublewords */
+        beq     3f              /* any left? */
+1:      mtctr   r6
+2:      ldu     r6,8(r3)        /* main sum loop */
+        adde    r5,r5,r6
+        bdnz    2b
+        andi.	r4,r4,7         /* compute bytes left to sum after doublewords */
+3:	cmpwi	0,r4,4		/* is at least a full word left? */
+	blt	4f
+	lwz	r6,8(r3)	/* sum this word */
+	addi	r3,r3,4
+	subi	r4,r4,4
+	adde	r5,r5,r6
+4:	cmpwi	0,r4,2		/* is at least a halfword left? */
+        blt+	5f
+        lhz     r6,8(r3)        /* sum this halfword */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        adde    r5,r5,r6
+5:	cmpwi	0,r4,1		/* is at least a byte left? */
+        bne+    6f
+        lbz     r6,8(r3)        /* sum this byte */
+        slwi    r6,r6,8         /* this byte is assumed to be the upper byte of a halfword */
+        adde    r5,r5,r6
+6:      addze	r5,r5		/* add in final carry */
+	rldicl  r4,r5,32,0      /* fold two 32-bit halves together */
+        add     r3,r4,r5
+        srdi    r3,r3,32
+        blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * This code needs to be reworked to take advantage of 64 bit sum+copy.
+ * However, due to tokenring halfword alignment problems this will be very
+ * tricky.  For now we'll leave it until we instrument it somehow.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
+92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
+	adde	r0,r0,r6
+	bdnz	82b
+	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
+        rldicl  r4,r3,32,0      /* fold 64 bit value */
+        add     r3,r4,r3
+        srdi    r3,r3,32
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+	.globl src_error_1
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+	.globl src_error_2
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+	.globl src_error_3
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+	.globl src_error
+src_error:
+	cmpdi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+	.globl dst_error
+dst_error:
+	cmpdi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.align  3
+	.llong	81b,src_error_1
+	.llong	91b,dst_error
+	.llong	82b,src_error_2
+	.llong	92b,dst_error
+	.llong	83b,src_error_3
+	.llong	93b,dst_error
+	.llong	84b,src_error_3
+	.llong	94b,dst_error
+	.llong	95b,dst_error
+	.llong	96b,dst_error
+	.llong	97b,dst_error
diff --git a/arch/ppc64/lib/copypage.S b/arch/ppc64/lib/copypage.S
new file mode 100644
index 00000000000..733d61618bb
--- /dev/null
+++ b/arch/ppc64/lib/copypage.S
@@ -0,0 +1,121 @@
+/*
+ * arch/ppc64/lib/copypage.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_page)
+	std	r31,-8(1)
+	std	r30,-16(1)
+	std	r29,-24(1)
+	std	r28,-32(1)
+	std	r27,-40(1)
+	std	r26,-48(1)
+	std	r25,-56(1)
+	std	r24,-64(1)
+	std	r23,-72(1)
+	std	r22,-80(1)
+	std	r21,-88(1)
+	std	r20,-96(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r12,5
+0:	addi	r5,r5,-24
+	mtctr	r12
+	ld	r22,640(4)
+	ld	r21,512(4)
+	ld	r20,384(4)
+	ld	r11,256(4)
+	ld	r9,128(4)
+	ld	r7,0(4)
+	ld	r25,648(4)
+	ld	r24,520(4)
+	ld	r23,392(4)
+	ld	r10,264(4)
+	ld	r8,136(4)
+	ldu	r6,8(4)
+	cmpwi	r5,24
+1:	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	ld	r28,648(4)
+	ld	r27,520(4)
+	ld	r26,392(4)
+	ld	r31,264(4)
+	ld	r30,136(4)
+	ld	r29,8(4)
+	std	r25,656(3)
+	std	r24,528(3)
+	std	r23,400(3)
+	std	r10,272(3)
+	std	r8,144(3)
+	std	r6,16(3)
+	ld	r22,656(4)
+	ld	r21,528(4)
+	ld	r20,400(4)
+	ld	r11,272(4)
+	ld	r9,144(4)
+	ld	r7,16(4)
+	std	r28,664(3)
+	std	r27,536(3)
+	std	r26,408(3)
+	std	r31,280(3)
+	std	r30,152(3)
+	stdu	r29,24(3)
+	ld	r25,664(4)
+	ld	r24,536(4)
+	ld	r23,408(4)
+	ld	r10,280(4)
+	ld	r8,152(4)
+	ldu	r6,24(4)
+	bdnz	1b
+	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	addi	r4,r4,640
+	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+	ld	r7,0(4)
+	ld	r8,8(4)
+	ldu	r9,16(4)
+3:	ld	r10,8(4)
+	std	r7,8(3)
+	ld	r7,16(4)
+	std	r8,16(3)
+	ld	r8,24(4)
+	std	r9,24(3)
+	ldu	r9,32(4)
+	stdu	r10,32(3)
+	bdnz	3b
+4:	ld	r10,8(4)
+	std	r7,8(3)
+	std	r8,16(3)
+	std	r9,24(3)
+	std	r10,32(3)
+9:	ld	r20,-96(1)
+	ld	r21,-88(1)
+	ld	r22,-80(1)
+	ld	r23,-72(1)
+	ld	r24,-64(1)
+	ld	r25,-56(1)
+	ld	r26,-48(1)
+	ld	r27,-40(1)
+	ld	r28,-32(1)
+	ld	r29,-24(1)
+	ld	r30,-16(1)
+	ld	r31,-8(1)
+	blr
diff --git a/arch/ppc64/lib/copyuser.S b/arch/ppc64/lib/copyuser.S
new file mode 100644
index 00000000000..a0b3fbbd6fb
--- /dev/null
+++ b/arch/ppc64/lib/copyuser.S
@@ -0,0 +1,576 @@
+/*
+ * arch/ppc64/lib/copyuser.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(__copy_tofrom_user)
+	/* first check for a whole page copy on a page boundary */
+	cmpldi	cr1,r5,16
+	cmpdi	cr6,r5,4096
+	or	r0,r3,r4
+	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
+	andi.	r0,r0,4095
+	std	r3,-24(r1)
+	crand	cr0*4+2,cr0*4+2,cr6*4+2
+	std	r4,-16(r1)
+	std	r5,-8(r1)
+	dcbt	0,r4
+	beq	.Lcopy_page
+	andi.	r6,r6,7
+	mtcrf	0x01,r5
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+20:	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,22f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,72f
+21:	ld	r9,8(r4)
+70:	std	r8,8(r3)
+22:	ldu	r8,16(r4)
+71:	stdu	r9,16(r3)
+	bdnz	21b
+72:	std	r8,8(r3)
+	beq+	3f
+	addi	r3,r3,16
+23:	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+73:	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+74:	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+75:	stb	r9,0(r3)
+3:	li	r3,0
+	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpldi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+	bt	cr7*4+0,28f
+
+24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
+25:	ld	r0,8(r4)
+	sld	r6,r9,r10
+26:	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,79f
+27:	ld	r0,8(r4)
+	b	2f
+
+28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
+29:	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+30:	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+31:	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,78f
+
+1:	or	r7,r7,r6
+32:	ld	r0,8(r4)
+76:	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+33:	ldu	r9,16(r4)
+	or	r12,r8,r12
+77:	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+78:	std	r12,8(r3)
+	or	r7,r7,r6
+79:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+80:	std	r12,24(r3)
+	bne	6f
+	li	r3,0
+	blr
+6:	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+34:	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+35:	lbz	r0,0(r4)
+81:	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+36:	lhzx	r0,r7,r4
+82:	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+37:	lwzx	r0,r7,r4
+83:	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+38:	lwz	r0,0(r4)
+39:	lwz	r9,4(r4)
+	addi	r4,r4,8
+84:	stw	r0,0(r3)
+85:	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+40:	lwz	r0,0(r4)
+	addi	r4,r4,4
+86:	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+41:	lhz	r0,0(r4)
+	addi	r4,r4,2
+87:	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+42:	lbz	r0,0(r4)
+88:	stb	r0,0(r3)
+4:	li	r3,0
+	blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ */
+
+136:
+137:
+	add	r3,r3,r7
+	b	1f
+130:
+131:
+	addi	r3,r3,8
+120:
+122:
+124:
+125:
+126:
+127:
+128:
+129:
+133:
+	addi	r3,r3,8
+121:
+132:
+	addi	r3,r3,8
+123:
+134:
+135:
+138:
+139:
+140:
+141:
+142:
+
+/*
+ * here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination
+ */
+1:	ld	r6,-24(r1)
+	ld	r4,-16(r1)
+	ld	r5,-8(r1)
+	subf	r6,r6,r3
+	add	r4,r4,r6
+	subf	r5,r6,r5	/* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+	mtctr	r5
+43:	lbz	r0,0(r4)
+	addi	r4,r4,1
+89:	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	43b
+	li	r3,0		/* huh? all copied successfully this time? */
+	blr
+
+/*
+ * here we have trapped again, need to clear ctr bytes starting at r3
+ */
+143:	mfctr	r5
+	li	r0,0
+	mr	r4,r3
+	mr	r3,r5		/* return the number of bytes not copied */
+1:	andi.	r9,r4,7
+	beq	3f
+90:	stb	r0,0(r4)
+	addic.	r5,r5,-1
+	addi	r4,r4,1
+	bne	1b
+	blr
+3:	cmpldi	cr1,r5,8
+	srdi	r9,r5,3
+	andi.	r5,r5,7
+	blt	cr1,93f
+	mtctr	r9
+91:	std	r0,0(r4)
+	addi	r4,r4,8
+	bdnz	91b
+93:	beqlr
+	mtctr	r5	
+92:	stb	r0,0(r4)
+	addi	r4,r4,1
+	bdnz	92b
+	blr
+
+/*
+ * exception handlers for stores: we just need to work
+ * out how many bytes weren't copied
+ */
+182:
+183:
+	add	r3,r3,r7
+	b	1f
+180:
+	addi	r3,r3,8
+171:
+177:
+	addi	r3,r3,8
+170:
+172:
+176:
+178:
+	addi	r3,r3,4
+185:
+	addi	r3,r3,4
+173:
+174:
+175:
+179:
+181:
+184:
+186:
+187:
+188:
+189:	
+1:
+	ld	r6,-24(r1)
+	ld	r5,-8(r1)
+	add	r6,r6,r5
+	subf	r3,r3,r6	/* #bytes not copied */
+190:
+191:
+192:
+	blr			/* #bytes not copied in r3 */
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,120b
+	.llong	21b,121b
+	.llong	70b,170b
+	.llong	22b,122b
+	.llong	71b,171b
+	.llong	72b,172b
+	.llong	23b,123b
+	.llong	73b,173b
+	.llong	74b,174b
+	.llong	75b,175b
+	.llong	24b,124b
+	.llong	25b,125b
+	.llong	26b,126b
+	.llong	27b,127b
+	.llong	28b,128b
+	.llong	29b,129b
+	.llong	30b,130b
+	.llong	31b,131b
+	.llong	32b,132b
+	.llong	76b,176b
+	.llong	33b,133b
+	.llong	77b,177b
+	.llong	78b,178b
+	.llong	79b,179b
+	.llong	80b,180b
+	.llong	34b,134b
+	.llong	35b,135b
+	.llong	81b,181b
+	.llong	36b,136b
+	.llong	82b,182b
+	.llong	37b,137b
+	.llong	83b,183b
+	.llong	38b,138b
+	.llong	39b,139b
+	.llong	84b,184b
+	.llong	85b,185b
+	.llong	40b,140b
+	.llong	86b,186b
+	.llong	41b,141b
+	.llong	87b,187b
+	.llong	42b,142b
+	.llong	88b,188b
+	.llong	43b,143b
+	.llong	89b,189b
+	.llong	90b,190b
+	.llong	91b,191b
+	.llong	92b,192b
+	
+	.text
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label) but it runs slightly
+ * slower on POWER3.
+ */
+.Lcopy_page:
+	std	r31,-32(1)
+	std	r30,-40(1)
+	std	r29,-48(1)
+	std	r28,-56(1)
+	std	r27,-64(1)
+	std	r26,-72(1)
+	std	r25,-80(1)
+	std	r24,-88(1)
+	std	r23,-96(1)
+	std	r22,-104(1)
+	std	r21,-112(1)
+	std	r20,-120(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r0,5
+0:	addi	r5,r5,-24
+	mtctr	r0
+20:	ld	r22,640(4)
+21:	ld	r21,512(4)
+22:	ld	r20,384(4)
+23:	ld	r11,256(4)
+24:	ld	r9,128(4)
+25:	ld	r7,0(4)
+26:	ld	r25,648(4)
+27:	ld	r24,520(4)
+28:	ld	r23,392(4)
+29:	ld	r10,264(4)
+30:	ld	r8,136(4)
+31:	ldu	r6,8(4)
+	cmpwi	r5,24
+1:
+32:	std	r22,648(3)
+33:	std	r21,520(3)
+34:	std	r20,392(3)
+35:	std	r11,264(3)
+36:	std	r9,136(3)
+37:	std	r7,8(3)
+38:	ld	r28,648(4)
+39:	ld	r27,520(4)
+40:	ld	r26,392(4)
+41:	ld	r31,264(4)
+42:	ld	r30,136(4)
+43:	ld	r29,8(4)
+44:	std	r25,656(3)
+45:	std	r24,528(3)
+46:	std	r23,400(3)
+47:	std	r10,272(3)
+48:	std	r8,144(3)
+49:	std	r6,16(3)
+50:	ld	r22,656(4)
+51:	ld	r21,528(4)
+52:	ld	r20,400(4)
+53:	ld	r11,272(4)
+54:	ld	r9,144(4)
+55:	ld	r7,16(4)
+56:	std	r28,664(3)
+57:	std	r27,536(3)
+58:	std	r26,408(3)
+59:	std	r31,280(3)
+60:	std	r30,152(3)
+61:	stdu	r29,24(3)
+62:	ld	r25,664(4)
+63:	ld	r24,536(4)
+64:	ld	r23,408(4)
+65:	ld	r10,280(4)
+66:	ld	r8,152(4)
+67:	ldu	r6,24(4)
+	bdnz	1b
+68:	std	r22,648(3)
+69:	std	r21,520(3)
+70:	std	r20,392(3)
+71:	std	r11,264(3)
+72:	std	r9,136(3)
+73:	std	r7,8(3)
+74:	addi	r4,r4,640
+75:	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+76:	ld	r7,0(4)
+77:	ld	r8,8(4)
+78:	ldu	r9,16(4)
+3:
+79:	ld	r10,8(4)
+80:	std	r7,8(3)
+81:	ld	r7,16(4)
+82:	std	r8,16(3)
+83:	ld	r8,24(4)
+84:	std	r9,24(3)
+85:	ldu	r9,32(4)
+86:	stdu	r10,32(3)
+	bdnz	3b
+4:
+87:	ld	r10,8(4)
+88:	std	r7,8(3)
+89:	std	r8,16(3)
+90:	std	r9,24(3)
+91:	std	r10,32(3)
+9:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	li	r3,0
+	blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+100:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	ld	r3,-24(r1)
+	ld	r4,-16(r1)
+	li	r5,4096
+	b	.Ldst_aligned
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,100b
+	.llong	21b,100b
+	.llong	22b,100b
+	.llong	23b,100b
+	.llong	24b,100b
+	.llong	25b,100b
+	.llong	26b,100b
+	.llong	27b,100b
+	.llong	28b,100b
+	.llong	29b,100b
+	.llong	30b,100b
+	.llong	31b,100b
+	.llong	32b,100b
+	.llong	33b,100b
+	.llong	34b,100b
+	.llong	35b,100b
+	.llong	36b,100b
+	.llong	37b,100b
+	.llong	38b,100b
+	.llong	39b,100b
+	.llong	40b,100b
+	.llong	41b,100b
+	.llong	42b,100b
+	.llong	43b,100b
+	.llong	44b,100b
+	.llong	45b,100b
+	.llong	46b,100b
+	.llong	47b,100b
+	.llong	48b,100b
+	.llong	49b,100b
+	.llong	50b,100b
+	.llong	51b,100b
+	.llong	52b,100b
+	.llong	53b,100b
+	.llong	54b,100b
+	.llong	55b,100b
+	.llong	56b,100b
+	.llong	57b,100b
+	.llong	58b,100b
+	.llong	59b,100b
+	.llong	60b,100b
+	.llong	61b,100b
+	.llong	62b,100b
+	.llong	63b,100b
+	.llong	64b,100b
+	.llong	65b,100b
+	.llong	66b,100b
+	.llong	67b,100b
+	.llong	68b,100b
+	.llong	69b,100b
+	.llong	70b,100b
+	.llong	71b,100b
+	.llong	72b,100b
+	.llong	73b,100b
+	.llong	74b,100b
+	.llong	75b,100b
+	.llong	76b,100b
+	.llong	77b,100b
+	.llong	78b,100b
+	.llong	79b,100b
+	.llong	80b,100b
+	.llong	81b,100b
+	.llong	82b,100b
+	.llong	83b,100b
+	.llong	84b,100b
+	.llong	85b,100b
+	.llong	86b,100b
+	.llong	87b,100b
+	.llong	88b,100b
+	.llong	89b,100b
+	.llong	90b,100b
+	.llong	91b,100b
diff --git a/arch/ppc64/lib/dec_and_lock.c b/arch/ppc64/lib/dec_and_lock.c
new file mode 100644
index 00000000000..6e8d8591708
--- /dev/null
+++ b/arch/ppc64/lib/dec_and_lock.c
@@ -0,0 +1,55 @@
+/*
+ * ppc64 version of atomic_dec_and_lock() using cmpxchg
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+
+/*
+ * This is an implementation of the notion of "decrement a
+ * reference count, and return locked if it decremented to zero".
+ *
+ * This implementation can be used on any architecture that
+ * has a cmpxchg, and where atomic->value is an int holding
+ * the value of the atomic (i.e. the high bits aren't used
+ * for a lock or anything like that).
+ *
+ * N.B. ATOMIC_DEC_AND_LOCK gets defined in include/linux/spinlock.h
+ * if spinlocks are empty and thus atomic_dec_and_lock is defined
+ * to be atomic_dec_and_test - in that case we don't need it
+ * defined here as well.
+ */
+
+#ifndef ATOMIC_DEC_AND_LOCK
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+	int counter;
+	int newcount;
+
+	for (;;) {
+		counter = atomic_read(atomic);
+		newcount = counter - 1;
+		if (!newcount)
+			break;		/* do it the slow way */
+
+		newcount = cmpxchg(&atomic->counter, counter, newcount);
+		if (newcount == counter)
+			return 0;
+	}
+
+	spin_lock(lock);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock(lock);
+	return 0;
+}
+
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif /* ATOMIC_DEC_AND_LOCK */
diff --git a/arch/ppc64/lib/e2a.c b/arch/ppc64/lib/e2a.c
new file mode 100644
index 00000000000..d2b83488792
--- /dev/null
+++ b/arch/ppc64/lib/e2a.c
@@ -0,0 +1,108 @@
+/*
+ *  arch/ppc64/lib/e2a.c
+ *
+ *  EBCDIC to ASCII conversion
+ *
+ * This function moved here from arch/ppc64/kernel/viopath.c
+ *
+ * (C) Copyright 2000-2004 IBM Corporation
+ *
+ * This program is free software;  you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) anyu later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/module.h>
+
+unsigned char e2a(unsigned char x)
+{
+	switch (x) {
+	case 0xF0:
+		return '0';
+	case 0xF1:
+		return '1';
+	case 0xF2:
+		return '2';
+	case 0xF3:
+		return '3';
+	case 0xF4:
+		return '4';
+	case 0xF5:
+		return '5';
+	case 0xF6:
+		return '6';
+	case 0xF7:
+		return '7';
+	case 0xF8:
+		return '8';
+	case 0xF9:
+		return '9';
+	case 0xC1:
+		return 'A';
+	case 0xC2:
+		return 'B';
+	case 0xC3:
+		return 'C';
+	case 0xC4:
+		return 'D';
+	case 0xC5:
+		return 'E';
+	case 0xC6:
+		return 'F';
+	case 0xC7:
+		return 'G';
+	case 0xC8:
+		return 'H';
+	case 0xC9:
+		return 'I';
+	case 0xD1:
+		return 'J';
+	case 0xD2:
+		return 'K';
+	case 0xD3:
+		return 'L';
+	case 0xD4:
+		return 'M';
+	case 0xD5:
+		return 'N';
+	case 0xD6:
+		return 'O';
+	case 0xD7:
+		return 'P';
+	case 0xD8:
+		return 'Q';
+	case 0xD9:
+		return 'R';
+	case 0xE2:
+		return 'S';
+	case 0xE3:
+		return 'T';
+	case 0xE4:
+		return 'U';
+	case 0xE5:
+		return 'V';
+	case 0xE6:
+		return 'W';
+	case 0xE7:
+		return 'X';
+	case 0xE8:
+		return 'Y';
+	case 0xE9:
+		return 'Z';
+	}
+	return ' ';
+}
+EXPORT_SYMBOL(e2a);
+
+
diff --git a/arch/ppc64/lib/locks.c b/arch/ppc64/lib/locks.c
new file mode 100644
index 00000000000..ef70ef91abe
--- /dev/null
+++ b/arch/ppc64/lib/locks.c
@@ -0,0 +1,95 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hvcall.h>
+#include <asm/iSeries/HvCall.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
+
+void __spin_yield(spinlock_t *lock)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+	struct paca_struct *holder_paca;
+
+	lock_value = lock->lock;
+	if (lock_value == 0)
+		return;
+	holder_cpu = lock_value & 0xffff;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	holder_paca = &paca[holder_cpu];
+	yield_count = holder_paca->lppaca.yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (lock->lock != lock_value)
+		return;		/* something has changed */
+#ifdef CONFIG_PPC_ISERIES
+	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+		((u64)holder_cpu << 32) | yield_count);
+#else
+	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
+			   yield_count);
+#endif
+}
+
+/*
+ * Waiting for a read lock or a write lock on a rwlock...
+ * This turns out to be the same for read and write locks, since
+ * we only know the holder if it is write-locked.
+ */
+void __rw_yield(rwlock_t *rw)
+{
+	int lock_value;
+	unsigned int holder_cpu, yield_count;
+	struct paca_struct *holder_paca;
+
+	lock_value = rw->lock;
+	if (lock_value >= 0)
+		return;		/* no write lock at present */
+	holder_cpu = lock_value & 0xffff;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	holder_paca = &paca[holder_cpu];
+	yield_count = holder_paca->lppaca.yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (rw->lock != lock_value)
+		return;		/* something has changed */
+#ifdef CONFIG_PPC_ISERIES
+	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+		((u64)holder_cpu << 32) | yield_count);
+#else
+	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
+			   yield_count);
+#endif
+}
+#endif
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+	while (lock->lock) {
+		HMT_low();
+		if (SHARED_PROCESSOR)
+			__spin_yield(lock);
+	}
+	HMT_medium();
+}
+
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc64/lib