diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 10:32:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 10:32:01 -0700 |
commit | 9daeaa370526df1c19eba4780247bb7155541e38 (patch) | |
tree | 5ae2601c26e280e81d753c1fe65453a3b8b1d2a0 /arch/sparc/lib/umul.S | |
parent | cb62ab71fe2b16e8203a0f0a2ef4eda23d761338 (diff) | |
parent | 1edc17832d8f49a0263d364c453ea35da0e4e2a6 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc updates from David Miller:
1) Kill off support for sun4c and Cypress sun4m chips.
And as a result we were able to also kill off that ugly btfixup thing
that required multi-stage links of the final vmlinux image in the
Kbuild system. This should make the kbuild maintainers really happy.
Thanks a lot to Sam Ravnborg for his tireless efforts to get this
going.
2) Convert sparc64 to nobootmem. I suspect now with sparc32 being a lot
cleaner, it should be able to fall in line and modernize in this area
too.
3) Make sparc32 use generic clockevents, from Tkhai Kirill.
[ I fixed up the BPF rules, and tried to clean up the build rules too.
But I don't have - or want - a sparc cross-build environment, so the
BPF rule bug and the related build cleanup was all done with just a
bare "make -n" pseudo-test. - Linus ]
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (110 commits)
sparc32: use flushi when run-time patching in per_cpu_patch
sparc32: fix cpuid_patch run-time patching
sparc32: drop unused inline functions in srmmu.c
sparc32: drop unused functions in pgtsrmmu.h
sparc32,leon: move leon mmu functions to leon_mm.c
sparc32,leon: remove duplicate definitions in leon.h
sparc32,leon: remove duplicate UART register definitions
sparc32,leon: move leon ASI definitions to asi.h
sparc32: move trap table to a separate file
sparc64: renamed ttable.S to ttable_64.S
sparc32: Remove asm/sysen.h header.
sparc32: Delete asm/smpprim.h
sparc32: Remove unused empty_bad_page{,_table} declarations.
sparc32: Kill boot_cpu_id4
sparc32: Move GET_PROCESSOR*_ID() out of asm/asmmacro.h
sparc32: Remove completely unused code from asm/cache.h
sparc32: Add ucmpdi2.o to obj-y instead of lib-y.
sparc32: add ucmpdi2
sparc: introduce arch/sparc/Kbuild
sparc: remove obsolete documentation
...
Diffstat (limited to 'arch/sparc/lib/umul.S')
-rw-r--r-- | arch/sparc/lib/umul.S | 171 |
1 files changed, 0 insertions, 171 deletions
diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S deleted file mode 100644 index 1f36ae68252..00000000000 --- a/arch/sparc/lib/umul.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * umul.S: This routine was taken from glibc-1.09 and is covered - * by the GNU Library General Public License Version 2. - */ - - -/* - * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - * call .umul - * nop - * bnz overflow (or tnz) - */ - - .globl .umul - .globl _Umul -.umul: -_Umul: /* needed for export */ - or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - - /* - * Normally, with the shift-and-add approach, if both numbers are - * positive you get the correct result. With 32-bit two's-complement - * numbers, -x is represented as - * - * x 32 - * ( 2 - ------ ) mod 2 * 2 - * 32 - * 2 - * - * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, - * we can treat this as if the radix point were just to the left - * of the sign bit (multiply by 2^32), and get - * - * -x = (2 - x) mod 2 - * - * Then, ignoring the `mod 2's for convenience: - * - * x * y = xy - * -x * y = 2y - xy - * x * -y = 2x - xy - * -x * -y = 4 - 2x - 2y + xy - * - * For signed multiplies, we subtract (x << 32) from the partial - * product to fix this problem for negative multipliers (see mul.s). - * Because of the way the shift into the partial product is calculated - * (N xor V), this term is automatically removed for the multiplicand, - * so we don't have to adjust. - * - * But for unsigned multiplies, the high order bit wasn't a sign bit, - * and the correction is wrong. So for unsigned multiplies where the - * high order bit is one, we end up with xy - (y << 32). To fix it - * we add y << 32. - */ -#if 0 - tst %o1 - bl,a 1f ! if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) - -1: - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 -#else - /* Faster code from tege@sics.se. */ - sra %o1, 31, %o2 ! make mask from sign bit - and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %o2, %o1 ! add compensation and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above), - * and overflow is impossible (the answer is at most 24 bits long). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the result; %y has - * the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * -----result----- - * - * The 12 bits of %o4 left of the `result' area are all zero; - * in fact, all top 20 bits of %o4 are zero. - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20 - or %o5, %o0, %o0 - retl - addcc %g0, %g0, %o1 ! %o1 = zero, and set Z - - .globl .umul_patch -.umul_patch: - umul %o0, %o1, %o0 - retl - rd %y, %o1 - nop |