Diffstat (limited to 'lib/mpi')
-rw-r--r--  lib/mpi/Makefile                   22
-rw-r--r--  lib/mpi/generic_mpih-add1.c        61
-rw-r--r--  lib/mpi/generic_mpih-lshift.c      63
-rw-r--r--  lib/mpi/generic_mpih-mul1.c        57
-rw-r--r--  lib/mpi/generic_mpih-mul2.c        60
-rw-r--r--  lib/mpi/generic_mpih-mul3.c        61
-rw-r--r--  lib/mpi/generic_mpih-rshift.c      63
-rw-r--r--  lib/mpi/generic_mpih-sub1.c        60
-rw-r--r--  lib/mpi/longlong.h               1379
-rw-r--r--  lib/mpi/mpi-bit.c                  56
-rw-r--r--  lib/mpi/mpi-cmp.c                  70
-rw-r--r--  lib/mpi/mpi-inline.h              122
-rw-r--r--  lib/mpi/mpi-internal.h            257
-rw-r--r--  lib/mpi/mpi-pow.c                 323
-rw-r--r--  lib/mpi/mpicoder.c                260
-rw-r--r--  lib/mpi/mpih-cmp.c                 56
-rw-r--r--  lib/mpi/mpih-div.c                236
-rw-r--r--  lib/mpi/mpih-mul.c                497
-rw-r--r--  lib/mpi/mpiutil.c                 126
19 files changed, 3829 insertions, 0 deletions
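The helper routines added in this patch lean on one recurring idiom: a carry or borrow is detected by comparing the wrapped result of a full-word operation against one of its operands, instead of reading a hardware flags register. The sketch below is not part of the patch; it is a minimal, self-contained C illustration of that idiom, mirroring the loops in generic_mpih-add1.c and generic_mpih-mul1.c further down. The limb type, function names, and test values here are made up for the example, and the 64-bit product is a portable stand-in for what the umul_ppmm() macro from longlong.h computes.

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t limb_t;	/* stand-in for mpi_limb_t on a 32-bit limb build */

	/* Add two equally sized limb vectors, least significant limb first.
	 * Returns the final carry-out (0 or 1), like mpihelp_add_n() below. */
	static limb_t demo_add_n(limb_t *res, const limb_t *a, const limb_t *b, int n)
	{
		limb_t cy = 0;

		for (int i = 0; i < n; i++) {
			limb_t x = a[i];
			limb_t y = b[i] + cy;	/* add previous carry to one addend      */
			cy = y < cy;		/* wrapped? then that addition carried   */
			y += x;			/* add the other addend                  */
			cy += y < x;		/* combine with the carry from this add  */
			res[i] = y;
		}
		return cy;
	}

	/* Multiply a limb vector by a single limb, as mpihelp_mul_1() does,
	 * using a double-width product where the patch uses umul_ppmm(). */
	static limb_t demo_mul_1(limb_t *res, const limb_t *a, int n, limb_t m)
	{
		limb_t cy = 0;

		for (int i = 0; i < n; i++) {
			uint64_t prod = (uint64_t)a[i] * m + cy;	/* high:low product */
			res[i] = (limb_t)prod;				/* low half stored  */
			cy = (limb_t)(prod >> 32);			/* high half carries */
		}
		return cy;
	}

	int main(void)
	{
		limb_t a[2] = { 0xffffffffu, 0x00000001u };	/* value 0x1ffffffff */
		limb_t b[2] = { 0x00000001u, 0x00000000u };	/* value 1           */
		limb_t sum[2], prod[2];
		limb_t cy;

		cy = demo_add_n(sum, a, b, 2);
		printf("sum  = %08x %08x, carry-out %u\n",
		       (unsigned)sum[1], (unsigned)sum[0], (unsigned)cy);

		cy = demo_mul_1(prod, a, 2, 3);
		printf("prod = %08x %08x, carry-out %u\n",
		       (unsigned)prod[1], (unsigned)prod[0], (unsigned)cy);
		return 0;
	}

Running this prints 0x200000000 for the sum and 0x5fffffffd for the product, with no carry out of the top limb in either case; the same comparison trick shows up again in mpihelp_sub_n (as a borrow) and in the generic umul_ppmm fallback near the end of longlong.h.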
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile new file mode 100644 index 00000000000..019a68c9014 --- /dev/null +++ b/lib/mpi/Makefile @@ -0,0 +1,22 @@ +# +# MPI multiprecision maths library (from gpg) +# + +obj-$(CONFIG_MPILIB) = mpi.o + +mpi-y = \ +	generic_mpih-lshift.o		\ +	generic_mpih-mul1.o		\ +	generic_mpih-mul2.o		\ +	generic_mpih-mul3.o		\ +	generic_mpih-rshift.o		\ +	generic_mpih-sub1.o		\ +	generic_mpih-add1.o		\ +	mpicoder.o			\ +	mpi-bit.o			\ +	mpi-cmp.o			\ +	mpih-cmp.o			\ +	mpih-div.o			\ +	mpih-mul.o			\ +	mpi-pow.o			\ +	mpiutil.o diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c new file mode 100644 index 00000000000..c94c7dd344b --- /dev/null +++ b/lib/mpi/generic_mpih-add1.c @@ -0,0 +1,61 @@ +/* mpihelp-add_1.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, + *               2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +	      mpi_ptr_t s2_ptr, mpi_size_t size) +{ +	mpi_limb_t x, y, cy; +	mpi_size_t j; + +	/* The loop counter and index J goes from -SIZE to -1.  This way +	   the loop becomes faster.  */ +	j = -size; + +	/* Offset the base pointers to compensate for the negative indices. */ +	s1_ptr -= j; +	s2_ptr -= j; +	res_ptr -= j; + +	cy = 0; +	do { +		y = s2_ptr[j]; +		x = s1_ptr[j]; +		y += cy;	/* add previous carry to one addend */ +		cy = y < cy;	/* get out carry from that addition */ +		y += x;		/* add other addend */ +		cy += y < x;	/* get out carry from that add, combine */ +		res_ptr[j] = y; +	} while (++j); + +	return cy; +} diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c new file mode 100644 index 00000000000..86318927231 --- /dev/null +++ b/lib/mpi/generic_mpih-lshift.c @@ -0,0 +1,63 @@ +/* mpihelp-lshift.c  -	MPI helper functions + * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + * and store the USIZE least significant digits of the result at WP. + * Return the bits shifted out from the most significant digit. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be >= UP. + */ + +mpi_limb_t +mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt) +{ +	mpi_limb_t high_limb, low_limb; +	unsigned sh_1, sh_2; +	mpi_size_t i; +	mpi_limb_t retval; + +	sh_1 = cnt; +	wp += 1; +	sh_2 = BITS_PER_MPI_LIMB - sh_1; +	i = usize - 1; +	low_limb = up[i]; +	retval = low_limb >> sh_2; +	high_limb = low_limb; +	while (--i >= 0) { +		low_limb = up[i]; +		wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); +		high_limb = low_limb; +	} +	wp[i] = high_limb << sh_1; + +	return retval; +} diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c new file mode 100644 index 00000000000..1668dfd9092 --- /dev/null +++ b/lib/mpi/generic_mpih-mul1.c @@ -0,0 +1,57 @@ +/* mpihelp-mul_1.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. 
+ */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +	      mpi_limb_t s2_limb) +{ +	mpi_limb_t cy_limb; +	mpi_size_t j; +	mpi_limb_t prod_high, prod_low; + +	/* The loop counter and index J goes from -S1_SIZE to -1.  This way +	 * the loop becomes faster.  */ +	j = -s1_size; + +	/* Offset the base pointers to compensate for the negative indices.  */ +	s1_ptr -= j; +	res_ptr -= j; + +	cy_limb = 0; +	do { +		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); +		prod_low += cy_limb; +		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; +		res_ptr[j] = prod_low; +	} while (++j); + +	return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c new file mode 100644 index 00000000000..8a7b29ee174 --- /dev/null +++ b/lib/mpi/generic_mpih-mul2.c @@ -0,0 +1,60 @@ +/* mpihelp-mul_2.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +		 mpi_size_t s1_size, mpi_limb_t s2_limb) +{ +	mpi_limb_t cy_limb; +	mpi_size_t j; +	mpi_limb_t prod_high, prod_low; +	mpi_limb_t x; + +	/* The loop counter and index J goes from -SIZE to -1.  This way +	 * the loop becomes faster.  */ +	j = -s1_size; +	res_ptr -= j; +	s1_ptr -= j; + +	cy_limb = 0; +	do { +		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + +		prod_low += cy_limb; +		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + +		x = res_ptr[j]; +		prod_low = x + prod_low; +		cy_limb += prod_low < x ? 1 : 0; +		res_ptr[j] = prod_low; +	} while (++j); +	return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c new file mode 100644 index 00000000000..f96df327be6 --- /dev/null +++ b/lib/mpi/generic_mpih-mul3.c @@ -0,0 +1,61 @@ +/* mpihelp-mul_3.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +		 mpi_size_t s1_size, mpi_limb_t s2_limb) +{ +	mpi_limb_t cy_limb; +	mpi_size_t j; +	mpi_limb_t prod_high, prod_low; +	mpi_limb_t x; + +	/* The loop counter and index J goes from -SIZE to -1.  This way +	 * the loop becomes faster.  */ +	j = -s1_size; +	res_ptr -= j; +	s1_ptr -= j; + +	cy_limb = 0; +	do { +		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + +		prod_low += cy_limb; +		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + +		x = res_ptr[j]; +		prod_low = x - prod_low; +		cy_limb += prod_low > x ? 1 : 0; +		res_ptr[j] = prod_low; +	} while (++j); + +	return cy_limb; +} diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c new file mode 100644 index 00000000000..ffa328818ca --- /dev/null +++ b/lib/mpi/generic_mpih-rshift.c @@ -0,0 +1,63 @@ +/* mpih-rshift.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + *               2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GNUPG + * + * GNUPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNUPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + * and store the USIZE least significant limbs of the result at WP. + * The bits shifted out to the right are returned. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be <= UP. 
+ */ + +mpi_limb_t +mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt) +{ +	mpi_limb_t high_limb, low_limb; +	unsigned sh_1, sh_2; +	mpi_size_t i; +	mpi_limb_t retval; + +	sh_1 = cnt; +	wp -= 1; +	sh_2 = BITS_PER_MPI_LIMB - sh_1; +	high_limb = up[0]; +	retval = high_limb << sh_2; +	low_limb = high_limb; +	for (i = 1; i < usize; i++) { +		high_limb = up[i]; +		wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); +		low_limb = high_limb; +	} +	wp[i] = low_limb >> sh_1; + +	return retval; +} diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c new file mode 100644 index 00000000000..5d98ab7d685 --- /dev/null +++ b/lib/mpi/generic_mpih-sub1.c @@ -0,0 +1,60 @@ +/* mpihelp-add_2.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +	      mpi_ptr_t s2_ptr, mpi_size_t size) +{ +	mpi_limb_t x, y, cy; +	mpi_size_t j; + +	/* The loop counter and index J goes from -SIZE to -1.  This way +	   the loop becomes faster.  */ +	j = -size; + +	/* Offset the base pointers to compensate for the negative indices.  */ +	s1_ptr -= j; +	s2_ptr -= j; +	res_ptr -= j; + +	cy = 0; +	do { +		y = s2_ptr[j]; +		x = s1_ptr[j]; +		y += cy;	/* add previous carry to subtrahend */ +		cy = y < cy;	/* get out carry from that addition */ +		y = x - y;	/* main subtract */ +		cy += y > x;	/* get out carry from the subtract, combine */ +		res_ptr[j] = y; +	} while (++j); + +	return cy; +} diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h new file mode 100644 index 00000000000..aac511417ad --- /dev/null +++ b/lib/mpi/longlong.h @@ -0,0 +1,1379 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + * Note: I added some stuff for use with gnupg + * + * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, + *	2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public + * License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this file; see the file COPYING.LIB.  If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. */ + +#include <asm-generic/bitops/count_zeros.h> + +/* You have to define the following before including this file: + * + * UWtype -- An unsigned type, default type for operations (typically a "word") + * UHWtype -- An unsigned type, at least half the size of UWtype. + * UDWtype -- An unsigned type, at least twice as large a UWtype + * W_TYPE_SIZE -- size in bits of UWtype + * + * SItype, USItype -- Signed and unsigned 32 bit types. + * DItype, UDItype -- Signed and unsigned 64 bit types. + * + * On a 32 bit machine UWtype should typically be USItype; + * on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries +	that use this file takes place.  */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + * + * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + * word product in HIGH_PROD and LOW_PROD. + * + * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + * UDWtype product.  This is just a variant of umul_ppmm. + + * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator) divides a UDWtype, composed by the UWtype integers + * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + * in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less + * than DENOMINATOR for correct operation.  If, in addition, the most + * significant bit of DENOMINATOR must be 1, then the pre-processor symbol + * UDIV_NEEDS_NORMALIZATION is defined to 1. + * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient + * is rounded towards 0. + * + * 5) count_leading_zeros(count, x) counts the number of zero-bits from the + * msb to the first non-zero bit in the UWtype X.  This is the number of + * steps X needs to be shifted left to set the msb.  Undefined for X == 0, + * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + * + * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + * from the least significant end. + * + * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + * high_addend_2, low_addend_2) adds two UWtype integers, composed by + * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow + * (i.e. carry out) is not stored anywhere, and is lost. 
+ * + * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE + * and LOW_DIFFERENCE.	Overflow (i.e. carry out) is not stored anywhere, + * and is lost. + * + * If any of these macros are left undefined for a particular CPU, + * C macros are used.  */ + +/* The CPUs come in alphabetical order below. + * + * Please add support for more CPUs here, or improve the current support + * for the CPUs below!	*/ + +#if defined(__GNUC__) && !defined(NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be +	understood by gcc1.	Use cpp to avoid major code duplication.  */ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +/*************************************** +	**************  A29K  ***************** +	***************************************/ +#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("add %1,%4,%5\n" \ +		"addc %0,%2,%3" \ +	: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +	: "%r" ((USItype)(ah)), \ +		"rI" ((USItype)(bh)), \ +		"%r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("sub %1,%4,%5\n" \ +		"subc %0,%2,%3" \ +	: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +	: "r" ((USItype)(ah)), \ +		"rI" ((USItype)(bh)), \ +		"r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ +		USItype __m0 = (m0), __m1 = (m1); \ +		__asm__ ("multiplu %0,%1,%2" \ +		: "=r" ((USItype)(xl)) \ +		: "r" (__m0), \ +			"r" (__m1)); \ +		__asm__ ("multmu %0,%1,%2" \ +		: "=r" ((USItype)(xh)) \ +		: "r" (__m0), \ +			"r" (__m1)); \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ +	__asm__ ("dividu %0,%3,%4" \ +	: "=r" ((USItype)(q)), \ +		"=q" ((USItype)(r)) \ +	: "1" ((USItype)(n1)), \ +		"r" ((USItype)(n0)), \ +		"r" ((USItype)(d))) +#endif /* __a29k__ */ + +#if defined(__alpha) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, m0, m1)			\ +do {							\ +	UDItype __m0 = (m0), __m1 = (m1);		\ +	(ph) = __builtin_alpha_umulh(__m0, __m1);	\ +	(pl) = __m0 * __m1;                             \ +} while (0) +#define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { UDItype __r; \ +	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ +	(r) = __r; \ +} while (0) +extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha */ + +/*************************************** +	**************  ARM  ****************** +	***************************************/ +#if defined(__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("adds %1, %4, %5\n" \ +		"adc  %0, %2, %3" \ +	: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +	: "%r" ((USItype)(ah)), \ +		"rI" ((USItype)(bh)), \ +		"%r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subs %1, %4, %5\n" \ +		"sbc  %0, %2, %3" \ +	: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +	: "r" ((USItype)(ah)), \ +		"rI" ((USItype)(bh)), \ +		"r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))) +#if defined __ARM_ARCH_2__ 
|| defined __ARM_ARCH_3__ +#define umul_ppmm(xh, xl, a, b) \ +	__asm__ ("%@ Inlined umul_ppmm\n" \ +		"mov	%|r0, %2, lsr #16		@ AAAA\n" \ +		"mov	%|r2, %3, lsr #16		@ BBBB\n" \ +		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \ +		"bic	%0, %3, %|r2, lsl #16		@ bbbb\n" \ +		"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n" \ +		"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n" \ +		"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n" \ +		"mul	%0, %|r0, %0			@ AAAA * bbbb\n" \ +		"adds	%|r0, %1, %0			@ central sum\n" \ +		"addcs	%|r2, %|r2, #65536\n" \ +		"adds	%1, %|r1, %|r0, lsl #16\n" \ +		"adc	%0, %|r2, %|r0, lsr #16" \ +	: "=&r" ((USItype)(xh)), \ +		"=r" ((USItype)(xl)) \ +	: "r" ((USItype)(a)), \ +		"r" ((USItype)(b)) \ +	: "r0", "r1", "r2") +#else +#define umul_ppmm(xh, xl, a, b) \ +	__asm__ ("%@ Inlined umul_ppmm\n" \ +		"umull %r1, %r0, %r2, %r3" \ +	: "=&r" ((USItype)(xh)), \ +			"=r" ((USItype)(xl)) \ +	: "r" ((USItype)(a)), \ +			"r" ((USItype)(b)) \ +	: "r0", "r1") +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +/*************************************** +	**************  CLIPPER  ************** +	***************************************/ +#if defined(__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ +	({union {UDItype __ll; \ +		struct {USItype __l, __h; } __i; \ +	} __xx; \ +	__asm__ ("mulwux %2,%0" \ +	: "=r" (__xx.__ll) \ +	: "%0" ((USItype)(u)), \ +		"r" ((USItype)(v))); \ +	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define smul_ppmm(w1, w0, u, v) \ +	({union {DItype __ll; \ +		struct {SItype __l, __h; } __i; \ +	} __xx; \ +	__asm__ ("mulwx %2,%0" \ +	: "=r" (__xx.__ll) \ +	: "%0" ((SItype)(u)), \ +		"r" ((SItype)(v))); \ +	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ +	({UDItype __w; \ +	__asm__ ("mulwux %2,%0" \ +	: "=r" (__w) \ +	: "%0" ((USItype)(u)), \ +		"r" ((USItype)(v))); \ +	__w; }) +#endif /* __clipper__ */ + +/*************************************** +	**************  GMICRO  *************** +	***************************************/ +#if defined(__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("add.w %5,%1\n" \ +		"addx %3,%0" \ +	: "=g" ((USItype)(sh)), \ +		"=&g" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +		"g" ((USItype)(bh)), \ +		"%1" ((USItype)(al)), \ +		"g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("sub.w %5,%1\n" \ +		"subx %3,%0" \ +	: "=g" ((USItype)(sh)), \ +		"=&g" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +		"g" ((USItype)(bh)), \ +		"1" ((USItype)(al)), \ +		"g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ +	__asm__ ("mulx %3,%0,%1" \ +	: "=g" ((USItype)(ph)), \ +		"=r" ((USItype)(pl)) \ +	: "%0" ((USItype)(m0)), \ +		"g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ +	__asm__ ("divx %4,%0,%1" \ +	: "=g" ((USItype)(q)), \ +		"=r" ((USItype)(r)) \ +	: "1" ((USItype)(nh)), \ +		"0" ((USItype)(nl)), \ +		"g" ((USItype)(d))) +#endif + +/*************************************** +	**************  HPPA  ***************** +	***************************************/ +#if defined(__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("add %4,%5,%1\n" \ +		   "addc %2,%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "%rM" ((USItype)(ah)), \ +	     "rM" ((USItype)(bh)), \ +	     "%rM" ((USItype)(al)), \ +	     "rM" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("sub %4,%5,%1\n" \ +	   "subb %2,%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" 
((USItype)(sl)) \ +	: "rM" ((USItype)(ah)), \ +	     "rM" ((USItype)(bh)), \ +	     "rM" ((USItype)(al)), \ +	     "rM" ((USItype)(bl))) +#if 0 && defined(_PA_RISC1_1) +/* xmpyu uses floating point register which is not allowed in Linux kernel. */ +#define umul_ppmm(wh, wl, u, v) \ +do { \ +	union {UDItype __ll; \ +	struct {USItype __h, __l; } __i; \ +	} __xx; \ +	__asm__ ("xmpyu %1,%2,%0" \ +	: "=*f" (__xx.__ll) \ +	: "*f" ((USItype)(u)), \ +	       "*f" ((USItype)(v))); \ +	(wh) = __xx.__i.__h; \ +	(wl) = __xx.__i.__l; \ +} while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#if 0 /* #ifndef LONGLONG_STANDALONE */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { USItype __r; \ +	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ +	(r) = __r; \ +} while (0) +extern USItype __udiv_qrnnd(); +#endif /* LONGLONG_STANDALONE */ +#endif /* hppa */ + +/*************************************** +	**************  I370  ***************** +	***************************************/ +#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ +	union {UDItype __ll; \ +	   struct {USItype __h, __l; } __i; \ +	} __xx; \ +	USItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ("mr %0,%3" \ +	: "=r" (__xx.__i.__h), \ +	       "=r" (__xx.__i.__l) \ +	: "%1" (__m0), \ +	       "r" (__m1)); \ +	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +	(xh) += ((((SItype) __m0 >> 31) & __m1) \ +	     + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define smul_ppmm(xh, xl, m0, m1) \ +do { \ +	union {DItype __ll; \ +	   struct {USItype __h, __l; } __i; \ +	} __xx; \ +	__asm__ ("mr %0,%3" \ +	: "=r" (__xx.__i.__h), \ +	       "=r" (__xx.__i.__l) \ +	: "%1" (m0), \ +	       "r" (m1)); \ +	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ +	union {DItype __ll; \ +	   struct {USItype __h, __l; } __i; \ +	} __xx; \ +	__xx.__i.__h = n1; __xx.__i.__l = n0; \ +	__asm__ ("dr %0,%2" \ +	: "=r" (__xx.__ll) \ +	: "0" (__xx.__ll), "r" (d)); \ +	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \ +} while (0) +#endif + +/*************************************** +	**************  I386  ***************** +	***************************************/ +#undef __i386__ +#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("addl %5,%1\n" \ +	   "adcl %3,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +	     "g" ((USItype)(bh)), \ +	     "%1" ((USItype)(al)), \ +	     "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subl %5,%1\n" \ +	   "sbbl %3,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +	     "g" ((USItype)(bh)), \ +	     "1" ((USItype)(al)), \ +	     "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("mull %3" \ +	: "=a" ((USItype)(w0)), \ +	     "=d" ((USItype)(w1)) \ +	: "%0" ((USItype)(u)), \ +	     "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ +	__asm__ ("divl %4" \ +	: "=a" ((USItype)(q)), \ +	     "=d" ((USItype)(r)) \ +	: "0" ((USItype)(n0)), \ +	     "1" ((USItype)(n1)), \ +	     "rm" ((USItype)(d))) +#ifndef UMUL_TIME +#define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +/*************************************** +	**************  I860  ***************** +	***************************************/ +#if defined(__i860__) && W_TYPE_SIZE 
== 32 +#define rshift_rhlc(r, h, l, c) \ +	__asm__ ("shr %3,r0,r0\n" \ +	"shrd %1,%2,%0" \ +	   "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +/*************************************** +	**************  I960  ***************** +	***************************************/ +#if defined(__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("cmpo 1,0\n" \ +	"addc %5,%4,%1\n" \ +	"addc %3,%2,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "%dI" ((USItype)(ah)), \ +	     "dI" ((USItype)(bh)), \ +	     "%dI" ((USItype)(al)), \ +	     "dI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("cmpo 0,0\n" \ +	"subc %5,%4,%1\n" \ +	"subc %3,%2,%0" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "dI" ((USItype)(ah)), \ +	     "dI" ((USItype)(bh)), \ +	     "dI" ((USItype)(al)), \ +	     "dI" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ +	({union {UDItype __ll; \ +	   struct {USItype __l, __h; } __i; \ +	} __xx; \ +	__asm__ ("emul        %2,%1,%0" \ +	: "=d" (__xx.__ll) \ +	: "%dI" ((USItype)(u)), \ +	     "dI" ((USItype)(v))); \ +	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ +	({UDItype __w; \ +	__asm__ ("emul      %2,%1,%0" \ +	: "=d" (__w) \ +	: "%dI" ((USItype)(u)), \ +	       "dI" ((USItype)(v))); \ +	__w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ +	union {UDItype __ll; \ +	   struct {USItype __l, __h; } __i; \ +	} __nn; \ +	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \ +	__asm__ ("ediv %d,%n,%0" \ +	: "=d" (__rq.__ll) \ +	: "dI" (__nn.__ll), \ +	     "dI" ((USItype)(d))); \ +	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \ +} while (0) +#if defined(__i960mx)		/* what is the proper symbol to test??? */ +#define rshift_rhlc(r, h, l, c) \ +do { \ +	union {UDItype __ll; \ +	   struct {USItype __l, __h; } __i; \ +	} __nn; \ +	__nn.__i.__h = (h); __nn.__i.__l = (l); \ +	__asm__ ("shre %2,%1,%0" \ +	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ +} +#endif /* i960mx */ +#endif /* i960 */ + +/*************************************** +	**************  68000	**************** +	***************************************/ +#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("add%.l %5,%1\n" \ +	   "addx%.l %3,%0" \ +	: "=d" ((USItype)(sh)), \ +	     "=&d" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +	     "d" ((USItype)(bh)), \ +	     "%1" ((USItype)(al)), \ +	     "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("sub%.l %5,%1\n" \ +	   "subx%.l %3,%0" \ +	: "=d" ((USItype)(sh)), \ +	     "=&d" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +	     "d" ((USItype)(bh)), \ +	     "1" ((USItype)(al)), \ +	     "g" ((USItype)(bl))) +#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("mulu%.l %3,%1:%0" \ +	: "=d" ((USItype)(w0)), \ +	     "=d" ((USItype)(w1)) \ +	: "%0" ((USItype)(u)), \ +	     "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ +	__asm__ ("divu%.l %4,%1:%0" \ +	: "=d" ((USItype)(q)), \ +	     "=d" ((USItype)(r)) \ +	: "0" ((USItype)(n0)), \ +	     "1" ((USItype)(n1)), \ +	     "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ +	__asm__ ("divs%.l %4,%1:%0" \ +	: "=d" ((USItype)(q)), \ +	     "=d" ((USItype)(r)) \ +	: "0" ((USItype)(n0)), \ +	     "1" ((USItype)(n1)), \ +	     "dmi" ((USItype)(d))) 
+#else /* not mc68020 */ +#define umul_ppmm(xh, xl, a, b) \ +do { USItype __umul_tmp1, __umul_tmp2; \ +	__asm__ ("| Inlined umul_ppmm\n" \ +	"move%.l %5,%3\n" \ +	"move%.l %2,%0\n" \ +	"move%.w %3,%1\n" \ +	"swap	%3\n" \ +	"swap	%0\n" \ +	"mulu	%2,%1\n" \ +	"mulu	%3,%0\n" \ +	"mulu	%2,%3\n" \ +	"swap	%2\n" \ +	"mulu	%5,%2\n" \ +	"add%.l	%3,%2\n" \ +	"jcc	1f\n" \ +	"add%.l	%#0x10000,%0\n" \ +	"1:	move%.l %2,%3\n" \ +	"clr%.w	%2\n" \ +	"swap	%2\n" \ +	"swap	%3\n" \ +	"clr%.w	%3\n" \ +	"add%.l	%3,%1\n" \ +	"addx%.l %2,%0\n" \ +	"| End inlined umul_ppmm" \ +	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ +		"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ +	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \ +} while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +#endif /* mc68000 */ + +/*************************************** +	**************  88000	**************** +	***************************************/ +#if defined(__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("addu.co %1,%r4,%r5\n" \ +	   "addu.ci %0,%r2,%r3" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "%rJ" ((USItype)(ah)), \ +	     "rJ" ((USItype)(bh)), \ +	     "%rJ" ((USItype)(al)), \ +	     "rJ" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subu.co %1,%r4,%r5\n" \ +	   "subu.ci %0,%r2,%r3" \ +	: "=r" ((USItype)(sh)), \ +	     "=&r" ((USItype)(sl)) \ +	: "rJ" ((USItype)(ah)), \ +	     "rJ" ((USItype)(bh)), \ +	     "rJ" ((USItype)(al)), \ +	     "rJ" ((USItype)(bl))) +#if defined(__m88110__) +#define umul_ppmm(wh, wl, u, v) \ +do { \ +	union {UDItype __ll; \ +	   struct {USItype __h, __l; } __i; \ +	} __x; \ +	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ +	(wh) = __x.__i.__h; \ +	(wl) = __x.__i.__l; \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ +	({union {UDItype __ll; \ +	   struct {USItype __h, __l; } __i; \ +	} __x, __q; \ +	__x.__i.__h = (n1); __x.__i.__l = (n0); \ +	__asm__ ("divu.d %0,%1,%2" \ +	: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ +	(r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +/*************************************** +	**************  MIPS  ***************** +	***************************************/ +#if defined(__mips__) && W_TYPE_SIZE == 32 +#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#define umul_ppmm(w1, w0, u, v)			\ +do {						\ +	UDItype __ll = (UDItype)(u) * (v);	\ +	w1 = __ll >> 32;			\ +	w0 = __ll;				\ +} while (0) +#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("multu %2,%3" \ +	: "=l" ((USItype)(w0)), \ +	     "=h" ((USItype)(w1)) \ +	: "d" ((USItype)(u)), \ +	     "d" ((USItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("multu %2,%3\n" \ +	   "mflo %0\n" \ +	   "mfhi %1" \ +	: "=d" ((USItype)(w0)), \ +	     "=d" ((USItype)(w1)) \ +	: "d" ((USItype)(u)), \ +	     "d" ((USItype)(v))) +#endif +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips__ */ + +/*************************************** +	**************  MIPS/64  ************** +	***************************************/ +#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 +#define umul_ppmm(w1, w0, u, v) \ +do {									\ +	typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\ +	__ll_UTItype __ll = (__ll_UTItype)(u) * (v);			\ +	w1 = __ll 
>> 64;						\ +	w0 = __ll;							\ +} while (0) +#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("dmultu %2,%3" \ +	: "=l" ((UDItype)(w0)), \ +	     "=h" ((UDItype)(w1)) \ +	: "d" ((UDItype)(u)), \ +	     "d" ((UDItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("dmultu %2,%3\n" \ +	   "mflo %0\n" \ +	   "mfhi %1" \ +	: "=d" ((UDItype)(w0)), \ +	     "=d" ((UDItype)(w1)) \ +	: "d" ((UDItype)(u)), \ +	     "d" ((UDItype)(v))) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips__ */ + +/*************************************** +	**************  32000	**************** +	***************************************/ +#if defined(__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ +	({union {UDItype __ll; \ +	   struct {USItype __l, __h; } __i; \ +	} __xx; \ +	__asm__ ("meid %2,%0" \ +	: "=g" (__xx.__ll) \ +	: "%0" ((USItype)(u)), \ +	     "g" ((USItype)(v))); \ +	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ +	({UDItype __w; \ +	__asm__ ("meid %2,%0" \ +	: "=g" (__w) \ +	: "%0" ((USItype)(u)), \ +	       "g" ((USItype)(v))); \ +	__w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ +	({union {UDItype __ll; \ +	   struct {USItype __l, __h; } __i; \ +	} __xx; \ +	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \ +	__asm__ ("deid %2,%0" \ +	: "=g" (__xx.__ll) \ +	: "0" (__xx.__ll), \ +	     "g" ((USItype)(d))); \ +	(r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +#endif /* __ns32000__ */ + +/*************************************** +	**************  PPC  ****************** +	***************************************/ +#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ +	if (__builtin_constant_p(bh) && (bh) == 0) \ +		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "%r" ((USItype)(ah)), \ +		"%r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))); \ +	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ +		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "%r" ((USItype)(ah)), \ +		"%r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))); \ +	else \ +		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "%r" ((USItype)(ah)), \ +		"r" ((USItype)(bh)), \ +		"%r" ((USItype)(al)), \ +		"rI" ((USItype)(bl))); \ +} while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ +	if (__builtin_constant_p(ah) && (ah) == 0) \ +		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "r" ((USItype)(bh)), \ +		"rI" ((USItype)(al)), \ +		"r" ((USItype)(bl))); \ +	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ +		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "r" ((USItype)(bh)), \ +		"rI" ((USItype)(al)), \ +		"r" ((USItype)(bl))); \ +	else if (__builtin_constant_p(bh) && (bh) == 0) \ +		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "r" ((USItype)(ah)), \ +		"rI" ((USItype)(al)), \ +		"r" ((USItype)(bl))); \ +	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ +		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "r" ((USItype)(ah)), \ +		"rI" 
((USItype)(al)), \ +		"r" ((USItype)(bl))); \ +	else \ +		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ +		: "=r" ((USItype)(sh)), \ +		"=&r" ((USItype)(sl)) \ +		: "r" ((USItype)(ah)), \ +		"r" ((USItype)(bh)), \ +		"rI" ((USItype)(al)), \ +		"r" ((USItype)(bl))); \ +} while (0) +#if defined(_ARCH_PPC) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ +	USItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ("mulhwu %0,%1,%2" \ +	: "=r" ((USItype) ph) \ +	: "%r" (__m0), \ +	"r" (__m1)); \ +	(pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ +do { \ +	SItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ("mulhw %0,%1,%2" \ +	: "=r" ((SItype) ph) \ +	: "%r" (__m0), \ +	"r" (__m1)); \ +	(pl) = __m0 * __m1; \ +} while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ +	USItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ("mul %0,%2,%3" \ +	: "=r" ((USItype)(xh)), \ +	"=q" ((USItype)(xl)) \ +	: "r" (__m0), \ +	"r" (__m1)); \ +	(xh) += ((((SItype) __m0 >> 31) & __m1) \ +	+ (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ +	__asm__ ("mul %0,%2,%3" \ +	: "=r" ((SItype)(xh)), \ +	"=q" ((SItype)(xl)) \ +	: "r" (m0), \ +	"r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ +	__asm__ ("div %0,%2,%4" \ +	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ +	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +#define UDIV_TIME 100 +#endif +#endif /* Power architecture variants.  */ + +/*************************************** +	**************  PYR  ****************** +	***************************************/ +#if defined(__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("addw        %5,%1\n" \ +	"addwc	%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +	"g" ((USItype)(bh)), \ +	"%1" ((USItype)(al)), \ +	"g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subw        %5,%1\n" \ +	"subwb	%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +	"g" ((USItype)(bh)), \ +	"1" ((USItype)(al)), \ +	"g" ((USItype)(bl))) +	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  
*/ +#define umul_ppmm(w1, w0, u, v) \ +	({union {UDItype __ll; \ +	struct {USItype __h, __l; } __i; \ +	} __xx; \ +	__asm__ ("movw %1,%R0\n" \ +	"uemul %2,%0" \ +	: "=&r" (__xx.__ll) \ +	: "g" ((USItype) (u)), \ +	"g" ((USItype)(v))); \ +	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#endif /* __pyr__ */ + +/*************************************** +	**************  RT/ROMP  ************** +	***************************************/ +#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("a %1,%5\n" \ +	"ae %0,%3" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +	"r" ((USItype)(bh)), \ +	"%1" ((USItype)(al)), \ +	"r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("s %1,%5\n" \ +	"se %0,%3" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +	"r" ((USItype)(bh)), \ +	"1" ((USItype)(al)), \ +	"r" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ +	USItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ( \ +	"s       r2,r2\n" \ +	"mts	r10,%2\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"m	r2,%3\n" \ +	"cas	%0,r2,r0\n" \ +	"mfs	r10,%1" \ +	: "=r" ((USItype)(ph)), \ +	"=r" ((USItype)(pl)) \ +	: "%r" (__m0), \ +	"r" (__m1) \ +	: "r2"); \ +	(ph) += ((((SItype) __m0 >> 31) & __m1) \ +	+ (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#endif /* RT/ROMP */ + +/*************************************** +	**************  SH2  ****************** +	***************************************/ +#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ +	&& W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ( \ +	"dmulu.l %2,%3\n" \ +	"sts	macl,%1\n" \ +	"sts	mach,%0" \ +	: "=r" ((USItype)(w1)), \ +	"=r" ((USItype)(w0)) \ +	: "r" ((USItype)(u)), \ +	"r" ((USItype)(v)) \ +	: "macl", "mach") +#define UMUL_TIME 5 +#endif + +/*************************************** +	**************  SPARC	**************** +	***************************************/ +#if defined(__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("addcc %r4,%5,%1\n" \ +	"addx %r2,%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "%rJ" ((USItype)(ah)), \ +	"rI" ((USItype)(bh)), \ +	"%rJ" ((USItype)(al)), \ +	"rI" ((USItype)(bl)) \ +	__CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subcc %r4,%5,%1\n" \ +	"subx %r2,%3,%0" \ +	: "=r" ((USItype)(sh)), \ +	"=&r" ((USItype)(sl)) \ +	: "rJ" ((USItype)(ah)), \ +	"rI" ((USItype)(bh)), \ +	"rJ" ((USItype)(al)), \ +	"rI" ((USItype)(bl)) \ +	__CLOBBER_CC) +#if defined(__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, +	2) the 'I' flag thinks of the range as a 13 bit signed interval, +	while we want to match a 13 bit interval, sign extended to 32 bits, +	but INTERPRETED AS UNSIGNED.  
*/ +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("umul %2,%3,%1;rd %%y,%0" \ +	: "=r" ((USItype)(w1)), \ +	"=r" ((USItype)(w0)) \ +	: "r" ((USItype)(u)), \ +	"r" ((USItype)(v))) +#define UMUL_TIME 5 +#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { \ +	USItype __q; \ +	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ +	: "=r" ((USItype)(__q)) \ +	: "r" ((USItype)(n1)), \ +	"r" ((USItype)(n0)), \ +	"r" ((USItype)(d))); \ +	(r) = (n0) - __q * (d); \ +	(q) = __q; \ +} while (0) +#define UDIV_TIME 25 +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ +#if defined(__sparclite__) +/* This has hardware multiply but not divide.  It also has two additional +	instructions scan (ffs from high bit) and divscc.  */ +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("umul %2,%3,%1;rd %%y,%0" \ +	: "=r" ((USItype)(w1)), \ +	"=r" ((USItype)(w0)) \ +	: "r" ((USItype)(u)), \ +	"r" ((USItype)(v))) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ +	__asm__ ("! Inlined udiv_qrnnd\n" \ +	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \ +	"tst	%%g0\n" \ +	"divscc	%3,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%%g1\n" \ +	"divscc	%%g1,%4,%0\n" \ +	"rd	%%y,%1\n" \ +	"bl,a 1f\n" \ +	"add	%1,%4,%1\n" \ +	"1:	! End of inline udiv_qrnnd" \ +	: "=r" ((USItype)(q)), \ +	"=r" ((USItype)(r)) \ +	: "r" ((USItype)(n1)), \ +	"r" ((USItype)(n0)), \ +	"rI" ((USItype)(d)) \ +	: "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ +	__asm__ ("! Inlined umul_ppmm\n" \ +	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \ +	"sra	%3,31,%%g2	! Don't move this insn\n" \ +	"and	%2,%%g2,%%g2	! Don't move this insn\n" \ +	"andcc	%%g0,0,%%g1	! 
Don't move this insn\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,%3,%%g1\n" \ +	"mulscc	%%g1,0,%%g1\n" \ +	"add	%%g1,%%g2,%0\n" \ +	"rd	%%y,%1" \ +	: "=r" ((USItype)(w1)), \ +	"=r" ((USItype)(w0)) \ +	: "%rI" ((USItype)(u)), \ +	"r" ((USItype)(v)) \ +	: "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39		/* 39 instructions */ +/* It's quite necessary to add this much assembler for the sparc. +   The default udiv_qrnnd (in C) is more than 10 times slower!  */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +  __asm__ ("! Inlined udiv_qrnnd\n\t"					\ +	   "mov	32,%%g1\n\t"						\ +	   "subcc	%1,%2,%%g0\n\t"					\ +	   "1:	bcs	5f\n\t"						\ +	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\ +	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\ +	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\ +	   "subcc	%%g1,1,%%g1\n\t"				\ +	   "2:	bne	1b\n\t"						\ +	   "subcc	%1,%2,%%g0\n\t"					\ +	   "bcs	3f\n\t"							\ +	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\ +	   "b		3f\n\t"						\ +	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\ +	   "4:	sub	%1,%2,%1\n\t"					\ +	   "5:	addxcc	%1,%1,%1\n\t"					\ +	   "bcc	2b\n\t"							\ +	   "subcc	%%g1,1,%%g1\n\t"				\ +	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \ +	   "bne	4b\n\t"							\ +	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \ +	   "sub	%1,%2,%1\n\t"						\ +	   "3:	xnor	%0,0,%0\n\t"					\ +	   "! End of inline udiv_qrnnd\n"				\ +	   : "=&r" ((USItype)(q)),					\ +	     "=&r" ((USItype)(r))					\ +	   : "r" ((USItype)(d)),					\ +	     "1" ((USItype)(n1)),					\ +	     "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  
*/ +#endif +#endif /* __sparc__ */ + +/*************************************** +	**************  VAX  ****************** +	***************************************/ +#if defined(__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("addl2 %5,%1\n" \ +	"adwc %3,%0" \ +	: "=g" ((USItype)(sh)), \ +	"=&g" ((USItype)(sl)) \ +	: "%0" ((USItype)(ah)), \ +	"g" ((USItype)(bh)), \ +	"%1" ((USItype)(al)), \ +	"g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("subl2 %5,%1\n" \ +	"sbwc %3,%0" \ +	: "=g" ((USItype)(sh)), \ +	"=&g" ((USItype)(sl)) \ +	: "0" ((USItype)(ah)), \ +	"g" ((USItype)(bh)), \ +	"1" ((USItype)(al)), \ +	"g" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ +	union {UDItype __ll; \ +	struct {USItype __l, __h; } __i; \ +	} __xx; \ +	USItype __m0 = (m0), __m1 = (m1); \ +	__asm__ ("emul %1,%2,$0,%0" \ +	: "=g" (__xx.__ll) \ +	: "g" (__m0), \ +	"g" (__m1)); \ +	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +	(xh) += ((((SItype) __m0 >> 31) & __m1) \ +	+ (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ +	union {DItype __ll; \ +	struct {SItype __l, __h; } __i; \ +	} __xx; \ +	__xx.__i.__h = n1; __xx.__i.__l = n0; \ +	__asm__ ("ediv %3,%2,%0,%1" \ +	: "=g" (q), "=g" (r) \ +	: "g" (__xx.__ll), "g" (d)); \ +} while (0) +#endif /* __vax__ */ + +/*************************************** +	**************  Z8000	**************** +	***************************************/ +#if defined(__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \ +	: "=r" ((unsigned int)(sh)), \ +	"=&r" ((unsigned int)(sl)) \ +	: "%0" ((unsigned int)(ah)), \ +	"r" ((unsigned int)(bh)), \ +	"%1" ((unsigned int)(al)), \ +	"rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \ +	: "=r" ((unsigned int)(sh)), \ +	"=&r" ((unsigned int)(sl)) \ +	: "0" ((unsigned int)(ah)), \ +	"r" ((unsigned int)(bh)), \ +	"1" ((unsigned int)(al)), \ +	"rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ +	union {long int __ll; \ +	struct {unsigned int __h, __l; } __i; \ +	} __xx; \ +	unsigned int __m0 = (m0), __m1 = (m1); \ +	__asm__ ("mult      %S0,%H3" \ +	: "=r" (__xx.__i.__h), \ +	"=r" (__xx.__i.__l) \ +	: "%1" (__m0), \ +	"rQR" (__m1)); \ +	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +	(xh) += ((((signed int) __m0 >> 15) & __m1) \ +	+ (((signed int) __m1 >> 15) & __m0)); \ +} while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +/*************************************** +	***********  Generic Versions	******** +	***************************************/ +#if !defined(umul_ppmm) && defined(__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ +{ \ +	UDWtype __ll = __umulsidi3(m0, m1); \ +	ph = (UWtype) (__ll >> W_TYPE_SIZE); \ +	pl = (UWtype) __ll; \ +} +#endif + +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) \ +	({UWtype __hi, __lo; \ +	umul_ppmm(__hi, __lo, u, v); \ +	((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + +	/* If this machine has no inline assembler, use C macros.  
*/ + +#if !defined(add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ +	UWtype __x; \ +	__x = (al) + (bl); \ +	(sh) = (ah) + (bh) + (__x < (al)); \ +	(sl) = __x; \ +} while (0) +#endif + +#if !defined(sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ +	UWtype __x; \ +	__x = (al) - (bl); \ +	(sh) = (ah) - (bh) - (__x > (al)); \ +	(sl) = __x; \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ +do { \ +	UWtype __x0, __x1, __x2, __x3; \ +	UHWtype __ul, __vl, __uh, __vh; \ +	UWtype __u = (u), __v = (v); \ +	\ +	__ul = __ll_lowpart(__u); \ +	__uh = __ll_highpart(__u); \ +	__vl = __ll_lowpart(__v); \ +	__vh = __ll_highpart(__v); \ +	\ +	__x0 = (UWtype) __ul * __vl; \ +	__x1 = (UWtype) __ul * __vh; \ +	__x2 = (UWtype) __uh * __vl; \ +	__x3 = (UWtype) __uh * __vh; \ +	\ +	__x1 += __ll_highpart(__x0);/* this can't give carry */ \ +	__x1 += __x2;		/* but this indeed can */ \ +	if (__x1 < __x2)		/* did we get it? */ \ +	__x3 += __ll_B;		/* yes, add it in the proper pos. */ \ +	\ +	(w1) = __x3 + __ll_highpart(__x1); \ +	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ +do { \ +	UWtype __w1; \ +	UWtype __m0 = (u), __m1 = (v); \ +	umul_ppmm(__w1, w0, __m0, __m1); \ +	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ +	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ +} while (0) +#endif + +	/* Define this unconditionally, so it can be used for debugging.  */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ +do { \ +	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ +	__d1 = __ll_highpart(d); \ +	__d0 = __ll_lowpart(d); \ +	\ +	__r1 = (n1) % __d1; \ +	__q1 = (n1) / __d1; \ +	__m = (UWtype) __q1 * __d0; \ +	__r1 = __r1 * __ll_B | __ll_highpart(n0); \ +	if (__r1 < __m) { \ +		__q1--, __r1 += (d); \ +		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ +		if (__r1 < __m) \ +			__q1--, __r1 += (d); \ +	} \ +	__r1 -= __m; \ +	\ +	__r0 = __r1 % __d1; \ +	__q0 = __r1 / __d1; \ +	__m = (UWtype) __q0 * __d0; \ +	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \ +	if (__r0 < __m) { \ +		__q0--, __r0 += (d); \ +		if (__r0 >= (d)) \ +			if (__r0 < __m) \ +				__q0--, __r0 += (d); \ +	} \ +	__r0 -= __m; \ +	\ +	(q) = (UWtype) __q1 * __ll_B | __q0; \ +	(r) = __r0; \ +} while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through +	__udiv_w_sdiv (defined in libgcc or elsewhere).  */ +#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ +	UWtype __r; \ +	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ +	(r) = __r; \ +} while (0) +#endif + +	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */ +#if !defined(udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c new file mode 100644 index 00000000000..503537e0843 --- /dev/null +++ b/lib/mpi/mpi-bit.c @@ -0,0 +1,56 @@ +/* mpi-bit.c  -  MPI bit level fucntions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#define A_LIMB_1 ((mpi_limb_t) 1) + +/**************** + * Sometimes we have MSL (most significant limbs) which are 0; + * this is for some reasons not good, so this function removes them. + */ +void mpi_normalize(MPI a) +{ +	for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--) +		; +} + +/**************** + * Return the number of bits in A. + */ +unsigned mpi_get_nbits(MPI a) +{ +	unsigned n; + +	mpi_normalize(a); + +	if (a->nlimbs) { +		mpi_limb_t alimb = a->d[a->nlimbs - 1]; +		if (alimb) +			n = count_leading_zeros(alimb); +		else +			n = BITS_PER_MPI_LIMB; +		n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB; +	} else +		n = 0; +	return n; +} +EXPORT_SYMBOL_GPL(mpi_get_nbits); diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c new file mode 100644 index 00000000000..1871e7b61ca --- /dev/null +++ b/lib/mpi/mpi-cmp.c @@ -0,0 +1,70 @@ +/* mpi-cmp.c  -  MPI functions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +int mpi_cmp_ui(MPI u, unsigned long v) +{ +	mpi_limb_t limb = v; + +	mpi_normalize(u); +	if (!u->nlimbs && !limb) +		return 0; +	if (u->sign) +		return -1; +	if (u->nlimbs > 1) +		return 1; + +	if (u->d[0] == limb) +		return 0; +	else if (u->d[0] > limb) +		return 1; +	else +		return -1; +} +EXPORT_SYMBOL_GPL(mpi_cmp_ui); + +int mpi_cmp(MPI u, MPI v) +{ +	mpi_size_t usize, vsize; +	int cmp; + +	mpi_normalize(u); +	mpi_normalize(v); +	usize = u->nlimbs; +	vsize = v->nlimbs; +	if (!u->sign && v->sign) +		return 1; +	if (u->sign && !v->sign) +		return -1; +	if (usize != vsize && !u->sign && !v->sign) +		return usize - vsize; +	if (usize != vsize && u->sign && v->sign) +		return vsize + usize; +	if (!usize) +		return 0; +	cmp = mpihelp_cmp(u->d, v->d, usize); +	if (!cmp) +		return 0; +	if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0)) +		return 1; +	return -1; +} +EXPORT_SYMBOL_GPL(mpi_cmp); diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h new file mode 100644 index 00000000000..e2b39852b30 --- /dev/null +++ b/lib/mpi/mpi-inline.h @@ -0,0 +1,122 @@ +/* mpi-inline.h  -  Internal to the Multi Precision Integers + *	Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. 
+ * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INLINE_H +#define G10_MPI_INLINE_H + +#ifndef G10_MPI_INLINE_DECL +#define G10_MPI_INLINE_DECL  extern inline +#endif + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +	      mpi_size_t s1_size, mpi_limb_t s2_limb) +{ +	mpi_limb_t x; + +	x = *s1_ptr++; +	s2_limb += x; +	*res_ptr++ = s2_limb; +	if (s2_limb < x) {	/* sum is less than the left operand: handle carry */ +		while (--s1_size) { +			x = *s1_ptr++ + 1;	/* add carry */ +			*res_ptr++ = x;	/* and store */ +			if (x)	/* not 0 (no overflow): we can stop */ +				goto leave; +		} +		return 1;	/* return carry (size of s1 to small) */ +	} + +leave: +	if (res_ptr != s1_ptr) {	/* not the same variable */ +		mpi_size_t i;	/* copy the rest */ +		for (i = 0; i < s1_size - 1; i++) +			res_ptr[i] = s1_ptr[i]; +	} +	return 0;		/* no carry */ +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +	    mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ +	mpi_limb_t cy = 0; + +	if (s2_size) +		cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); + +	if (s1_size - s2_size) +		cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, +				   s1_size - s2_size, cy); +	return cy; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +	      mpi_size_t s1_size, mpi_limb_t s2_limb) +{ +	mpi_limb_t x; + +	x = *s1_ptr++; +	s2_limb = x - s2_limb; +	*res_ptr++ = s2_limb; +	if (s2_limb > x) { +		while (--s1_size) { +			x = *s1_ptr++; +			*res_ptr++ = x - 1; +			if (x) +				goto leave; +		} +		return 1; +	} + +leave: +	if (res_ptr != s1_ptr) { +		mpi_size_t i; +		for (i = 0; i < s1_size - 1; i++) +			res_ptr[i] = s1_ptr[i]; +	} +	return 0; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +	    mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ +	mpi_limb_t cy = 0; + +	if (s2_size) +		cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); + +	if (s1_size - s2_size) +		cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, +				   s1_size - s2_size, cy); +	return cy; +} + +#endif /*G10_MPI_INLINE_H */ diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h new file mode 100644 index 00000000000..60cf765628e --- /dev/null +++ b/lib/mpi/mpi-internal.h @@ -0,0 +1,257 @@ +/* mpi-internal.h  -  Internal to the Multi Precision Integers + *	
Copyright (C) 1994, 1996 Free Software Foundation, Inc. + *	Copyright (C) 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/mpi.h> +#include <linux/errno.h> + +#define log_debug printk +#define log_bug printk + +#define assert(x) \ +	do { \ +		if (!x) \ +			log_bug("failed assertion\n"); \ +	} while (0); + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines.  */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. + * (wk 22.12.97) */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 16 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + +typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */ +typedef int mpi_size_t;		/* (must be a signed type) */ + +static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) +{ +	if (a->alloced < b) +		return mpi_resize(a, b); +	return 0; +} + +/* Copy N limbs from S to D.  
*/ +#define MPN_COPY(d, s, n) \ +	do {					\ +		mpi_size_t _i;			\ +		for (_i = 0; _i < (n); _i++)	\ +			(d)[_i] = (s)[_i];	\ +	} while (0) + +#define MPN_COPY_INCR(d, s, n) \ +	do {					\ +		mpi_size_t _i;			\ +		for (_i = 0; _i < (n); _i++)	\ +			(d)[_i] = (d)[_i];	\ +	} while (0) + +#define MPN_COPY_DECR(d, s, n) \ +	do {					\ +		mpi_size_t _i;			\ +		for (_i = (n)-1; _i >= 0; _i--) \ +			(d)[_i] = (s)[_i];	\ +	} while (0) + +/* Zero N limbs at D */ +#define MPN_ZERO(d, n) \ +	do {					\ +		int  _i;			\ +		for (_i = 0; _i < (n); _i++)	\ +			(d)[_i] = 0;		\ +	} while (0) + +#define MPN_NORMALIZE(d, n)  \ +	do {					\ +		while ((n) > 0) {		\ +			if ((d)[(n)-1])		\ +				break;		\ +			(n)--;			\ +		}				\ +	} while (0) + +#define MPN_NORMALIZE_NOT_ZERO(d, n) \ +	do {				\ +		for (;;) {		\ +			if ((d)[(n)-1])	\ +				break;	\ +			(n)--;		\ +		}			\ +	} while (0) + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ +	do {							\ +		if ((size) < KARATSUBA_THRESHOLD)		\ +			mul_n_basecase(prodp, up, vp, size);	\ +		else						\ +			mul_n(prodp, up, vp, size, tspace);	\ +	} while (0); + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones).  For correct operation, the most significant bit of D + * has to be set.  Put the quotient in Q and the remainder in R. + */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ +	do {								\ +		mpi_limb_t _q, _ql, _r;					\ +		mpi_limb_t _xh, _xl;					\ +		umul_ppmm(_q, _ql, (nh), (di));				\ +		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \ +		umul_ppmm(_xh, _xl, _q, (d));				\ +		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\ +		if (_xh) {						\ +			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\ +			_q++;						\ +			if (_xh) {					\ +				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\ +				_q++;					\ +			}						\ +		}							\ +		if (_r >= (d)) {					\ +			_r -= (d);					\ +			_q++;						\ +		}							\ +		(r) = _r;						\ +		(q) = _q;						\ +	} while (0) + +/*-- mpiutil.c --*/ +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); +void mpi_free_limb_space(mpi_ptr_t a); +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); + +/*-- mpi-bit.c --*/ +void mpi_rshift_limbs(MPI a, unsigned int count); +int mpi_lshift_limbs(MPI a, unsigned int count); + +/*-- mpihelp-add.c --*/ +mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			 mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			 mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +		       mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-sub.c --*/ +mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			 mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			 mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +		       mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-cmp.c --*/ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); + +/*-- mpihelp-mul.c --*/ + +struct karatsuba_ctx { +	struct karatsuba_ctx *next; +	mpi_ptr_t tspace; +	mpi_size_t tspace_size; +	mpi_ptr_t tp; +	mpi_size_t tp_size; +}; + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx); + +mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, 
mpi_ptr_t s1_ptr, +			    mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			    mpi_size_t s1_size, mpi_limb_t s2_limb); +int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); +int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, +		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); +void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, +		mpi_ptr_t tspace); + +int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, +			       mpi_ptr_t up, mpi_size_t usize, +			       mpi_ptr_t vp, mpi_size_t vsize, +			       struct karatsuba_ctx *ctx); + +/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ +mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +			 mpi_size_t s1_size, mpi_limb_t s2_limb); + +/*-- mpihelp-div.c --*/ +mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, +			 mpi_limb_t divisor_limb); +mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, +			  mpi_ptr_t np, mpi_size_t nsize, +			  mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, +			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, +			    mpi_limb_t divisor_limb); + +/*-- mpihelp-shift.c --*/ +mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, +			  unsigned cnt); +mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, +			  unsigned cnt); + +/* Define stuff for longlong.h.  */ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB +typedef mpi_limb_t UWtype; +typedef unsigned int UHWtype; +#if defined(__GNUC__) +typedef unsigned int UQItype __attribute__ ((mode(QI))); +typedef int SItype __attribute__ ((mode(SI))); +typedef unsigned int USItype __attribute__ ((mode(SI))); +typedef int DItype __attribute__ ((mode(DI))); +typedef unsigned int UDItype __attribute__ ((mode(DI))); +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; +#endif + +#ifdef __GNUC__ +#include "mpi-inline.h" +#endif + +#endif /*G10_MPI_INTERNAL_H */ diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c new file mode 100644 index 00000000000..5464c8744ea --- /dev/null +++ b/lib/mpi/mpi-pow.c @@ -0,0 +1,323 @@ +/* mpi-pow.c  -  MPI functions + *	Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. 
+ *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +/**************** + * RES = BASE ^ EXP mod MOD + */ +int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) +{ +	mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; +	mpi_ptr_t xp_marker = NULL; +	mpi_ptr_t tspace = NULL; +	mpi_ptr_t rp, ep, mp, bp; +	mpi_size_t esize, msize, bsize, rsize; +	int esign, msign, bsign, rsign; +	mpi_size_t size; +	int mod_shift_cnt; +	int negative_result; +	int assign_rp = 0; +	mpi_size_t tsize = 0;	/* to avoid compiler warning */ +	/* fixme: we should check that the warning is void */ +	int rc = -ENOMEM; + +	esize = exp->nlimbs; +	msize = mod->nlimbs; +	size = 2 * msize; +	esign = exp->sign; +	msign = mod->sign; + +	rp = res->d; +	ep = exp->d; + +	if (!msize) +		return -EINVAL; + +	if (!esize) { +		/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 +		 * depending on if MOD equals 1.  */ +		rp[0] = 1; +		res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; +		res->sign = 0; +		goto leave; +	} + +	/* Normalize MOD (i.e. make its most significant bit set) as required by +	 * mpn_divrem.  This will make the intermediate values in the calculation +	 * slightly larger, but the correct result is obtained after a final +	 * reduction using the original MOD value.  */ +	mp = mp_marker = mpi_alloc_limb_space(msize); +	if (!mp) +		goto enomem; +	mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]); +	if (mod_shift_cnt) +		mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); +	else +		MPN_COPY(mp, mod->d, msize); + +	bsize = base->nlimbs; +	bsign = base->sign; +	if (bsize > msize) {	/* The base is larger than the module. Reduce it. */ +		/* Allocate (BSIZE + 1) with space for remainder and quotient. +		 * (The quotient is (bsize - msize + 1) limbs.)  */ +		bp = bp_marker = mpi_alloc_limb_space(bsize + 1); +		if (!bp) +			goto enomem; +		MPN_COPY(bp, base->d, bsize); +		/* We don't care about the quotient, store it above the remainder, +		 * at BP + MSIZE.  */ +		mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize); +		bsize = msize; +		/* Canonicalize the base, since we are going to multiply with it +		 * quite a few times.  */ +		MPN_NORMALIZE(bp, bsize); +	} else +		bp = base->d; + +	if (!bsize) { +		res->nlimbs = 0; +		res->sign = 0; +		goto leave; +	} + +	if (res->alloced < size) { +		/* We have to allocate more space for RES.  If any of the input +		 * parameters are identical to RES, defer deallocation of the old +		 * space.  */ +		if (rp == ep || rp == mp || rp == bp) { +			rp = mpi_alloc_limb_space(size); +			if (!rp) +				goto enomem; +			assign_rp = 1; +		} else { +			if (mpi_resize(res, size) < 0) +				goto enomem; +			rp = res->d; +		} +	} else {		/* Make BASE, EXP and MOD not overlap with RES.  */ +		if (rp == bp) { +			/* RES and BASE are identical.  Allocate temp. space for BASE.  */ +			BUG_ON(bp_marker); +			bp = bp_marker = mpi_alloc_limb_space(bsize); +			if (!bp) +				goto enomem; +			MPN_COPY(bp, rp, bsize); +		} +		if (rp == ep) { +			/* RES and EXP are identical.  Allocate temp. space for EXP.  */ +			ep = ep_marker = mpi_alloc_limb_space(esize); +			if (!ep) +				goto enomem; +			MPN_COPY(ep, rp, esize); +		} +		if (rp == mp) { +			/* RES and MOD are identical.  Allocate temporary space for MOD. 
*/ +			BUG_ON(mp_marker); +			mp = mp_marker = mpi_alloc_limb_space(msize); +			if (!mp) +				goto enomem; +			MPN_COPY(mp, rp, msize); +		} +	} + +	MPN_COPY(rp, bp, bsize); +	rsize = bsize; +	rsign = bsign; + +	{ +		mpi_size_t i; +		mpi_ptr_t xp; +		int c; +		mpi_limb_t e; +		mpi_limb_t carry_limb; +		struct karatsuba_ctx karactx; + +		xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); +		if (!xp) +			goto enomem; + +		memset(&karactx, 0, sizeof karactx); +		negative_result = (ep[0] & 1) && base->sign; + +		i = esize - 1; +		e = ep[i]; +		c = count_leading_zeros(e); +		e = (e << c) << 1;	/* shift the exp bits to the left, lose msb */ +		c = BITS_PER_MPI_LIMB - 1 - c; + +		/* Main loop. +		 * +		 * Make the result be pointed to alternately by XP and RP.  This +		 * helps us avoid block copying, which would otherwise be necessary +		 * with the overlap restrictions of mpihelp_divmod. With 50% probability +		 * the result after this loop will be in the area originally pointed +		 * by RP (==RES->d), and with 50% probability in the area originally +		 * pointed to by XP. +		 */ + +		for (;;) { +			while (c) { +				mpi_ptr_t tp; +				mpi_size_t xsize; + +				/*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */ +				if (rsize < KARATSUBA_THRESHOLD) +					mpih_sqr_n_basecase(xp, rp, rsize); +				else { +					if (!tspace) { +						tsize = 2 * rsize; +						tspace = +						    mpi_alloc_limb_space(tsize); +						if (!tspace) +							goto enomem; +					} else if (tsize < (2 * rsize)) { +						mpi_free_limb_space(tspace); +						tsize = 2 * rsize; +						tspace = +						    mpi_alloc_limb_space(tsize); +						if (!tspace) +							goto enomem; +					} +					mpih_sqr_n(xp, rp, rsize, tspace); +				} + +				xsize = 2 * rsize; +				if (xsize > msize) { +					mpihelp_divrem(xp + msize, 0, xp, xsize, +						       mp, msize); +					xsize = msize; +				} + +				tp = rp; +				rp = xp; +				xp = tp; +				rsize = xsize; + +				if ((mpi_limb_signed_t) e < 0) { +					/*mpihelp_mul( xp, rp, rsize, bp, bsize ); */ +					if (bsize < KARATSUBA_THRESHOLD) { +						mpi_limb_t tmp; +						if (mpihelp_mul +						    (xp, rp, rsize, bp, bsize, +						     &tmp) < 0) +							goto enomem; +					} else { +						if (mpihelp_mul_karatsuba_case +						    (xp, rp, rsize, bp, bsize, +						     &karactx) < 0) +							goto enomem; +					} + +					xsize = rsize + bsize; +					if (xsize > msize) { +						mpihelp_divrem(xp + msize, 0, +							       xp, xsize, mp, +							       msize); +						xsize = msize; +					} + +					tp = rp; +					rp = xp; +					xp = tp; +					rsize = xsize; +				} +				e <<= 1; +				c--; +			} + +			i--; +			if (i < 0) +				break; +			e = ep[i]; +			c = BITS_PER_MPI_LIMB; +		} + +		/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT +		 * steps.  Adjust the result by reducing it with the original MOD. +		 * +		 * Also make sure the result is put in RES->d (where it already +		 * might be, see above). +		 */ +		if (mod_shift_cnt) { +			carry_limb = +			    mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt); +			rp = res->d; +			if (carry_limb) { +				rp[rsize] = carry_limb; +				rsize++; +			} +		} else { +			MPN_COPY(res->d, rp, rsize); +			rp = res->d; +		} + +		if (rsize >= msize) { +			mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize); +			rsize = msize; +		} + +		/* Remove any leading zero words from the result.  
*/ +		if (mod_shift_cnt) +			mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); +		MPN_NORMALIZE(rp, rsize); + +		mpihelp_release_karatsuba_ctx(&karactx); +	} + +	if (negative_result && rsize) { +		if (mod_shift_cnt) +			mpihelp_rshift(mp, mp, msize, mod_shift_cnt); +		mpihelp_sub(rp, mp, msize, rp, rsize); +		rsize = msize; +		rsign = msign; +		MPN_NORMALIZE(rp, rsize); +	} +	res->nlimbs = rsize; +	res->sign = rsign; + +leave: +	rc = 0; +enomem: +	if (assign_rp) +		mpi_assign_limb_space(res, rp, size); +	if (mp_marker) +		mpi_free_limb_space(mp_marker); +	if (bp_marker) +		mpi_free_limb_space(bp_marker); +	if (ep_marker) +		mpi_free_limb_space(ep_marker); +	if (xp_marker) +		mpi_free_limb_space(xp_marker); +	if (tspace) +		mpi_free_limb_space(tspace); +	return rc; +} +EXPORT_SYMBOL_GPL(mpi_powm); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c new file mode 100644 index 00000000000..4cc6442733f --- /dev/null +++ b/lib/mpi/mpicoder.c @@ -0,0 +1,260 @@ +/* mpicoder.c  -  Coder for the external representation of MPIs + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <linux/bitops.h> +#include <asm-generic/bitops/count_zeros.h> +#include "mpi-internal.h" + +#define MAX_EXTERN_MPI_BITS 16384 + +/** + * mpi_read_raw_data - Read a raw byte stream as a positive integer + * @xbuffer: The data to read + * @nbytes: The amount of data to read + */ +MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) +{ +	const uint8_t *buffer = xbuffer; +	int i, j; +	unsigned nbits, nlimbs; +	mpi_limb_t a; +	MPI val = NULL; + +	while (nbytes > 0 && buffer[0] == 0) { +		buffer++; +		nbytes--; +	} + +	nbits = nbytes * 8; +	if (nbits > MAX_EXTERN_MPI_BITS) { +		pr_info("MPI: mpi too large (%u bits)\n", nbits); +		return NULL; +	} +	if (nbytes > 0) +		nbits -= count_leading_zeros(buffer[0]); +	else +		nbits = 0; + +	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); +	val = mpi_alloc(nlimbs); +	if (!val) +		return NULL; +	val->nbits = nbits; +	val->sign = 0; +	val->nlimbs = nlimbs; + +	if (nbytes > 0) { +		i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; +		i %= BYTES_PER_MPI_LIMB; +		for (j = nlimbs; j > 0; j--) { +			a = 0; +			for (; i < BYTES_PER_MPI_LIMB; i++) { +				a <<= 8; +				a |= *buffer++; +			} +			i = 0; +			val->d[j - 1] = a; +		} +	} +	return val; +} +EXPORT_SYMBOL_GPL(mpi_read_raw_data); + +MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) +{ +	const uint8_t *buffer = xbuffer; +	int i, j; +	unsigned nbits, nbytes, nlimbs, nread = 0; +	mpi_limb_t a; +	MPI val = NULL; + +	if (*ret_nread < 2) +		goto leave; +	nbits = buffer[0] << 8 | buffer[1]; + +	if (nbits > MAX_EXTERN_MPI_BITS) { +		pr_info("MPI: mpi too large (%u bits)\n", nbits); +		goto leave; +	} +	buffer += 2; +	nread = 2; + +	nbytes = DIV_ROUND_UP(nbits, 
8); +	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); +	val = mpi_alloc(nlimbs); +	if (!val) +		return NULL; +	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; +	i %= BYTES_PER_MPI_LIMB; +	val->nbits = nbits; +	j = val->nlimbs = nlimbs; +	val->sign = 0; +	for (; j > 0; j--) { +		a = 0; +		for (; i < BYTES_PER_MPI_LIMB; i++) { +			if (++nread > *ret_nread) { +				printk +				    ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", +				     nread, *ret_nread); +				goto leave; +			} +			a <<= 8; +			a |= *buffer++; +		} +		i = 0; +		val->d[j - 1] = a; +	} + +leave: +	*ret_nread = nread; +	return val; +} +EXPORT_SYMBOL_GPL(mpi_read_from_buffer); + +/**************** + * Return an allocated buffer with the MPI (msb first). + * NBYTES receives the length of this buffer. Caller must free the + * return string (This function does return a 0 byte buffer with NBYTES + * set to zero if the value of A is zero. If sign is not NULL, it will + * be set to the sign of the A. + */ +void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) +{ +	uint8_t *p, *buffer; +	mpi_limb_t alimb; +	int i; +	unsigned int n; + +	if (sign) +		*sign = a->sign; +	*nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB; +	if (!n) +		n++;		/* avoid zero length allocation */ +	p = buffer = kmalloc(n, GFP_KERNEL); +	if (!p) +		return NULL; + +	for (i = a->nlimbs - 1; i >= 0; i--) { +		alimb = a->d[i]; +#if BYTES_PER_MPI_LIMB == 4 +		*p++ = alimb >> 24; +		*p++ = alimb >> 16; +		*p++ = alimb >> 8; +		*p++ = alimb; +#elif BYTES_PER_MPI_LIMB == 8 +		*p++ = alimb >> 56; +		*p++ = alimb >> 48; +		*p++ = alimb >> 40; +		*p++ = alimb >> 32; +		*p++ = alimb >> 24; +		*p++ = alimb >> 16; +		*p++ = alimb >> 8; +		*p++ = alimb; +#else +#error please implement for this limb size. +#endif +	} + +	/* this is sub-optimal but we need to do the shift operation +	 * because the caller has to free the returned buffer */ +	for (p = buffer; !*p && *nbytes; p++, --*nbytes) +		; +	if (p != buffer) +		memmove(buffer, p, *nbytes); + +	return buffer; +} +EXPORT_SYMBOL_GPL(mpi_get_buffer); + +/**************** + * Use BUFFER to update MPI. + */ +int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) +{ +	const uint8_t *buffer = xbuffer, *p; +	mpi_limb_t alimb; +	int nlimbs; +	int i; + +	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); +	if (RESIZE_IF_NEEDED(a, nlimbs) < 0) +		return -ENOMEM; +	a->sign = sign; + +	for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) { +#if BYTES_PER_MPI_LIMB == 4 +		alimb = (mpi_limb_t) *p--; +		alimb |= (mpi_limb_t) *p-- << 8; +		alimb |= (mpi_limb_t) *p-- << 16; +		alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 +		alimb = (mpi_limb_t) *p--; +		alimb |= (mpi_limb_t) *p-- << 8; +		alimb |= (mpi_limb_t) *p-- << 16; +		alimb |= (mpi_limb_t) *p-- << 24; +		alimb |= (mpi_limb_t) *p-- << 32; +		alimb |= (mpi_limb_t) *p-- << 40; +		alimb |= (mpi_limb_t) *p-- << 48; +		alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. 
+#endif +		a->d[i++] = alimb; +	} +	if (p >= buffer) { +#if BYTES_PER_MPI_LIMB == 4 +		alimb = *p--; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 8; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 16; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 +		alimb = (mpi_limb_t) *p--; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 8; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 16; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 24; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 32; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 40; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 48; +		if (p >= buffer) +			alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. +#endif +		a->d[i++] = alimb; +	} +	a->nlimbs = i; + +	if (i != nlimbs) { +		pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i, +		       nlimbs); +		BUG(); +	} +	return 0; +} +EXPORT_SYMBOL_GPL(mpi_set_buffer); diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c new file mode 100644 index 00000000000..b2fd39677f1 --- /dev/null +++ b/lib/mpi/mpih-cmp.c @@ -0,0 +1,56 @@ +/* mpihelp-sub.c  -  MPI helper functions + *	Copyright (C) 1994, 1996 Free Software Foundation, Inc. + *	Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/**************** + * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + * There are no restrictions on the relative sizes of + * the two arguments. + * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. + */ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size) +{ +	mpi_size_t i; +	mpi_limb_t op1_word, op2_word; + +	for (i = size - 1; i >= 0; i--) { +		op1_word = op1_ptr[i]; +		op2_word = op2_ptr[i]; +		if (op1_word != op2_word) +			goto diff; +	} +	return 0; + +diff: +	/* This can *not* be simplified to +	 *   op2_word - op2_word +	 * since that expression might give signed overflow.  */ +	return (op1_word > op2_word) ? 1 : -1; +} diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c new file mode 100644 index 00000000000..c57d1d46295 --- /dev/null +++ b/lib/mpi/mpih-div.c @@ -0,0 +1,236 @@ +/* mpihelp-div.c  -  MPI helper functions + *	Copyright (C) 1994, 1996 Free Software Foundation, Inc. 
+ *	Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + * the NSIZE-DSIZE least significant quotient limbs at QP + * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is + * non-zero, generate that many fraction bits and append them after the + * other quotient limbs. + * Return the most significant limb of the quotient, this is always 0 or 1. + * + * Preconditions: + * 0. NSIZE >= DSIZE. + * 1. The most significant bit of the divisor must be set. + * 2. QP must either not overlap with the input operands at all, or + *    QP + DSIZE >= NP must hold true.	(This means that it's + *    possible to put the quotient in the high part of NUM, right after the + *    remainder in NUM. + * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. + */ + +mpi_limb_t +mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, +	       mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize) +{ +	mpi_limb_t most_significant_q_limb = 0; + +	switch (dsize) { +	case 0: +		/* We are asked to divide by zero, so go ahead and do it!  (To make +		   the compiler not remove this statement, return the value.)  
*/ +		/* +		 * existing clients of this function have been modified +		 * not to call it with dsize == 0, so this should not happen +		 */ +		return 1 / dsize; + +	case 1: +		{ +			mpi_size_t i; +			mpi_limb_t n1; +			mpi_limb_t d; + +			d = dp[0]; +			n1 = np[nsize - 1]; + +			if (n1 >= d) { +				n1 -= d; +				most_significant_q_limb = 1; +			} + +			qp += qextra_limbs; +			for (i = nsize - 2; i >= 0; i--) +				udiv_qrnnd(qp[i], n1, n1, np[i], d); +			qp -= qextra_limbs; + +			for (i = qextra_limbs - 1; i >= 0; i--) +				udiv_qrnnd(qp[i], n1, n1, 0, d); + +			np[0] = n1; +		} +		break; + +	case 2: +		{ +			mpi_size_t i; +			mpi_limb_t n1, n0, n2; +			mpi_limb_t d1, d0; + +			np += nsize - 2; +			d1 = dp[1]; +			d0 = dp[0]; +			n1 = np[1]; +			n0 = np[0]; + +			if (n1 >= d1 && (n1 > d1 || n0 >= d0)) { +				sub_ddmmss(n1, n0, n1, n0, d1, d0); +				most_significant_q_limb = 1; +			} + +			for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) { +				mpi_limb_t q; +				mpi_limb_t r; + +				if (i >= qextra_limbs) +					np--; +				else +					np[0] = 0; + +				if (n1 == d1) { +					/* Q should be either 111..111 or 111..110.  Need special +					 * treatment of this rare case as normal division would +					 * give overflow.  */ +					q = ~(mpi_limb_t) 0; + +					r = n0 + d1; +					if (r < d1) {	/* Carry in the addition? */ +						add_ssaaaa(n1, n0, r - d0, +							   np[0], 0, d0); +						qp[i] = q; +						continue; +					} +					n1 = d0 - (d0 != 0 ? 1 : 0); +					n0 = -d0; +				} else { +					udiv_qrnnd(q, r, n1, n0, d1); +					umul_ppmm(n1, n0, d0, q); +				} + +				n2 = np[0]; +q_test: +				if (n1 > r || (n1 == r && n0 > n2)) { +					/* The estimated Q was too large.  */ +					q--; +					sub_ddmmss(n1, n0, n1, n0, 0, d0); +					r += d1; +					if (r >= d1)	/* If not carry, test Q again.  */ +						goto q_test; +				} + +				qp[i] = q; +				sub_ddmmss(n1, n0, r, n2, n1, n0); +			} +			np[1] = n1; +			np[0] = n0; +		} +		break; + +	default: +		{ +			mpi_size_t i; +			mpi_limb_t dX, d1, n0; + +			np += nsize - dsize; +			dX = dp[dsize - 1]; +			d1 = dp[dsize - 2]; +			n0 = np[dsize - 1]; + +			if (n0 >= dX) { +				if (n0 > dX +				    || mpihelp_cmp(np, dp, dsize - 1) >= 0) { +					mpihelp_sub_n(np, np, dp, dsize); +					n0 = np[dsize - 1]; +					most_significant_q_limb = 1; +				} +			} + +			for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { +				mpi_limb_t q; +				mpi_limb_t n1, n2; +				mpi_limb_t cy_limb; + +				if (i >= qextra_limbs) { +					np--; +					n2 = np[dsize]; +				} else { +					n2 = np[dsize - 1]; +					MPN_COPY_DECR(np + 1, np, dsize - 1); +					np[0] = 0; +				} + +				if (n0 == dX) { +					/* This might over-estimate q, but it's probably not worth +					 * the extra code here to find out.  */ +					q = ~(mpi_limb_t) 0; +				} else { +					mpi_limb_t r; + +					udiv_qrnnd(q, r, n0, np[dsize - 1], dX); +					umul_ppmm(n1, n0, d1, q); + +					while (n1 > r +					       || (n1 == r +						   && n0 > np[dsize - 2])) { +						q--; +						r += dX; +						if (r < dX)	/* I.e. "carry in previous addition?" */ +							break; +						n1 -= n0 < d1; +						n0 -= d1; +					} +				} + +				/* Possible optimization: We already have (q * n0) and (1 * n1) +				 * after the calculation of q.  Taking advantage of that, we +				 * could make this loop make two iterations less.  
*/ +				cy_limb = mpihelp_submul_1(np, dp, dsize, q); + +				if (n2 != cy_limb) { +					mpihelp_add_n(np, np, dp, dsize); +					q--; +				} + +				qp[i] = q; +				n0 = np[dsize - 1]; +			} +		} +	} + +	return most_significant_q_limb; +} diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c new file mode 100644 index 00000000000..7c841719fdf --- /dev/null +++ b/lib/mpi/mpih-mul.c @@ -0,0 +1,497 @@ +/* mpihelp-mul.c  -  MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + *               2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + *	 Actually it's the same code with only minor changes in the + *	 way the data is stored; this is to support the abstraction + *	 of an optional secure memory allocation which may be used + *	 to avoid revealing of sensitive data due to paging etc. + *	 The GNU MP Library itself is published under the LGPL; + *	 however I decided to publish this code under the plain GPL. + */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\ +	do {							\ +		if ((size) < KARATSUBA_THRESHOLD)		\ +			mul_n_basecase(prodp, up, vp, size);	\ +		else						\ +			mul_n(prodp, up, vp, size, tspace);	\ +	} while (0); + +#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\ +	do {							\ +		if ((size) < KARATSUBA_THRESHOLD)		\ +			mpih_sqr_n_basecase(prodp, up, size);	\ +		else						\ +			mpih_sqr_n(prodp, up, size, tspace);	\ +	} while (0); + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are + * always stored.  Return the most significant limb. + * + * Argument constraints: + * 1. PRODP != UP and PRODP != VP, i.e. the destination + *    must be distinct from the multiplier and the multiplicand. + * + * + * Handle simple cases with traditional multiplication. + * + * This is the most critical code of multiplication.  All multiplies rely + * on this, both small and huge.  Small ones arrive here immediately.  Huge + * ones arrive here as this is the base case for Karatsuba's recursive + * algorithm below. + */ + +static mpi_limb_t +mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ +	mpi_size_t i; +	mpi_limb_t cy; +	mpi_limb_t v_limb; + +	/* Multiply by the first limb in V separately, as the result can be +	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  
*/ +	v_limb = vp[0]; +	if (v_limb <= 1) { +		if (v_limb == 1) +			MPN_COPY(prodp, up, size); +		else +			MPN_ZERO(prodp, size); +		cy = 0; +	} else +		cy = mpihelp_mul_1(prodp, up, size, v_limb); + +	prodp[size] = cy; +	prodp++; + +	/* For each iteration in the outer loop, multiply one limb from +	 * U with one limb from V, and add it to PROD.  */ +	for (i = 1; i < size; i++) { +		v_limb = vp[i]; +		if (v_limb <= 1) { +			cy = 0; +			if (v_limb == 1) +				cy = mpihelp_add_n(prodp, prodp, up, size); +		} else +			cy = mpihelp_addmul_1(prodp, up, size, v_limb); + +		prodp[size] = cy; +		prodp++; +	} + +	return cy; +} + +static void +mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, +		mpi_size_t size, mpi_ptr_t tspace) +{ +	if (size & 1) { +		/* The size is odd, and the code below doesn't handle that. +		 * Multiply the least significant (size - 1) limbs with a recursive +		 * call, and handle the most significant limb of S1 and S2 +		 * separately. +		 * A slightly faster way to do this would be to make the Karatsuba +		 * code below behave as if the size were even, and let it check for +		 * odd size in the end.  I.e., in essence move this code to the end. +		 * Doing so would save us a recursive call, and potentially make the +		 * stack grow a lot less. +		 */ +		mpi_size_t esize = size - 1;	/* even size */ +		mpi_limb_t cy_limb; + +		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); +		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); +		prodp[esize + esize] = cy_limb; +		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); +		prodp[esize + size] = cy_limb; +	} else { +		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. +		 * +		 * Split U in two pieces, U1 and U0, such that +		 * U = U0 + U1*(B**n), +		 * and V in V1 and V0, such that +		 * V = V0 + V1*(B**n). +		 * +		 * UV is then computed recursively using the identity +		 * +		 *        2n   n          n                     n +		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V +		 *                1 1        1  0   0  1              0 0 +		 * +		 * Where B = 2**BITS_PER_MP_LIMB. +		 */ +		mpi_size_t hsize = size >> 1; +		mpi_limb_t cy; +		int negflg; + +		/* Product H.      ________________  ________________ +		 *                |_____U1 x V1____||____U0 x V0_____| +		 * Put result in upper part of PROD and pass low part of TSPACE +		 * as new TSPACE. +		 */ +		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, +				  tspace); + +		/* Product M.      ________________ +		 *                |_(U1-U0)(V0-V1)_| +		 */ +		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { +			mpihelp_sub_n(prodp, up + hsize, up, hsize); +			negflg = 0; +		} else { +			mpihelp_sub_n(prodp, up, up + hsize, hsize); +			negflg = 1; +		} +		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { +			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); +			negflg ^= 1; +		} else { +			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); +			/* No change of NEGFLG.  */ +		} +		/* Read temporary operands from low part of PROD. +		 * Put result in low part of TSPACE using upper part of TSPACE +		 * as new TSPACE. +		 */ +		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, +				  tspace + size); + +		/* Add/copy product H. 
*/ +		MPN_COPY(prodp + hsize, prodp + size, hsize); +		cy = mpihelp_add_n(prodp + size, prodp + size, +				   prodp + size + hsize, hsize); + +		/* Add product M (if NEGFLG M is a negative number) */ +		if (negflg) +			cy -= +			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, +					  size); +		else +			cy += +			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, +					  size); + +		/* Product L.      ________________  ________________ +		 *                |________________||____U0 x V0_____| +		 * Read temporary operands from low part of PROD. +		 * Put result in low part of TSPACE using upper part of TSPACE +		 * as new TSPACE. +		 */ +		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); + +		/* Add/copy Product L (twice) */ + +		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); +		if (cy) +			mpihelp_add_1(prodp + hsize + size, +				      prodp + hsize + size, hsize, cy); + +		MPN_COPY(prodp, tspace, hsize); +		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, +				   hsize); +		if (cy) +			mpihelp_add_1(prodp + size, prodp + size, size, 1); +	} +} + +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) +{ +	mpi_size_t i; +	mpi_limb_t cy_limb; +	mpi_limb_t v_limb; + +	/* Multiply by the first limb in V separately, as the result can be +	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */ +	v_limb = up[0]; +	if (v_limb <= 1) { +		if (v_limb == 1) +			MPN_COPY(prodp, up, size); +		else +			MPN_ZERO(prodp, size); +		cy_limb = 0; +	} else +		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); + +	prodp[size] = cy_limb; +	prodp++; + +	/* For each iteration in the outer loop, multiply one limb from +	 * U with one limb from V, and add it to PROD.  */ +	for (i = 1; i < size; i++) { +		v_limb = up[i]; +		if (v_limb <= 1) { +			cy_limb = 0; +			if (v_limb == 1) +				cy_limb = mpihelp_add_n(prodp, prodp, up, size); +		} else +			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); + +		prodp[size] = cy_limb; +		prodp++; +	} +} + +void +mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +{ +	if (size & 1) { +		/* The size is odd, and the code below doesn't handle that. +		 * Multiply the least significant (size - 1) limbs with a recursive +		 * call, and handle the most significant limb of S1 and S2 +		 * separately. +		 * A slightly faster way to do this would be to make the Karatsuba +		 * code below behave as if the size were even, and let it check for +		 * odd size in the end.  I.e., in essence move this code to the end. +		 * Doing so would save us a recursive call, and potentially make the +		 * stack grow a lot less. +		 */ +		mpi_size_t esize = size - 1;	/* even size */ +		mpi_limb_t cy_limb; + +		MPN_SQR_N_RECURSE(prodp, up, esize, tspace); +		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); +		prodp[esize + esize] = cy_limb; +		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); + +		prodp[esize + size] = cy_limb; +	} else { +		mpi_size_t hsize = size >> 1; +		mpi_limb_t cy; + +		/* Product H.      ________________  ________________ +		 *                |_____U1 x U1____||____U0 x U0_____| +		 * Put result in upper part of PROD and pass low part of TSPACE +		 * as new TSPACE. +		 */ +		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); + +		/* Product M.      
________________ +		 *                |_(U1-U0)(U0-U1)_| +		 */ +		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) +			mpihelp_sub_n(prodp, up + hsize, up, hsize); +		else +			mpihelp_sub_n(prodp, up, up + hsize, hsize); + +		/* Read temporary operands from low part of PROD. +		 * Put result in low part of TSPACE using upper part of TSPACE +		 * as new TSPACE.  */ +		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); + +		/* Add/copy product H  */ +		MPN_COPY(prodp + hsize, prodp + size, hsize); +		cy = mpihelp_add_n(prodp + size, prodp + size, +				   prodp + size + hsize, hsize); + +		/* Add product M (if NEGFLG M is a negative number).  */ +		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); + +		/* Product L.      ________________  ________________ +		 *                |________________||____U0 x U0_____| +		 * Read temporary operands from low part of PROD. +		 * Put result in low part of TSPACE using upper part of TSPACE +		 * as new TSPACE.  */ +		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); + +		/* Add/copy Product L (twice).  */ +		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); +		if (cy) +			mpihelp_add_1(prodp + hsize + size, +				      prodp + hsize + size, hsize, cy); + +		MPN_COPY(prodp, tspace, hsize); +		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, +				   hsize); +		if (cy) +			mpihelp_add_1(prodp + size, prodp + size, size, 1); +	} +} + +int +mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, +			   mpi_ptr_t up, mpi_size_t usize, +			   mpi_ptr_t vp, mpi_size_t vsize, +			   struct karatsuba_ctx *ctx) +{ +	mpi_limb_t cy; + +	if (!ctx->tspace || ctx->tspace_size < vsize) { +		if (ctx->tspace) +			mpi_free_limb_space(ctx->tspace); +		ctx->tspace = mpi_alloc_limb_space(2 * vsize); +		if (!ctx->tspace) +			return -ENOMEM; +		ctx->tspace_size = vsize; +	} + +	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); + +	prodp += vsize; +	up += vsize; +	usize -= vsize; +	if (usize >= vsize) { +		if (!ctx->tp || ctx->tp_size < vsize) { +			if (ctx->tp) +				mpi_free_limb_space(ctx->tp); +			ctx->tp = mpi_alloc_limb_space(2 * vsize); +			if (!ctx->tp) { +				if (ctx->tspace) +					mpi_free_limb_space(ctx->tspace); +				ctx->tspace = NULL; +				return -ENOMEM; +			} +			ctx->tp_size = vsize; +		} + +		do { +			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); +			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); +			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, +				      cy); +			prodp += vsize; +			up += vsize; +			usize -= vsize; +		} while (usize >= vsize); +	} + +	if (usize) { +		if (usize < KARATSUBA_THRESHOLD) { +			mpi_limb_t tmp; +			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) +			    < 0) +				return -ENOMEM; +		} else { +			if (!ctx->next) { +				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); +				if (!ctx->next) +					return -ENOMEM; +			} +			if (mpihelp_mul_karatsuba_case(ctx->tspace, +						       vp, vsize, +						       up, usize, +						       ctx->next) < 0) +				return -ENOMEM; +		} + +		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); +		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); +	} + +	return 0; +} + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) +{ +	struct karatsuba_ctx *ctx2; + +	if (ctx->tp) +		mpi_free_limb_space(ctx->tp); +	if (ctx->tspace) +		mpi_free_limb_space(ctx->tspace); +	for (ctx = ctx->next; ctx; ctx = ctx2) { +		ctx2 = ctx->next; +		if (ctx->tp) +			mpi_free_limb_space(ctx->tp); +		if (ctx->tspace) +			
mpi_free_limb_space(ctx->tspace); +		kfree(ctx); +	} +} + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + * and v (pointed to by VP, with VSIZE limbs), and store the result at + * PRODP.  USIZE + VSIZE limbs are always stored, but if the input + * operands are normalized.  Return the most significant limb of the + * result. + * + * NOTE: The space pointed to by PRODP is overwritten before finished + * with U and V, so overlap is an error. + * + * Argument constraints: + * 1. USIZE >= VSIZE. + * 2. PRODP != UP and PRODP != VP, i.e. the destination + *    must be distinct from the multiplier and the multiplicand. + */ + +int +mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, +	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) +{ +	mpi_ptr_t prod_endp = prodp + usize + vsize - 1; +	mpi_limb_t cy; +	struct karatsuba_ctx ctx; + +	if (vsize < KARATSUBA_THRESHOLD) { +		mpi_size_t i; +		mpi_limb_t v_limb; + +		if (!vsize) { +			*_result = 0; +			return 0; +		} + +		/* Multiply by the first limb in V separately, as the result can be +		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */ +		v_limb = vp[0]; +		if (v_limb <= 1) { +			if (v_limb == 1) +				MPN_COPY(prodp, up, usize); +			else +				MPN_ZERO(prodp, usize); +			cy = 0; +		} else +			cy = mpihelp_mul_1(prodp, up, usize, v_limb); + +		prodp[usize] = cy; +		prodp++; + +		/* For each iteration in the outer loop, multiply one limb from +		 * U with one limb from V, and add it to PROD.  */ +		for (i = 1; i < vsize; i++) { +			v_limb = vp[i]; +			if (v_limb <= 1) { +				cy = 0; +				if (v_limb == 1) +					cy = mpihelp_add_n(prodp, prodp, up, +							   usize); +			} else +				cy = mpihelp_addmul_1(prodp, up, usize, v_limb); + +			prodp[usize] = cy; +			prodp++; +		} + +		*_result = cy; +		return 0; +	} + +	memset(&ctx, 0, sizeof ctx); +	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) +		return -ENOMEM; +	mpihelp_release_karatsuba_ctx(&ctx); +	*_result = *prod_endp; +	return 0; +} diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c new file mode 100644 index 00000000000..bf076d281d4 --- /dev/null +++ b/lib/mpi/mpiutil.c @@ -0,0 +1,126 @@ +/* mpiutil.ac  -  Utility functions for MPI + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +/**************** + * Note:  It was a bad idea to use the number of limbs to allocate + *	  because on a alpha the limbs are large but we normally need + *	  integers of n bits - So we should chnage this to bits (or bytes). 
+ * + *	  But mpi_alloc is used in a lot of places :-) + */ +MPI mpi_alloc(unsigned nlimbs) +{ +	MPI a; + +	a = kmalloc(sizeof *a, GFP_KERNEL); +	if (!a) +		return a; + +	if (nlimbs) { +		a->d = mpi_alloc_limb_space(nlimbs); +		if (!a->d) { +			kfree(a); +			return NULL; +		} +	} else { +		a->d = NULL; +	} + +	a->alloced = nlimbs; +	a->nlimbs = 0; +	a->sign = 0; +	a->flags = 0; +	a->nbits = 0; +	return a; +} +EXPORT_SYMBOL_GPL(mpi_alloc); + +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs) +{ +	size_t len = nlimbs * sizeof(mpi_limb_t); + +	if (!len) +		return NULL; + +	return kmalloc(len, GFP_KERNEL); +} + +void mpi_free_limb_space(mpi_ptr_t a) +{ +	if (!a) +		return; + +	kfree(a); +} + +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) +{ +	mpi_free_limb_space(a->d); +	a->d = ap; +	a->alloced = nlimbs; +} + +/**************** + * Resize the array of A to NLIMBS. the additional space is cleared + * (set to 0) [done by m_realloc()] + */ +int mpi_resize(MPI a, unsigned nlimbs) +{ +	void *p; + +	if (nlimbs <= a->alloced) +		return 0;	/* no need to do it */ + +	if (a->d) { +		p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); +		if (!p) +			return -ENOMEM; +		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); +		kfree(a->d); +		a->d = p; +	} else { +		a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); +		if (!a->d) +			return -ENOMEM; +	} +	a->alloced = nlimbs; +	return 0; +} + +void mpi_free(MPI a) +{ +	if (!a) +		return; + +	if (a->flags & 4) +		kfree(a->d); +	else +		mpi_free_limb_space(a->d); + +	if (a->flags & ~7) +		pr_info("invalid flag value in mpi\n"); +	kfree(a); +} +EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL");  | 
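Illustrative sketch, not part of the patch itself: the helpers exported above (mpi_read_raw_data, mpi_alloc, mpi_powm, mpi_get_buffer, mpi_get_nbits, mpi_free) can be combined roughly as follows to compute r = b^e mod m from raw big-endian byte strings. The function name, parameters and logging below are hypothetical, and the declarations are assumed to come from <linux/mpi.h> as used by this library.

#include <linux/kernel.h>
#include <linux/mpi.h>
#include <linux/slab.h>

static int example_modexp(const void *b, size_t blen,
			  const void *e, size_t elen,
			  const void *m, size_t mlen)
{
	MPI base, exp, mod, res;
	unsigned int nbytes;
	void *buf;
	int rc = -ENOMEM;

	base = mpi_read_raw_data(b, blen);	/* msb-first byte streams */
	exp = mpi_read_raw_data(e, elen);
	mod = mpi_read_raw_data(m, mlen);
	res = mpi_alloc(1);	/* room for a trivial result; mpi_powm() resizes as needed */
	if (!base || !exp || !mod || !res)
		goto out;

	rc = mpi_powm(res, base, exp, mod);	/* res = base^exp mod mod */
	if (rc)
		goto out;

	buf = mpi_get_buffer(res, &nbytes, NULL);	/* kmalloc'd, msb first */
	if (!buf) {
		rc = -ENOMEM;
		goto out;
	}
	pr_info("modexp result: %u bytes, %u bits\n", nbytes, mpi_get_nbits(res));
	kfree(buf);
	rc = 0;
out:
	mpi_free(res);	/* mpi_free() accepts NULL, so partial failures are safe here */
	mpi_free(mod);
	mpi_free(exp);
	mpi_free(base);
	return rc;
}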

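The generic umul_ppmm fallback in longlong.h builds a double-limb product from four half-limb products, folding the cross terms together and propagating the single possible carry into the high word. Below is a stand-alone sketch (again not part of the patch) of that same decomposition for a 32-bit limb, checked against a native 64-bit multiply; __ll_B, __ll_lowpart and __ll_highpart are re-defined locally with the usual meanings they have earlier in longlong.h.

#include <stdint.h>
#include <stdio.h>

#define W_TYPE_SIZE	32
#define __ll_B		((uint32_t)1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t)	((uint32_t)(t) & (__ll_B - 1))
#define __ll_highpart(t) ((uint32_t)(t) >> (W_TYPE_SIZE / 2))

static void umul_ppmm32(uint32_t *w1, uint32_t *w0, uint32_t u, uint32_t v)
{
	uint32_t ul = __ll_lowpart(u), uh = __ll_highpart(u);
	uint32_t vl = __ll_lowpart(v), vh = __ll_highpart(v);
	uint32_t x0, x1, x2, x3;

	x0 = ul * vl;			/* low  x low  */
	x1 = ul * vh;			/* low  x high */
	x2 = uh * vl;			/* high x low  */
	x3 = uh * vh;			/* high x high */

	x1 += __ll_highpart(x0);	/* cannot overflow */
	x1 += x2;			/* but this can */
	if (x1 < x2)			/* carry goes into the top word */
		x3 += __ll_B;

	*w1 = x3 + __ll_highpart(x1);
	*w0 = (__ll_lowpart(x1) << (W_TYPE_SIZE / 2)) + __ll_lowpart(x0);
}

int main(void)
{
	uint32_t u = 0xfffffffbu, v = 0x87654321u, hi, lo;
	uint64_t ref = (uint64_t)u * v;

	umul_ppmm32(&hi, &lo, u, v);
	printf("umul_ppmm: %08x%08x  native: %016llx\n",
	       hi, lo, (unsigned long long)ref);
	return (((uint64_t)hi << 32 | lo) == ref) ? 0 : 1;
}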