aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am11
-rw-r--r--configure.ac37
-rw-r--r--cpu-miner.c74
-rw-r--r--miner.h46
-rw-r--r--sha256_4way.c488
-rw-r--r--sha256_cryptopp.c617
-rw-r--r--sha256_generic.c274
-rw-r--r--sha256_sse2_amd64.c133
-rw-r--r--sha256_via.c85
-rw-r--r--x86_64/.gitignore1
-rw-r--r--x86_64/Makefile.am8
-rw-r--r--x86_64/sha256_xmm_amd64.asm219
12 files changed, 1 insertions, 1992 deletions
diff --git a/Makefile.am b/Makefile.am
index b435083..5a6fd25 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,17 +14,8 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
bin_PROGRAMS = minerd
minerd_SOURCES = elist.h miner.h compat.h \
- cpu-miner.c util.c scrypt.c \
- sha256_generic.c sha256_4way.c sha256_via.c \
- sha256_cryptopp.c sha256_sse2_amd64.c
+ cpu-miner.c util.c scrypt.c
minerd_LDFLAGS = $(PTHREAD_FLAGS)
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
-if HAVE_x86_64
-if HAS_YASM
-SUBDIRS += x86_64
-minerd_LDADD += x86_64/libx8664.a
-AM_CFLAGS = -DHAS_YASM
-endif
-endif
diff --git a/configure.ac b/configure.ac
index 1747aef..3b0733f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,42 +54,6 @@ else
JANSSON_LIBS=-ljansson
fi
-dnl Find YASM
-has_yasm=false
-AC_PATH_PROG([YASM],[yasm],[false])
-if test "x$YASM" != "xfalse" ; then
- AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
- yasmver=`yasm --version | head -1 | cut -d\ -f2`
- yamajor=`echo $yasmver | cut -d. -f1`
- yaminor=`echo $yasmver | cut -d. -f2`
- yamini=`echo $yasmver | cut -d. -f3`
- if test "$yamajor" -ge "1" ; then
- if test "$yamajor" -eq "1" ; then
- if test "$yaminor" -ge "0" ; then
- if test "$yaminor" -eq "0"; then
- if test "$yamini" -ge "1"; then
- has_yasm=true
- fi
- else
- has_yasm=true
- fi
- fi
- fi
- else
- has_yasm=false
- fi
- if test "x$has_yasm" = "xtrue" ; then
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- fi
-fi
-if test "x$has_yasm" = "xfalse" ; then
- AC_MSG_NOTICE([yasm is required for the sse2_64 algorithm. It will be skipped.])
-fi
-
-AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue])
-
PKG_PROG_PKG_CONFIG()
LIBCURL_CHECK_CONFIG(, 7.10.1, ,
@@ -103,7 +67,6 @@ AC_CONFIG_FILES([
Makefile
compat/Makefile
compat/jansson/Makefile
- x86_64/Makefile
])
AC_OUTPUT
diff --git a/cpu-miner.c b/cpu-miner.c
index 819d202..133183a 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -81,30 +81,10 @@ struct workio_cmd {
};
enum sha256_algos {
- ALGO_C, /* plain C */
- ALGO_4WAY, /* parallel SSE2 */
- ALGO_VIA, /* VIA padlock */
- ALGO_CRYPTOPP, /* Crypto++ (C) */
- ALGO_CRYPTOPP_ASM32, /* Crypto++ 32-bit assembly */
- ALGO_SSE2_64, /* SSE2 for x86_64 */
ALGO_SCRYPT, /* scrypt(1024,1,1) */
};
static const char *algo_names[] = {
- [ALGO_C] = "c",
-#ifdef WANT_SSE2_4WAY
- [ALGO_4WAY] = "4way",
-#endif
-#ifdef WANT_VIA_PADLOCK
- [ALGO_VIA] = "via",
-#endif
- [ALGO_CRYPTOPP] = "cryptopp",
-#ifdef WANT_CRYPTOPP_ASM32
- [ALGO_CRYPTOPP_ASM32] = "cryptopp_asm32",
-#endif
-#ifdef WANT_X8664_SSE2
- [ALGO_SSE2_64] = "sse2_64",
-#endif
[ALGO_SCRYPT] = "scrypt",
};
@@ -119,11 +99,7 @@ static int opt_fail_pause = 30;
int opt_scantime = 5;
static json_t *opt_config;
static const bool opt_time = true;
-#ifdef WANT_X8664_SSE2
-static enum sha256_algos opt_algo = ALGO_SCRYPT;
-#else
static enum sha256_algos opt_algo = ALGO_SCRYPT;
-#endif
static int opt_n_threads;
static int num_processors;
static char *rpc_url;
@@ -578,56 +554,6 @@ static void *miner_thread(void *userdata)
/* scan nonces for a proof-of-work hash */
switch (opt_algo) {
- case ALGO_C:
- rc = scanhash_c(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
- break;
-
-#ifdef WANT_X8664_SSE2
- case ALGO_SSE2_64: {
- unsigned int rc5 =
- scanhash_sse2_64(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash,
- work.target,
- max_nonce, &hashes_done);
- rc = (rc5 == -1) ? false : true;
- }
- break;
-#endif
-
-#ifdef WANT_SSE2_4WAY
- case ALGO_4WAY: {
- unsigned int rc4 =
- ScanHash_4WaySSE2(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash,
- work.target,
- max_nonce, &hashes_done);
- rc = (rc4 == -1) ? false : true;
- }
- break;
-#endif
-
-#ifdef WANT_VIA_PADLOCK
- case ALGO_VIA:
- rc = scanhash_via(thr_id, work.data, work.target,
- max_nonce, &hashes_done);
- break;
-#endif
- case ALGO_CRYPTOPP:
- rc = scanhash_cryptopp(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
- break;
-
-#ifdef WANT_CRYPTOPP_ASM32
- case ALGO_CRYPTOPP_ASM32:
- rc = scanhash_asm32(thr_id, work.midstate, work.data + 64,
- work.hash1, work.hash, work.target,
- max_nonce, &hashes_done);
- break;
-#endif
-
case ALGO_SCRYPT:
rc = scanhash_scrypt(thr_id, work.data, scratchbuf,
work.target, max_nonce, &hashes_done);
diff --git a/miner.h b/miner.h
index a5bdfff..5d46209 100644
--- a/miner.h
+++ b/miner.h
@@ -37,18 +37,6 @@ void *alloca (size_t);
#endif
-#ifdef __SSE2__
-#define WANT_SSE2_4WAY 1
-#endif
-
-#if defined(__i386__) || defined(__x86_64__)
-#define WANT_VIA_PADLOCK 1
-#endif
-
-#if defined(__x86_64__) && defined(__SSE2__) && defined(HAS_YASM)
-#define WANT_X8664_SSE2 1
-#endif
-
#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define WANT_BUILTIN_BSWAP
#else
@@ -97,10 +85,6 @@ enum {
#define likely(expr) (expr)
#endif
-#if defined(__i386__)
-#define WANT_CRYPTOPP_ASM32
-#endif
-
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
@@ -143,36 +127,6 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass,
extern char *bin2hex(const unsigned char *p, size_t len);
extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
-extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate,
- unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone);
-
-extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate,
- unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone);
-
-extern bool scanhash_via(int, unsigned char *data_inout,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-
-extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done);
-extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
- unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone);
extern int scanhash_scrypt(int, unsigned char *pdata, unsigned char *scratchbuf,
const unsigned char *ptarget,
uint32_t max_nonce, unsigned long *nHashesDone);
diff --git a/sha256_4way.c b/sha256_4way.c
deleted file mode 100644
index 82dd6ca..0000000
--- a/sha256_4way.c
+++ /dev/null
@@ -1,488 +0,0 @@
-// Copyright (c) 2010 Satoshi Nakamoto
-// Distributed under the MIT/X11 software license, see the accompanying
-// file license.txt or http://www.opensource.org/licenses/mit-license.php.
-
-// tcatm's 4-way 128-bit SSE2 SHA-256
-
-#include "cpuminer-config.h"
-#include "miner.h"
-
-#ifdef WANT_SSE2_4WAY
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define NPAR 32
-
-static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
-
-static const unsigned int sha256_consts[] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) {
- return _mm_xor_si128(_mm_and_si128(b,c),_mm_andnot_si128(b,d));
-}
-
-static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) {
- return _mm_xor_si128(_mm_xor_si128(_mm_and_si128(b,c),_mm_and_si128(b,d)),_mm_and_si128(c,d));
-}
-
-static __attribute__((always_inline)) __m128i ROTR(__m128i x, const int n) {
- return _mm_or_si128(_mm_srli_epi32(x, n),_mm_slli_epi32(x, 32 - n));
-}
-
-static __attribute__((always_inline)) __m128i SHR(__m128i x, const int n) {
- return _mm_srli_epi32(x, n);
-}
-
-/* SHA256 Functions */
-#define BIGSIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 2),ROTR((x), 13)),ROTR((x), 22)))
-#define BIGSIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 6),ROTR((x), 11)),ROTR((x), 25)))
-
-
-#define SIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 7),ROTR((x), 18)), SHR((x), 3 )))
-#define SIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x),17),ROTR((x), 19)), SHR((x), 10)))
-
-static inline unsigned int store32(const __m128i x, int i) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x;
- return box.ret[i];
-}
-
-static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x;
- *x0 = box.ret[3]; *x1 = box.ret[2]; *x2 = box.ret[1]; *x3 = box.ret[0];
-}
-
-#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(x0, x1),_mm_add_epi32( x2,x3))
-#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4)
-
-#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
- T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[i]), w); \
-d = _mm_add_epi32(d, T1); \
-h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c)));
-
-static inline void dumpreg(__m128i x, char *msg) {
- union { unsigned int ret[4]; __m128i x; } box;
- box.x = x ;
- printf("%s %08x %08x %08x %08x\n", msg, box.ret[0], box.ret[1], box.ret[2], box.ret[3]);
-}
-
-#if 1
-#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", \
- __func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i));
-#else
-#define dumpstate()
-#endif
-
-static const unsigned int pSHA256InitState[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-
-unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
- unsigned char *pdata,
- unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone)
-{
- unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
- unsigned int nonce = 0;
-
- work_restart[thr_id].restart = 0;
-
- for (;;)
- {
- unsigned int thash[9][NPAR] __attribute__((aligned(128)));
- int j;
-
- nonce += NPAR;
- *nNonce_p = nonce;
-
- DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState);
-
- for (j = 0; j < NPAR; j++)
- {
- if (unlikely(thash[7][j] == 0))
- {
- int i;
-
- for (i = 0; i < 32/4; i++)
- ((unsigned int*)phash)[i] = thash[i][j];
-
- if (fulltest(phash, ptarget)) {
- *nHashesDone = nonce;
- *nNonce_p = nonce + j;
- return nonce + j;
- }
- }
- }
-
- if ((nonce >= max_nonce) || work_restart[thr_id].restart)
- {
- *nHashesDone = nonce;
- return -1;
- }
- }
-}
-
-
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
-{
- unsigned int* In = (unsigned int*)pin;
- unsigned int* Pad = (unsigned int*)pad;
- unsigned int* hPre = (unsigned int*)pre;
- unsigned int* hInit = (unsigned int*)init;
- unsigned int /* i, j, */ k;
-
- /* vectors used in calculation */
- __m128i w0, w1, w2, w3, w4, w5, w6, w7;
- __m128i w8, w9, w10, w11, w12, w13, w14, w15;
- __m128i T1;
- __m128i a, b, c, d, e, f, g, h;
- __m128i nonce, preNonce;
-
- /* nonce offset for vector */
- __m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000);
-
-
- preNonce = _mm_add_epi32(_mm_set1_epi32(In[3]), offset);
-
- for(k = 0; k<NPAR; k+=4) {
- w0 = _mm_set1_epi32(In[0]);
- w1 = _mm_set1_epi32(In[1]);
- w2 = _mm_set1_epi32(In[2]);
- //w3 = _mm_set1_epi32(In[3]); nonce will be later hacked into the hash
- w4 = _mm_set1_epi32(In[4]);
- w5 = _mm_set1_epi32(In[5]);
- w6 = _mm_set1_epi32(In[6]);
- w7 = _mm_set1_epi32(In[7]);
- w8 = _mm_set1_epi32(In[8]);
- w9 = _mm_set1_epi32(In[9]);
- w10 = _mm_set1_epi32(In[10]);
- w11 = _mm_set1_epi32(In[11]);
- w12 = _mm_set1_epi32(In[12]);
- w13 = _mm_set1_epi32(In[13]);
- w14 = _mm_set1_epi32(In[14]);
- w15 = _mm_set1_epi32(In[15]);
-
- /* hack nonce into lowest byte of w3 */
- nonce = _mm_add_epi32(preNonce, _mm_set1_epi32(k));
- w3 = nonce;
-
- a = _mm_set1_epi32(hPre[0]);
- b = _mm_set1_epi32(hPre[1]);
- c = _mm_set1_epi32(hPre[2]);
- d = _mm_set1_epi32(hPre[3]);
- e = _mm_set1_epi32(hPre[4]);
- f = _mm_set1_epi32(hPre[5]);
- g = _mm_set1_epi32(hPre[6]);
- h = _mm_set1_epi32(hPre[7]);
-
- SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
- SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
- SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
- SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
- SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
- SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
- SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
- SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
- SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
- SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
- SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
- SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
- SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
- SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
- SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
- SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-#define store_load(x, i, dest) \
- T1 = _mm_set1_epi32((hPre)[i]); \
- dest = _mm_add_epi32(T1, x);
-
- store_load(a, 0, w0);
- store_load(b, 1, w1);
- store_load(c, 2, w2);
- store_load(d, 3, w3);
- store_load(e, 4, w4);
- store_load(f, 5, w5);
- store_load(g, 6, w6);
- store_load(h, 7, w7);
-
- w8 = _mm_set1_epi32(Pad[8]);
- w9 = _mm_set1_epi32(Pad[9]);
- w10 = _mm_set1_epi32(Pad[10]);
- w11 = _mm_set1_epi32(Pad[11]);
- w12 = _mm_set1_epi32(Pad[12]);
- w13 = _mm_set1_epi32(Pad[13]);
- w14 = _mm_set1_epi32(Pad[14]);
- w15 = _mm_set1_epi32(Pad[15]);
-
- a = _mm_set1_epi32(hInit[0]);
- b = _mm_set1_epi32(hInit[1]);
- c = _mm_set1_epi32(hInit[2]);
- d = _mm_set1_epi32(hInit[3]);
- e = _mm_set1_epi32(hInit[4]);
- f = _mm_set1_epi32(hInit[5]);
- g = _mm_set1_epi32(hInit[6]);
- h = _mm_set1_epi32(hInit[7]);
-
- SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
- SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
- SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
- SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
- SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
- SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
- SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
- SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
- SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
- SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
- SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
- SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
- SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
- SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
- SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
- SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
- w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
- SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
- w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
- SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
- w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
- SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
- w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
- SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
- w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
- SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
- w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
- SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
- w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
- SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
- w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
- SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
- w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
- SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
- w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
- SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
- w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
- SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
- w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
- SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
- w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
- SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-
- /* Skip last 3-rounds; not necessary for H==0 */
-#if 0
- w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
- SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
- w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
- SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
- w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
- SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-#endif
-
- /* store resulsts directly in thash */
-#define store_2(x,i) \
- w0 = _mm_set1_epi32(hInit[i]); \
- *(__m128i *)&(thash)[i][0+k] = _mm_add_epi32(w0, x);
-
- store_2(a, 0);
- store_2(b, 1);
- store_2(c, 2);
- store_2(d, 3);
- store_2(e, 4);
- store_2(f, 5);
- store_2(g, 6);
- store_2(h, 7);
- *(__m128i *)&(thash)[8][0+k] = nonce;
- }
-
-}
-
-#endif /* WANT_SSE2_4WAY */
diff --git a/sha256_cryptopp.c b/sha256_cryptopp.c
deleted file mode 100644
index c3eb29f..0000000
--- a/sha256_cryptopp.c
+++ /dev/null
@@ -1,617 +0,0 @@
-
-#include "cpuminer-config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "miner.h"
-
-typedef uint32_t word32;
-
-static word32 rotrFixed(word32 word, unsigned int shift)
-{
- return (word >> shift) | (word << (32 - shift));
-}
-
-#define blk0(i) (W[i] = data[i])
-
-static const word32 SHA256_K[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
-
-#define Ch(x,y,z) (z^(x&(y^z)))
-#define Maj(x,y,z) (y^((x^y)&(y^z)))
-
-#define a(i) T[(0-i)&7]
-#define b(i) T[(1-i)&7]
-#define c(i) T[(2-i)&7]
-#define d(i) T[(3-i)&7]
-#define e(i) T[(4-i)&7]
-#define f(i) T[(5-i)&7]
-#define g(i) T[(6-i)&7]
-#define h(i) T[(7-i)&7]
-
-#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
- d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
-
-// for SHA256
-#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
-#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
-#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
-#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
-
-static void SHA256_Transform(word32 *state, const word32 *data)
-{
- word32 W[16] = { };
- word32 T[8];
- unsigned int j;
-
- /* Copy context->state[] to working vars */
- memcpy(T, state, sizeof(T));
- /* 64 operations, partially loop unrolled */
- for (j=0; j<64; j+=16)
- {
- R( 0); R( 1); R( 2); R( 3);
- R( 4); R( 5); R( 6); R( 7);
- R( 8); R( 9); R(10); R(11);
- R(12); R(13); R(14); R(15);
- }
- /* Add the working vars back into context.state[] */
- state[0] += a(0);
- state[1] += b(0);
- state[2] += c(0);
- state[3] += d(0);
- state[4] += e(0);
- state[5] += f(0);
- state[6] += g(0);
- state[7] += h(0);
-}
-
-static void runhash(void *state, const void *input, const void *init)
-{
- memcpy(state, init, 32);
- SHA256_Transform(state, input);
-}
-
-/* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
- unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- uint32_t *hash32 = (uint32_t *) hash;
- uint32_t *nonce = (uint32_t *)(data + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
-
- work_restart[thr_id].restart = 0;
-
- while (1) {
- n++;
- *nonce = n;
-
- runhash(hash1, data, midstate);
- runhash(hash, hash1, sha256_init_state);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
-#if defined(WANT_CRYPTOPP_ASM32)
-
-#define CRYPTOPP_FASTCALL
-#define CRYPTOPP_BOOL_X86 1
-#define CRYPTOPP_BOOL_X64 0
-#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- #define AS1(x) x*newline*
- #define AS2(x, y) x, y*newline*
- #define AS3(x, y, z) x, y, z*newline*
- #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
- #define ASL(x) label##x:*newline*
- #define ASJ(x, y, z) x label##y*newline*
- #define ASC(x, y) x label##y*newline*
- #define AS_HEX(y) 0##y##h
-#elif defined(_MSC_VER) || defined(__BORLANDC__)
- #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
- #define AS1(x) __asm {x}
- #define AS2(x, y) __asm {x, y}
- #define AS3(x, y, z) __asm {x, y, z}
- #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
- #define ASL(x) __asm {label##x:}
- #define ASJ(x, y, z) __asm {x label##y}
- #define ASC(x, y) __asm {x label##y}
- #define CRYPTOPP_NAKED __declspec(naked)
- #define AS_HEX(y) 0x##y
-#else
- #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
- // define these in two steps to allow arguments to be expanded
- #define GNU_AS1(x) #x ";"
- #define GNU_AS2(x, y) #x ", " #y ";"
- #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
- #define GNU_ASL(x) "\n" #x ":"
- #define GNU_ASJ(x, y, z) #x " " #y #z ";"
- #define AS1(x) GNU_AS1(x)
- #define AS2(x, y) GNU_AS2(x, y)
- #define AS3(x, y, z) GNU_AS3(x, y, z)
- #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
- #define ASL(x) GNU_ASL(x)
- #define ASJ(x, y, z) GNU_ASJ(x, y, z)
- #define ASC(x, y) #x " " #y ";"
- #define CRYPTOPP_NAKED
- #define AS_HEX(y) 0x##y
-#endif
-
-#define IF0(y)
-#define IF1(y) y
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-#define ASM_MOD(x, y) ((x) MOD (y))
-#define XMMWORD_PTR XMMWORD PTR
-#else
-// GNU assembler doesn't seem to have mod operator
-#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
-// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
-#define XMMWORD_PTR
-#endif
-
-#if CRYPTOPP_BOOL_X86
- #define AS_REG_1 ecx
- #define AS_REG_2 edx
- #define AS_REG_3 esi
- #define AS_REG_4 edi
- #define AS_REG_5 eax
- #define AS_REG_6 ebx
- #define AS_REG_7 ebp
- #define AS_REG_1d ecx
- #define AS_REG_2d edx
- #define AS_REG_3d esi
- #define AS_REG_4d edi
- #define AS_REG_5d eax
- #define AS_REG_6d ebx
- #define AS_REG_7d ebp
- #define WORD_SZ 4
- #define WORD_REG(x) e##x
- #define WORD_PTR DWORD PTR
- #define AS_PUSH_IF86(x) AS1(push e##x)
- #define AS_POP_IF86(x) AS1(pop e##x)
- #define AS_JCXZ jecxz
-#elif CRYPTOPP_BOOL_X64
- #ifdef CRYPTOPP_GENERATE_X64_MASM
- #define AS_REG_1 rcx
- #define AS_REG_2 rdx
- #define AS_REG_3 r8
- #define AS_REG_4 r9
- #define AS_REG_5 rax
- #define AS_REG_6 r10
- #define AS_REG_7 r11
- #define AS_REG_1d ecx
- #define AS_REG_2d edx
- #define AS_REG_3d r8d
- #define AS_REG_4d r9d
- #define AS_REG_5d eax
- #define AS_REG_6d r10d
- #define AS_REG_7d r11d
- #else
- #define AS_REG_1 rdi
- #define AS_REG_2 rsi
- #define AS_REG_3 rdx
- #define AS_REG_4 rcx
- #define AS_REG_5 r8
- #define AS_REG_6 r9
- #define AS_REG_7 r10
- #define AS_REG_1d edi
- #define AS_REG_2d esi
- #define AS_REG_3d edx
- #define AS_REG_4d ecx
- #define AS_REG_5d r8d
- #define AS_REG_6d r9d
- #define AS_REG_7d r10d
- #endif
- #define WORD_SZ 8
- #define WORD_REG(x) r##x
- #define WORD_PTR QWORD PTR
- #define AS_PUSH_IF86(x)
- #define AS_POP_IF86(x)
- #define AS_JCXZ jrcxz
-#endif
-
-static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
- , ... // VC60 workaround: prevent VC 6 from inlining this function
-#endif
- )
-{
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
- AS2(mov ecx, [state])
- AS2(mov edx, [data])
-#endif
-
- #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
- #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
- #define G(i) H(i+1)
- #define F(i) H(i+2)
- #define E(i) H(i+3)
- #define D(i) H(i+4)
- #define C(i) H(i+5)
- #define B(i) H(i+6)
- #define A(i) H(i+7)
- #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
- #define Wt_2(i) Wt((i)-2)
- #define Wt_15(i) Wt((i)-15)
- #define Wt_7(i) Wt((i)-7)
- #define K_END [BASE+8*4+16*4+0*WORD_SZ]
- #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
- #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
- #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
- #define Kt(i) WORD_REG(si)+(i)*4
-#if CRYPTOPP_BOOL_X86
- #define BASE esp+4
-#elif defined(__GNUC__)
- #define BASE r8
-#else
- #define BASE rsp
-#endif
-
-#define RA0(i, edx, edi) \
- AS2( add edx, [Kt(i)] )\
- AS2( add edx, [Wt(i)] )\
- AS2( add edx, H(i) )\
-
-#define RA1(i, edx, edi)
-
-#define RB0(i, edx, edi)
-
-#define RB1(i, edx, edi) \
- AS2( mov AS_REG_7d, [Wt_2(i)] )\
- AS2( mov edi, [Wt_15(i)])\
- AS2( mov ebx, AS_REG_7d )\
- AS2( shr AS_REG_7d, 10 )\
- AS2( ror ebx, 17 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( ror ebx, 2 )\
- AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
- AS2( add ebx, [Wt_7(i)])\
- AS2( mov AS_REG_7d, edi )\
- AS2( shr AS_REG_7d, 3 )\
- AS2( ror edi, 7 )\
- AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
- AS2( xor AS_REG_7d, edi )\
- AS2( add edx, [Kt(i)])\
- AS2( ror edi, 11 )\
- AS2( add edx, H(i) )\
- AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
- AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
- AS2( mov [Wt(i)], AS_REG_7d)\
- AS2( add edx, AS_REG_7d )\
-
-#define ROUND(i, r, eax, ecx, edi, edx)\
- /* in: edi = E */\
- /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
- AS2( mov edx, F(i) )\
- AS2( xor edx, G(i) )\
- AS2( and edx, edi )\
- AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
- AS2( mov AS_REG_7d, edi )\
- AS2( ror edi, 6 )\
- AS2( ror AS_REG_7d, 25 )\
- RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
- AS2( xor AS_REG_7d, edi )\
- AS2( ror edi, 5 )\
- AS2( xor AS_REG_7d, edi )/* S1(E) */\
- AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
- RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
- /* in: ecx = A, eax = B^C, edx = T1 */\
- /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
- AS2( mov ebx, ecx )\
- AS2( xor ecx, B(i) )/* A^B */\
- AS2( and eax, ecx )\
- AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
- AS2( mov AS_REG_7d, ebx )\
- AS2( ror ebx, 2 )\
- AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
- AS2( add edx, D(i) )\
- AS2( mov D(i), edx )\
- AS2( ror AS_REG_7d, 22 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( ror ebx, 11 )\
- AS2( xor AS_REG_7d, ebx )\
- AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
- AS2( mov H(i), eax )\
-
-#define SWAP_COPY(i) \
- AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
- AS1( bswap WORD_REG(bx))\
- AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))
-
-#if defined(__GNUC__)
- #if CRYPTOPP_BOOL_X64
- FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
- #endif
- __asm__ __volatile__
- (
- #if CRYPTOPP_BOOL_X64
- "lea %4, %%r8;"
- #endif
- ".intel_syntax noprefix;"
-#elif defined(CRYPTOPP_GENERATE_X64_MASM)
- ALIGN 8
- X86_SHA256_HashBlocks PROC FRAME
- rex_push_reg rsi
- push_reg rdi
- push_reg rbx
- push_reg rbp
- alloc_stack(LOCALS_SIZE+8)
- .endprolog
- mov rdi, r8
- lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
-#endif
-
-#if CRYPTOPP_BOOL_X86
- #ifndef __GNUC__
- AS2( mov edi, [len])
- AS2( lea WORD_REG(si), [SHA256_K+48*4])
- #endif
- #if !defined(_MSC_VER) || (_MSC_VER < 1400)
- AS_PUSH_IF86(bx)
- #endif
-
- AS_PUSH_IF86(bp)
- AS2( mov ebx, esp)
- AS2( and esp, -16)
- AS2( sub WORD_REG(sp), LOCALS_SIZE)
- AS_PUSH_IF86(bx)
-#endif
- AS2( mov STATE_SAVE, WORD_REG(cx))
- AS2( mov DATA_SAVE, WORD_REG(dx))
- AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
- AS2( mov DATA_END, WORD_REG(ax))
- AS2( mov K_END, WORD_REG(si))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
- AS2( test edi, 1)
- ASJ( jnz, 2, f)
- AS1( dec DWORD PTR K_END)
-#endif
- AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
- AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASJ( jmp, 0, f)
-#endif
- ASL(2) // non-SSE2
- AS2( mov esi, ecx)
- AS2( lea edi, A(0))
- AS2( mov ecx, 8)
- AS1( rep movsd)
- AS2( mov esi, K_END)
- ASJ( jmp, 3, f)
-#endif
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASL(0)
- AS2( movdqa E(0), xmm1)
- AS2( movdqa A(0), xmm0)
-#endif
-#if CRYPTOPP_BOOL_X86
- ASL(3)
-#endif
- AS2( sub WORD_REG(si), 48*4)
- SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
- SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
-#if CRYPTOPP_BOOL_X86
- SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
- SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
-#endif
- AS2( mov edi, E(0)) // E
- AS2( mov eax, B(0)) // B
- AS2( xor eax, C(0)) // B^C
- AS2( mov ecx, A(0)) // A
-
- ROUND(0, 0, eax, ecx, edi, edx)
- ROUND(1, 0, ecx, eax, edx, edi)
- ROUND(2, 0, eax, ecx, edi, edx)
- ROUND(3, 0, ecx, eax, edx, edi)
- ROUND(4, 0, eax, ecx, edi, edx)
- ROUND(5, 0, ecx, eax, edx, edi)
- ROUND(6, 0, eax, ecx, edi, edx)
- ROUND(7, 0, ecx, eax, edx, edi)
- ROUND(8, 0, eax, ecx, edi, edx)
- ROUND(9, 0, ecx, eax, edx, edi)
- ROUND(10, 0, eax, ecx, edi, edx)
- ROUND(11, 0, ecx, eax, edx, edi)
- ROUND(12, 0, eax, ecx, edi, edx)
- ROUND(13, 0, ecx, eax, edx, edi)
- ROUND(14, 0, eax, ecx, edi, edx)
- ROUND(15, 0, ecx, eax, edx, edi)
-
- ASL(1)
- AS2(add WORD_REG(si), 4*16)
- ROUND(0, 1, eax, ecx, edi, edx)
- ROUND(1, 1, ecx, eax, edx, edi)
- ROUND(2, 1, eax, ecx, edi, edx)
- ROUND(3, 1, ecx, eax, edx, edi)
- ROUND(4, 1, eax, ecx, edi, edx)
- ROUND(5, 1, ecx, eax, edx, edi)
- ROUND(6, 1, eax, ecx, edi, edx)
- ROUND(7, 1, ecx, eax, edx, edi)
- ROUND(8, 1, eax, ecx, edi, edx)
- ROUND(9, 1, ecx, eax, edx, edi)
- ROUND(10, 1, eax, ecx, edi, edx)
- ROUND(11, 1, ecx, eax, edx, edi)
- ROUND(12, 1, eax, ecx, edi, edx)
- ROUND(13, 1, ecx, eax, edx, edi)
- ROUND(14, 1, eax, ecx, edi, edx)
- ROUND(15, 1, ecx, eax, edx, edi)
- AS2( cmp WORD_REG(si), K_END)
- ASJ( jb, 1, b)
-
- AS2( mov WORD_REG(dx), DATA_SAVE)
- AS2( add WORD_REG(dx), 64)
- AS2( mov AS_REG_7, STATE_SAVE)
- AS2( mov DATA_SAVE, WORD_REG(dx))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
- AS2( test DWORD PTR K_END, 1)
- ASJ( jz, 4, f)
-#endif
- AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
- AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
- AS2( paddd xmm1, E(0))
- AS2( paddd xmm0, A(0))
- AS2( movdqa [AS_REG_7+1*16], xmm1)
- AS2( movdqa [AS_REG_7+0*16], xmm0)
- AS2( cmp WORD_REG(dx), DATA_END)
- ASJ( jb, 0, b)
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASJ( jmp, 5, f)
- ASL(4) // non-SSE2
-#endif
- AS2( add [AS_REG_7+0*4], ecx) // A
- AS2( add [AS_REG_7+4*4], edi) // E
- AS2( mov eax, B(0))
- AS2( mov ebx, C(0))
- AS2( mov ecx, D(0))
- AS2( add [AS_REG_7+1*4], eax)
- AS2( add [AS_REG_7+2*4], ebx)
- AS2( add [AS_REG_7+3*4], ecx)
- AS2( mov eax, F(0))
- AS2( mov ebx, G(0))
- AS2( mov ecx, H(0))
- AS2( add [AS_REG_7+5*4], eax)
- AS2( add [AS_REG_7+6*4], ebx)
- AS2( add [AS_REG_7+7*4], ecx)
- AS2( mov ecx, AS_REG_7d)
- AS2( cmp WORD_REG(dx), DATA_END)
- ASJ( jb, 2, b)
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
- ASL(5)
-#endif
-#endif
-
- AS_POP_IF86(sp)
- AS_POP_IF86(bp)
- #if !defined(_MSC_VER) || (_MSC_VER < 1400)
- AS_POP_IF86(bx)
- #endif
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- add rsp, LOCALS_SIZE+8
- pop rbp
- pop rbx
- pop rdi
- pop rsi
- ret
- X86_SHA256_HashBlocks ENDP
-#endif
-
-#ifdef __GNUC__
- ".att_syntax prefix;"
- :
- : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
- #if CRYPTOPP_BOOL_X64
- , "m" (workspace[0])
- #endif
- : "memory", "cc", "%eax"
- #if CRYPTOPP_BOOL_X64
- , "%rbx", "%r8", "%r10"
- #endif
- );
-#endif
-}
-
-static inline bool HasSSE2(void) { return false; }
-
-static void SHA256_Transform32(word32 *state, const word32 *data)
-{
- word32 W[16];
- int i;
-
- for (i = 0; i < 16; i++)
- W[i] = swab32(((word32 *)(data))[i]);
-
- X86_SHA256_HashBlocks(state, W, 16 * 4);
-}
-
-static void runhash32(void *state, const void *input, const void *init)
-{
- memcpy(state, init, 32);
- SHA256_Transform32(state, input);
-}
-
-/* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_asm32(int thr_id, const unsigned char *midstate,
- unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- uint32_t *hash32 = (uint32_t *) hash;
- uint32_t *nonce = (uint32_t *)(data + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
-
- work_restart[thr_id].restart = 0;
-
- while (1) {
- n++;
- *nonce = n;
-
- runhash32(hash1, data, midstate);
- runhash32(hash, hash1, sha256_init_state);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
- fulltest(hash, target);
-
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
-#endif // #if defined(WANT_CRYPTOPP_ASM32)
diff --git a/sha256_generic.c b/sha256_generic.c
deleted file mode 100644
index 789b20e..0000000
--- a/sha256_generic.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Cryptographic API.
- *
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "cpuminer-config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include "miner.h"
-
-typedef uint32_t u32;
-typedef uint8_t u8;
-
-static inline u32 ror32(u32 word, unsigned int shift)
-{
- return (word >> shift) | (word << (32 - shift));
-}
-
-static inline u32 Ch(u32 x, u32 y, u32 z)
-{
- return z ^ (x & (y ^ z));
-}
-
-static inline u32 Maj(u32 x, u32 y, u32 z)
-{
- return (x & y) | (z & (x | y));
-}
-
-#define e0(x) (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22))
-#define e1(x) (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25))
-#define s0(x) (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
-#define s1(x) (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
- /* byteswap is commented out, because bitcoin input
- * is already big-endian
- */
- W[I] = /* ntohl */ ( ((u32*)(input))[I] );
-}
-
-static inline void BLEND_OP(int I, u32 *W)
-{
- W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
- u32 a, b, c, d, e, f, g, h, t1, t2;
- u32 W[64];
- int i;
-
- /* load the input */
- for (i = 0; i < 16; i++)
- LOAD_OP(i, W, input);
-
- /* now blend */
- for (i = 16; i < 64; i++)
- BLEND_OP(i, W);
-
- /* load the state into our registers */
- a=state[0]; b=state[1]; c=state[2]; d=state[3];
- e=state[4]; f=state[5]; g=state[6]; h=state[7];
-
- /* now iterate */
- t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56];
- t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
- t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57];
- t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
- t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58];
- t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
- t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59];
- t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
- t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60];
- t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
- t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61];
- t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
- t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62];
- t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
- t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63];
- t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
-
- state[0] += a; state[1] += b; state[2] += c; state[3] += d;
- state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
-#if 0
- /* clear any sensitive info... */
- a = b = c = d = e = f = g = h = t1 = t2 = 0;
- memset(W, 0, 64 * sizeof(u32));
-#endif
-}
-
-static void runhash(void *state, const void *input, const void *init)
-{
- memcpy(state, init, 32);
- sha256_transform(state, input);
-}
-
-const uint32_t sha256_init_state[8] = {
- 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
- 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-/* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data,
- unsigned char *hash1, unsigned char *hash,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- uint32_t *hash32 = (uint32_t *) hash;
- uint32_t *nonce = (uint32_t *)(data + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
-
- work_restart[thr_id].restart = 0;
-
- while (1) {
- n++;
- *nonce = n;
-
- runhash(hash1, data, midstate);
- runhash(hash, hash1, sha256_init_state);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
diff --git a/sha256_sse2_amd64.c b/sha256_sse2_amd64.c
deleted file mode 100644
index 3aa154c..0000000
--- a/sha256_sse2_amd64.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * SHA-256 driver for ASM routine for x86_64 on Linux
- * Copyright (c) Mark Crichton <crichton@gimp.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "cpuminer-config.h"
-
-#include "miner.h"
-
-#ifdef WANT_X8664_SSE2
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-extern void CalcSha256_x64(__m128i *res, __m128i *data, uint32_t init[8]);
-
-uint32_t g_sha256_k[] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-uint32_t g_sha256_hinit[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-__m128i g_4sha256_k[64];
-
-int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
- unsigned char *pdata,
- unsigned char *phash1, unsigned char *phash,
- const unsigned char *ptarget,
- uint32_t max_nonce, unsigned long *nHashesDone)
-{
- uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
- uint32_t nonce = 0;
- uint32_t m_midstate[8], m_w[16], m_w1[16];
- __m128i m_4w[64], m_4hash[64], m_4hash1[64];
- __m128i offset;
- int i;
-
- work_restart[thr_id].restart = 0;
-
- /* For debugging */
- union {
- __m128i m;
- uint32_t i[4];
- } mi;
-
- /* Message expansion */
- memcpy(m_midstate, pmidstate, sizeof(m_midstate));
- memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
- memcpy(m_w1, phash1, sizeof(m_w1));
- memset(m_4hash, 0, sizeof(m_4hash));
-
- /* Transmongrify */
- for (i = 0; i < 16; i++)
- m_4w[i] = _mm_set1_epi32(m_w[i]);
-
- for (i = 0; i < 16; i++)
- m_4hash1[i] = _mm_set1_epi32(m_w1[i]);
-
- for (i = 0; i < 64; i++)
- g_4sha256_k[i] = _mm_set1_epi32(g_sha256_k[i]);
-
- offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0);
-
- for (;;)
- {
- int j;
-
- m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce));
-
- /* Some optimization can be done here W.R.T. precalculating some hash */
- CalcSha256_x64(m_4hash1, m_4w, m_midstate);
- CalcSha256_x64(m_4hash, m_4hash1, g_sha256_hinit);
-
- for (j = 0; j < 4; j++) {
- mi.m = m_4hash[7];
- if (unlikely(mi.i[j] == 0))
- break;
- }
-
- /* If j = true, we found a hit...so check it */
- /* Use the C version for a check... */
- if (unlikely(j != 4)) {
- for (i = 0; i < 8; i++) {
- mi.m = m_4hash[i];
- *(uint32_t *)&(phash)[i*4] = mi.i[j];
- }
-
- if (fulltest(phash, ptarget)) {
- *nHashesDone = nonce;
- *nNonce_p = nonce + j;
- return nonce + j;
- }
- }
-
- nonce += 4;
-
- if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart))
- {
- *nHashesDone = nonce;
- return -1;
- }
- }
-}
-
-#endif /* WANT_X8664_SSE2 */
-
diff --git a/sha256_via.c b/sha256_via.c
deleted file mode 100644
index 1f0596c..0000000
--- a/sha256_via.c
+++ /dev/null
@@ -1,85 +0,0 @@
-
-#include "cpuminer-config.h"
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <sys/time.h>
-#include "miner.h"
-
-#ifdef WANT_VIA_PADLOCK
-
-static void via_sha256(void *hash, void *buf, unsigned len)
-{
- unsigned stat = 0;
- asm volatile(".byte 0xf3, 0x0f, 0xa6, 0xd0"
- :"+S"(buf), "+a"(stat)
- :"c"(len), "D" (hash)
- :"memory");
-}
-
-bool scanhash_via(int thr_id, unsigned char *data_inout,
- const unsigned char *target,
- uint32_t max_nonce, unsigned long *hashes_done)
-{
- unsigned char data[128] __attribute__((aligned(128)));
- unsigned char tmp_hash[32] __attribute__((aligned(128)));
- unsigned char tmp_hash1[32] __attribute__((aligned(128)));
- uint32_t *data32 = (uint32_t *) data;
- uint32_t *hash32 = (uint32_t *) tmp_hash;
- uint32_t *nonce = (uint32_t *)(data + 64 + 12);
- uint32_t n = 0;
- unsigned long stat_ctr = 0;
- int i;
-
- work_restart[thr_id].restart = 0;
-
- /* bitcoin gives us big endian input, but via wants LE,
- * so we reverse the swapping bitcoin has already done (extra work)
- * in order to permit the hardware to swap everything
- * back to BE again (extra work).
- */
- for (i = 0; i < 128/4; i++)
- data32[i] = swab32(((uint32_t *)data_inout)[i]);
-
- while (1) {
- n++;
- *nonce = n;
-
- /* first SHA256 transform */
- memcpy(tmp_hash1, sha256_init_state, 32);
- via_sha256(tmp_hash1, data, 80); /* or maybe 128? */
-
- for (i = 0; i < 32/4; i++)
- ((uint32_t *)tmp_hash1)[i] =
- swab32(((uint32_t *)tmp_hash1)[i]);
-
- /* second SHA256 transform */
- memcpy(tmp_hash, sha256_init_state, 32);
- via_sha256(tmp_hash, tmp_hash1, 32);
-
- stat_ctr++;
-
- if (unlikely((hash32[7] == 0) && fulltest(tmp_hash, target))) {
- /* swap nonce'd data back into original storage area;
- * TODO: only swap back the nonce, rather than all data
- */
- for (i = 0; i < 128/4; i++) {
- uint32_t *dout32 = (uint32_t *) data_inout;
- dout32[i] = swab32(data32[i]);
- }
-
- *hashes_done = stat_ctr;
- return true;
- }
-
- if ((n >= max_nonce) || work_restart[thr_id].restart) {
- *hashes_done = stat_ctr;
- return false;
- }
- }
-}
-
-#endif /* WANT_VIA_PADLOCK */
-
diff --git a/x86_64/.gitignore b/x86_64/.gitignore
deleted file mode 100644
index a966652..0000000
--- a/x86_64/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-libx8664.a
diff --git a/x86_64/Makefile.am b/x86_64/Makefile.am
deleted file mode 100644
index c74ddd2..0000000
--- a/x86_64/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-noinst_LIBRARIES = libx8664.a
-
-SUFFIXES = .asm
-
-libx8664_a_SOURCES = sha256_xmm_amd64.asm
-
-.asm.o:
- $(YASM) -f elf64 $<
diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm
deleted file mode 100644
index 4fa0ea9..0000000
--- a/x86_64/sha256_xmm_amd64.asm
+++ /dev/null
@@ -1,219 +0,0 @@
-;; SHA-256 for X86-64 for Linux, based off of:
-
-; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
-; Version 2011
-; This software is Public Domain
-
-; SHA-256 CPU SSE cruncher for Bitcoin Miner
-
-ALIGN 32
-BITS 64
-
-%define hash rdi
-%define data rsi
-%define init rdx
-
-extern g_4sha256_k
-
-global CalcSha256_x64
-; CalcSha256 hash(rdi), data(rsi), init(rdx)
-CalcSha256_x64:
-
- push rbx
-
-LAB_NEXT_NONCE:
- mov r11, data
-; mov rax, pnonce
-; mov eax, [rax]
-; mov [rbx+3*16], eax
-; inc eax
-; mov [rbx+3*16+4], eax
-; inc eax
-; mov [rbx+3*16+8], eax
-; inc eax
-; mov [rbx+3*16+12], eax
-
- mov rcx, 64*4 ;rcx is # of SHA-2 rounds
- mov rax, 16*4 ;rax is where we expand to
-
-LAB_SHA:
- push rcx
- lea rcx, qword [r11+rcx*4]
- lea r11, qword [r11+rax*4]
-LAB_CALC:
- movdqa xmm0, [r11-15*16]
- movdqa xmm2, xmm0 ; (Rotr32(w_15, 7) ^ Rotr32(w_15, 18) ^ (w_15 >> 3))
- psrld xmm0, 3
- movdqa xmm1, xmm0
- pslld xmm2, 14
- psrld xmm1, 4
- pxor xmm0, xmm1
- pxor xmm0, xmm2
- pslld xmm2, 11
- psrld xmm1, 11
- pxor xmm0, xmm1
- pxor xmm0, xmm2
-
- paddd xmm0, [r11-16*16]
-
- movdqa xmm3, [r11-2*16]
- movdqa xmm2, xmm3 ; (Rotr32(w_2, 17) ^ Rotr32(w_2, 19) ^ (w_2 >> 10))
- psrld xmm3, 10
- movdqa xmm1, xmm3
- pslld xmm2, 13
- psrld xmm1, 7
- pxor xmm3, xmm1
- pxor xmm3, xmm2
- pslld xmm2, 2
- psrld xmm1, 2
- pxor xmm3, xmm1
- pxor xmm3, xmm2
- paddd xmm0, xmm3
-
- paddd xmm0, [r11-7*16]
- movdqa [r11], xmm0
- add r11, 16
- cmp r11, rcx
- jb LAB_CALC
- pop rcx
-
- mov rax, 0
-
-; Load the init values of the message into the hash.
-
- movd xmm0, dword [rdx+4*4] ; xmm0 == e
- pshufd xmm0, xmm0, 0
- movd xmm3, dword [rdx+3*4] ; xmm3 == d
- pshufd xmm3, xmm3, 0
- movd xmm4, dword [rdx+2*4] ; xmm4 == c
- pshufd xmm4, xmm4, 0
- movd xmm5, dword [rdx+1*4] ; xmm5 == b
- pshufd xmm5, xmm5, 0
- movd xmm7, dword [rdx+0*4] ; xmm7 == a
- pshufd xmm7, xmm7, 0
- movd xmm8, dword [rdx+5*4] ; xmm8 == f
- pshufd xmm8, xmm8, 0
- movd xmm9, dword [rdx+6*4] ; xmm9 == g
- pshufd xmm9, xmm9, 0
- movd xmm10, dword [rdx+7*4] ; xmm10 == h
- pshufd xmm10, xmm10, 0
-
-LAB_LOOP:
-
-;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
-
- movdqa xmm6, [rsi+rax*4]
- paddd xmm6, g_4sha256_k[rax*4]
- add rax, 4
-
- paddd xmm6, xmm10 ; +h
-
- movdqa xmm1, xmm0
- movdqa xmm2, xmm9
- pandn xmm1, xmm2 ; ~e & g
-
- movdqa xmm10, xmm2 ; h = g
- movdqa xmm2, xmm8 ; f
- movdqa xmm9, xmm2 ; g = f
-
- pand xmm2, xmm0 ; e & f
- pxor xmm1, xmm2 ; (e & f) ^ (~e & g)
- movdqa xmm8, xmm0 ; f = e
-
- paddd xmm6, xmm1 ; Ch + h + w[i] + k[i]
-
- movdqa xmm1, xmm0
- psrld xmm0, 6
- movdqa xmm2, xmm0
- pslld xmm1, 7
- psrld xmm2, 5
- pxor xmm0, xmm1
- pxor xmm0, xmm2
- pslld xmm1, 14
- psrld xmm2, 14
- pxor xmm0, xmm1
- pxor xmm0, xmm2
- pslld xmm1, 5
- pxor xmm0, xmm1 ; Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)
- paddd xmm6, xmm0 ; xmm6 = t1
-
- movdqa xmm0, xmm3 ; d
- paddd xmm0, xmm6 ; e = d+t1
-
- movdqa xmm1, xmm5 ; =b
- movdqa xmm3, xmm4 ; d = c
- movdqa xmm2, xmm4 ; c
- pand xmm2, xmm5 ; b & c
- pand xmm4, xmm7 ; a & c
- pand xmm1, xmm7 ; a & b
- pxor xmm1, xmm4
- movdqa xmm4, xmm5 ; c = b
- movdqa xmm5, xmm7 ; b = a
- pxor xmm1, xmm2 ; (a & c) ^ (a & d) ^ (c & d)
- paddd xmm6, xmm1 ; t1 + ((a & c) ^ (a & d) ^ (c & d))
-
- movdqa xmm2, xmm7
- psrld xmm7, 2
- movdqa xmm1, xmm7
- pslld xmm2, 10
- psrld xmm1, 11
- pxor xmm7, xmm2
- pxor xmm7, xmm1
- pslld xmm2, 9
- psrld xmm1, 9
- pxor xmm7, xmm2
- pxor xmm7, xmm1
- pslld xmm2, 11
- pxor xmm7, xmm2
- paddd xmm7, xmm6 ; a = t1 + (Rotr32(a, 2) ^ Rotr32(a, 13) ^ Rotr32(a, 22)) + ((a & c) ^ (a & d) ^ (c & d));
-
- cmp rax, rcx
- jb LAB_LOOP
-
-; Finished the 64 rounds, calculate hash and save
-
- movd xmm1, dword [rdx+0*4]
- pshufd xmm1, xmm1, 0
- paddd xmm7, xmm1
-
- movd xmm1, dword [rdx+1*4]
- pshufd xmm1, xmm1, 0
- paddd xmm5, xmm1
-
- movd xmm1, dword [rdx+2*4]
- pshufd xmm1, xmm1, 0
- paddd xmm4, xmm1
-
- movd xmm1, dword [rdx+3*4]
- pshufd xmm1, xmm1, 0
- paddd xmm3, xmm1
-
- movd xmm1, dword [rdx+4*4]
- pshufd xmm1, xmm1, 0
- paddd xmm0, xmm1
-
- movd xmm1, dword [rdx+5*4]
- pshufd xmm1, xmm1, 0
- paddd xmm8, xmm1
-
- movd xmm1, dword [rdx+6*4]
- pshufd xmm1, xmm1, 0
- paddd xmm9, xmm1
-
- movd xmm1, dword [rdx+7*4]
- pshufd xmm1, xmm1, 0
- paddd xmm10, xmm1
-
-debug_me:
- movdqa [rdi+0*16], xmm7
- movdqa [rdi+1*16], xmm5
- movdqa [rdi+2*16], xmm4
- movdqa [rdi+3*16], xmm3
- movdqa [rdi+4*16], xmm0
- movdqa [rdi+5*16], xmm8
- movdqa [rdi+6*16], xmm9
- movdqa [rdi+7*16], xmm10
-
-LAB_RET:
- pop rbx
- ret