about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Siarhei Siamashka <siarhei.siamashka@gmail.com>, 2011-12-22 21:22:07 +0200
committer: Siarhei Siamashka <siarhei.siamashka@gmail.com>, 2011-12-22 21:22:07 +0200
commit: 611402938ec3658773f4d0073375ae60d9323727 (patch)
tree: bbecbf35764a94066b6e986bfd8f587a244fc671
parent: 99084f8be4de50789345c42ae1e56378496c2981 (diff)
Unmangle scrypt.c (revert to cd0b57640c0de03ac23bc965649d4085b1a2556a)
The mangled scrypt.c from Art Forz is too badly broken on big-endian systems. Revert it to something that is more maintainable.
-rw-r--r-- scrypt.c 673
1 files changed, 472 insertions, 201 deletions
diff --git a/scrypt.c b/scrypt.c
index 803a45a..3b80a81 100644
--- a/scrypt.c
+++ b/scrypt.c
@@ -34,24 +34,83 @@
#include <stdint.h>
#include <string.h>
-#define byteswap(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
+
+static inline uint32_t
+be32dec(const void *pp)
+{
+ const uint8_t *p = (uint8_t const *)pp;
+
+ return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) +
+ ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24));
+}
+
+static inline void
+be32enc(void *pp, uint32_t x)
+{
+ uint8_t * p = (uint8_t *)pp;
+
+ p[3] = x & 0xff;
+ p[2] = (x >> 8) & 0xff;
+ p[1] = (x >> 16) & 0xff;
+ p[0] = (x >> 24) & 0xff;
+}
+
+static inline uint32_t
+le32dec(const void *pp)
+{
+ const uint8_t *p = (uint8_t const *)pp;
+
+ return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
+ ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
+}
+
+static inline void
+le32enc(void *pp, uint32_t x)
+{
+ uint8_t * p = (uint8_t *)pp;
+
+ p[0] = x & 0xff;
+ p[1] = (x >> 8) & 0xff;
+ p[2] = (x >> 16) & 0xff;
+ p[3] = (x >> 24) & 0xff;
+}
+
typedef struct SHA256Context {
uint32_t state[8];
- uint32_t buf[16];
+ uint32_t count[2];
+ unsigned char buf[64];
} SHA256_CTX;
+typedef struct HMAC_SHA256Context {
+ SHA256_CTX ictx;
+ SHA256_CTX octx;
+} HMAC_SHA256_CTX;
+
/*
* Encode a length len/4 vector of (uint32_t) into a length len vector of
* (unsigned char) in big-endian form. Assumes len is a multiple of 4.
*/
-static inline void
-be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
+static void
+be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
{
- uint32_t i;
+ size_t i;
+
+ for (i = 0; i < len / 4; i++)
+ be32enc(dst + i * 4, src[i]);
+}
+
+/*
+ * Decode a big-endian length len vector of (unsigned char) into a length
+ * len/4 vector of (uint32_t). Assumes len is a multiple of 4.
+ */
+static void
+be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
+{
+ size_t i;
- for (i = 0; i < len; i++)
- dst[i] = byteswap(src[i]);
+ for (i = 0; i < len / 4; i++)
+ dst[i] = be32dec(src + i * 4);
}
/* Elementary functions used by SHA256 */
@@ -84,7 +143,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
* the 512-bit input block to produce a new state.
*/
static void
-SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
+SHA256_Transform(uint32_t * state, const unsigned char block[64])
{
uint32_t W[64];
uint32_t S[8];
@@ -92,15 +151,9 @@ SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
int i;
/* 1. Prepare message schedule W. */
- if(swap)
- for (i = 0; i < 16; i++)
- W[i] = byteswap(block[i]);
- else
- memcpy(W, block, 64);
- for (i = 16; i < 64; i += 2) {
+ be32dec_vect(W, block, 64);
+ for (i = 16; i < 64; i++)
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
- W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
- }
/* 2. Initialize working variables. */
memcpy(S, state, 32);
@@ -174,260 +227,478 @@ SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
/* 4. Mix local working variables into global state */
for (i = 0; i < 8; i++)
state[i] += S[i];
+
+ /* Clean the stack. */
+ memset(W, 0, 256);
+ memset(S, 0, 32);
+ t0 = t1 = 0;
}
-static inline void
-SHA256_InitState(uint32_t * state)
+static unsigned char PAD[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* SHA-256 initialization. Begins a SHA-256 operation. */
+static void
+SHA256_Init(SHA256_CTX * ctx)
{
+
+ /* Zero bits processed so far */
+ ctx->count[0] = ctx->count[1] = 0;
+
/* Magic initialization constants */
- state[0] = 0x6A09E667;
- state[1] = 0xBB67AE85;
- state[2] = 0x3C6EF372;
- state[3] = 0xA54FF53A;
- state[4] = 0x510E527F;
- state[5] = 0x9B05688C;
- state[6] = 0x1F83D9AB;
- state[7] = 0x5BE0CD19;
+ ctx->state[0] = 0x6A09E667;
+ ctx->state[1] = 0xBB67AE85;
+ ctx->state[2] = 0x3C6EF372;
+ ctx->state[3] = 0xA54FF53A;
+ ctx->state[4] = 0x510E527F;
+ ctx->state[5] = 0x9B05688C;
+ ctx->state[6] = 0x1F83D9AB;
+ ctx->state[7] = 0x5BE0CD19;
+}
+
+/* Add bytes into the hash */
+static void
+SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
+{
+ uint32_t bitlen[2];
+ uint32_t r;
+ const unsigned char *src = in;
+
+ /* Number of bytes left in the buffer from previous updates */
+ r = (ctx->count[1] >> 3) & 0x3f;
+
+ /* Convert the length into a number of bits */
+ bitlen[1] = ((uint32_t)len) << 3;
+ bitlen[0] = (uint32_t)(len >> 29);
+
+ /* Update number of bits */
+ if ((ctx->count[1] += bitlen[1]) < bitlen[1])
+ ctx->count[0]++;
+ ctx->count[0] += bitlen[0];
+
+ /* Handle the case where we don't need to perform any transforms */
+ if (len < 64 - r) {
+ memcpy(&ctx->buf[r], src, len);
+ return;
+ }
+
+ /* Finish the current block */
+ memcpy(&ctx->buf[r], src, 64 - r);
+ SHA256_Transform(ctx->state, ctx->buf);
+ src += 64 - r;
+ len -= 64 - r;
+
+ /* Perform complete blocks */
+ while (len >= 64) {
+ SHA256_Transform(ctx->state, src);
+ src += 64;
+ len -= 64;
+ }
+
+ /* Copy left over data into buffer */
+ memcpy(ctx->buf, src, len);
+}
+
+/* Add padding and terminating bit-count. */
+static void
+SHA256_Pad(SHA256_CTX * ctx)
+{
+ unsigned char len[8];
+ uint32_t r, plen;
+
+ /*
+ * Convert length to a vector of bytes -- we do this now rather
+ * than later because the length will change after we pad.
+ */
+ be32enc_vect(len, ctx->count, 8);
+
+ /* Add 1--64 bytes so that the resulting length is 56 mod 64 */
+ r = (ctx->count[1] >> 3) & 0x3f;
+ plen = (r < 56) ? (56 - r) : (120 - r);
+ SHA256_Update(ctx, PAD, (size_t)plen);
+
+ /* Add the terminating bit-count */
+ SHA256_Update(ctx, len, 8);
}
-static const uint32_t passwdpad[12] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80020000};
-static const uint32_t outerpad[8] = {0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300};
+/*
+ * SHA-256 finalization. Pads the input data, exports the hash value,
+ * and clears the context state.
+ */
+static void
+SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx)
+{
+
+ /* Add padding */
+ SHA256_Pad(ctx);
+
+ /* Write the hash */
+ be32enc_vect(digest, ctx->state, 32);
+
+ /* Clear the context state */
+ memset((void *)ctx, 0, sizeof(*ctx));
+}
+
+/* Initialize an HMAC-SHA256 operation with the given key. */
+static void
+HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen)
+{
+ unsigned char pad[64];
+ unsigned char khash[32];
+ const unsigned char * K = _K;
+ size_t i;
+
+ /* If Klen > 64, the key is really SHA256(K). */
+ if (Klen > 64) {
+ SHA256_Init(&ctx->ictx);
+ SHA256_Update(&ctx->ictx, K, Klen);
+ SHA256_Final(khash, &ctx->ictx);
+ K = khash;
+ Klen = 32;
+ }
+
+ /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+ SHA256_Init(&ctx->ictx);
+ memset(pad, 0x36, 64);
+ for (i = 0; i < Klen; i++)
+ pad[i] ^= K[i];
+ SHA256_Update(&ctx->ictx, pad, 64);
+
+ /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+ SHA256_Init(&ctx->octx);
+ memset(pad, 0x5c, 64);
+ for (i = 0; i < Klen; i++)
+ pad[i] ^= K[i];
+ SHA256_Update(&ctx->octx, pad, 64);
+
+ /* Clean the stack. */
+ memset(khash, 0, 32);
+}
+
+/* Add bytes to the HMAC-SHA256 operation. */
+static void
+HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void *in, size_t len)
+{
+
+ /* Feed data to the inner SHA256 operation. */
+ SHA256_Update(&ctx->ictx, in, len);
+}
+
+/* Finish an HMAC-SHA256 operation. */
+static void
+HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX * ctx)
+{
+ unsigned char ihash[32];
+
+ /* Finish the inner SHA256 operation. */
+ SHA256_Final(ihash, &ctx->ictx);
+
+ /* Feed the inner hash to the outer SHA256 operation. */
+ SHA256_Update(&ctx->octx, ihash, 32);
+
+ /* Finish the outer SHA256 operation. */
+ SHA256_Final(digest, &ctx->octx);
+
+ /* Clean the stack. */
+ memset(ihash, 0, 32);
+}
/**
* PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
* Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
* write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
*/
-static inline void
-PBKDF2_SHA256_80_128(const uint32_t * passwd, uint32_t * buf)
+static void
+PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt,
+ size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen)
{
- SHA256_CTX PShictx, PShoctx;
- uint32_t tstate[8];
- uint32_t ihash[8];
- uint32_t i;
- uint32_t pad[16];
-
- static const uint32_t innerpad[11] = {0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa0040000};
+ HMAC_SHA256_CTX PShctx, hctx;
+ size_t i;
+ uint8_t ivec[4];
+ uint8_t U[32];
+ uint8_t T[32];
+ uint64_t j;
+ int k;
+ size_t clen;
- /* If Klen > 64, the key is really SHA256(K). */
- SHA256_InitState(tstate);
- SHA256_Transform(tstate, passwd, 1);
- memcpy(pad, passwd+16, 16);
- memcpy(pad+4, passwdpad, 48);
- SHA256_Transform(tstate, pad, 1);
- memcpy(ihash, tstate, 32);
-
- SHA256_InitState(PShictx.state);
- for (i = 0; i < 8; i++)
- pad[i] = ihash[i] ^ 0x36363636;
- for (; i < 16; i++)
- pad[i] = 0x36363636;
- SHA256_Transform(PShictx.state, pad, 0);
- SHA256_Transform(PShictx.state, passwd, 1);
- be32enc_vect(PShictx.buf, passwd+16, 4);
- be32enc_vect(PShictx.buf+5, innerpad, 11);
-
- SHA256_InitState(PShoctx.state);
- for (i = 0; i < 8; i++)
- pad[i] = ihash[i] ^ 0x5c5c5c5c;
- for (; i < 16; i++)
- pad[i] = 0x5c5c5c5c;
- SHA256_Transform(PShoctx.state, pad, 0);
- memcpy(PShoctx.buf+8, outerpad, 32);
+ /* Compute HMAC state after processing P and S. */
+ HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
+ HMAC_SHA256_Update(&PShctx, salt, saltlen);
/* Iterate through the blocks. */
- for (i = 0; i < 4; i++) {
- uint32_t istate[8];
- uint32_t ostate[8];
-
- memcpy(istate, PShictx.state, 32);
- PShictx.buf[4] = i + 1;
- SHA256_Transform(istate, PShictx.buf, 0);
- memcpy(PShoctx.buf, istate, 32);
-
- memcpy(ostate, PShoctx.state, 32);
- SHA256_Transform(ostate, PShoctx.buf, 0);
- be32enc_vect(buf+i*8, ostate, 8);
+ for (i = 0; i * 32 < dkLen; i++) {
+ /* Generate INT(i + 1). */
+ be32enc(ivec, (uint32_t)(i + 1));
+
+ /* Compute U_1 = PRF(P, S || INT(i)). */
+ memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
+ HMAC_SHA256_Update(&hctx, ivec, 4);
+ HMAC_SHA256_Final(U, &hctx);
+
+ /* T_i = U_1 ... */
+ memcpy(T, U, 32);
+
+ for (j = 2; j <= c; j++) {
+ /* Compute U_j. */
+ HMAC_SHA256_Init(&hctx, passwd, passwdlen);
+ HMAC_SHA256_Update(&hctx, U, 32);
+ HMAC_SHA256_Final(U, &hctx);
+
+ /* ... xor U_j ... */
+ for (k = 0; k < 32; k++)
+ T[k] ^= U[k];
+ }
+
+ /* Copy as many bytes as necessary into buf. */
+ clen = dkLen - i * 32;
+ if (clen > 32)
+ clen = 32;
+ memcpy(&buf[i * 32], T, clen);
}
+
+ /* Clean PShctx, since we never called _Final on it. */
+ memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX));
}
-static inline uint32_t
-PBKDF2_SHA256_80_128_32(const uint32_t * passwd, const uint32_t * salt)
-{
- uint32_t tstate[8];
- uint32_t ostate[8];
- uint32_t ihash[8];
- uint32_t i;
+static void blkcpy(void *, void *, size_t);
+static void blkxor(void *, void *, size_t);
+static void salsa20_8(uint32_t[16]);
+static void blockmix_salsa8(uint32_t *, uint32_t *, uint32_t *, size_t);
+static uint64_t integerify(void *, size_t);
+static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *);
- /* Compute HMAC state after processing P and S. */
- uint32_t pad[16];
-
- static const uint32_t ihash_finalblk[16] = {0x00000001,0x80000000,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0x00000620};
+static void
+blkcpy(void * dest, void * src, size_t len)
+{
+ size_t * D = dest;
+ size_t * S = src;
+ size_t L = len / sizeof(size_t);
+ size_t i;
- /* If Klen > 64, the key is really SHA256(K). */
- SHA256_InitState(tstate);
- SHA256_Transform(tstate, passwd, 1);
- memcpy(pad, passwd+16, 16);
- memcpy(pad+4, passwdpad, 48);
- SHA256_Transform(tstate, pad, 1);
- memcpy(ihash, tstate, 32);
-
- SHA256_InitState(ostate);
- for (i = 0; i < 8; i++)
- pad[i] = ihash[i] ^ 0x5c5c5c5c;
- for (; i < 16; i++)
- pad[i] = 0x5c5c5c5c;
- SHA256_Transform(ostate, pad, 0);
+ for (i = 0; i < L; i++)
+ D[i] = S[i];
+}
- SHA256_InitState(tstate);
- for (i = 0; i < 8; i++)
- pad[i] = ihash[i] ^ 0x36363636;
- for (; i < 16; i++)
- pad[i] = 0x36363636;
- SHA256_Transform(tstate, pad, 0);
- SHA256_Transform(tstate, salt, 1);
- SHA256_Transform(tstate, salt+16, 1);
- SHA256_Transform(tstate, ihash_finalblk, 0);
- memcpy(pad, tstate, 32);
- memcpy(pad+8, outerpad, 32);
+static void
+blkxor(void * dest, void * src, size_t len)
+{
+ size_t * D = dest;
+ size_t * S = src;
+ size_t L = len / sizeof(size_t);
+ size_t i;
- /* Feed the inner hash to the outer SHA256 operation. */
- SHA256_Transform(ostate, pad, 0);
- /* Finish the outer SHA256 operation. */
- return byteswap(ostate[7]);
+ for (i = 0; i < L; i++)
+ D[i] ^= S[i];
}
-
/**
* salsa20_8(B):
* Apply the salsa20/8 core to the provided block.
*/
-static inline void
-salsa20_8(uint32_t B[16], const uint32_t Bx[16])
+static void
+salsa20_8(uint32_t B[16])
{
- uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
+ uint32_t x[16];
size_t i;
- x00 = (B[ 0] ^= Bx[ 0]);
- x01 = (B[ 1] ^= Bx[ 1]);
- x02 = (B[ 2] ^= Bx[ 2]);
- x03 = (B[ 3] ^= Bx[ 3]);
- x04 = (B[ 4] ^= Bx[ 4]);
- x05 = (B[ 5] ^= Bx[ 5]);
- x06 = (B[ 6] ^= Bx[ 6]);
- x07 = (B[ 7] ^= Bx[ 7]);
- x08 = (B[ 8] ^= Bx[ 8]);
- x09 = (B[ 9] ^= Bx[ 9]);
- x10 = (B[10] ^= Bx[10]);
- x11 = (B[11] ^= Bx[11]);
- x12 = (B[12] ^= Bx[12]);
- x13 = (B[13] ^= Bx[13]);
- x14 = (B[14] ^= Bx[14]);
- x15 = (B[15] ^= Bx[15]);
+ blkcpy(x, B, 64);
for (i = 0; i < 8; i += 2) {
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
/* Operate on columns. */
- x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
- x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
- x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
- x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
+ x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9);
+ x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18);
+
+ x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9);
+ x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18);
+
+ x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9);
+ x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18);
+
+ x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9);
+ x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18);
/* Operate on rows. */
- x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
- x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
- x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
- x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
+ x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9);
+ x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18);
+
+ x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9);
+ x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18);
+
+ x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9);
+ x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18);
+
+ x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9);
+ x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18);
#undef R
}
- B[ 0] += x00;
- B[ 1] += x01;
- B[ 2] += x02;
- B[ 3] += x03;
- B[ 4] += x04;
- B[ 5] += x05;
- B[ 6] += x06;
- B[ 7] += x07;
- B[ 8] += x08;
- B[ 9] += x09;
- B[10] += x10;
- B[11] += x11;
- B[12] += x12;
- B[13] += x13;
- B[14] += x14;
- B[15] += x15;
+ for (i = 0; i < 16; i++)
+ B[i] += x[i];
+}
+
+/**
+ * blockmix_salsa8(Bin, Bout, X, r):
+ * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r
+ * bytes in length; the output Bout must also be the same size. The
+ * temporary space X must be 64 bytes.
+ */
+static void
+blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
+{
+ size_t i;
+
+ /* 1: X <-- B_{2r - 1} */
+ blkcpy(X, &Bin[(2 * r - 1) * 16], 64);
+
+ /* 2: for i = 0 to 2r - 1 do */
+ for (i = 0; i < 2 * r; i += 2) {
+ /* 3: X <-- H(X \xor B_i) */
+ blkxor(X, &Bin[i * 16], 64);
+ salsa20_8(X);
+
+ /* 4: Y_i <-- X */
+ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+ blkcpy(&Bout[i * 8], X, 64);
+
+ /* 3: X <-- H(X \xor B_i) */
+ blkxor(X, &Bin[i * 16 + 16], 64);
+ salsa20_8(X);
+
+ /* 4: Y_i <-- X */
+ /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
+ blkcpy(&Bout[i * 8 + r * 16], X, 64);
+ }
+}
+
+/**
+ * integerify(B, r):
+ * Return the result of parsing B_{2r-1} as a little-endian integer.
+ */
+static uint64_t
+integerify(void * B, size_t r)
+{
+ uint32_t * X = (void *)((uintptr_t)(B) + (2 * r - 1) * 64);
+
+ return (((uint64_t)(X[1]) << 32) + X[0]);
+}
+
+/**
+ * smix(B, r, N, V, XY):
+ * Compute B = SMix_r(B, N). The input B must be 128r bytes in length;
+ * the temporary storage V must be 128rN bytes in length; the temporary
+ * storage XY must be 256r + 64 bytes in length. The value N must be a
+ * power of 2 greater than 1. The arrays B, V, and XY must be aligned to a
+ * multiple of 64 bytes.
+ */
+static void
+smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
+{
+ uint32_t * X = XY;
+ uint32_t * Y = &XY[32 * r];
+ uint32_t * Z = &XY[64 * r];
+ uint64_t i;
+ uint64_t j;
+ size_t k;
+
+ /* 1: X <-- B */
+ for (k = 0; k < 32 * r; k++)
+ X[k] = le32dec(&B[4 * k]);
+
+ /* 2: for i = 0 to N - 1 do */
+ for (i = 0; i < N; i += 2) {
+ /* 3: V_i <-- X */
+ blkcpy(&V[i * (32 * r)], X, 128 * r);
+
+ /* 4: X <-- H(X) */
+ blockmix_salsa8(X, Y, Z, r);
+
+ /* 3: V_i <-- X */
+ blkcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);
+
+ /* 4: X <-- H(X) */
+ blockmix_salsa8(Y, X, Z, r);
+ }
+
+ /* 6: for i = 0 to N - 1 do */
+ for (i = 0; i < N; i += 2) {
+ /* 7: j <-- Integerify(X) mod N */
+ j = integerify(X, r) & (N - 1);
+
+ /* 8: X <-- H(X \xor V_j) */
+ blkxor(X, &V[j * (32 * r)], 128 * r);
+ blockmix_salsa8(X, Y, Z, r);
+
+ /* 7: j <-- Integerify(X) mod N */
+ j = integerify(Y, r) & (N - 1);
+
+ /* 8: X <-- H(X \xor V_j) */
+ blkxor(Y, &V[j * (32 * r)], 128 * r);
+ blockmix_salsa8(Y, X, Z, r);
+ }
+
+ /* 10: B' <-- X */
+ for (k = 0; k < 32 * r; k++)
+ le32enc(&B[4 * k], X[k]);
}
/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
*/
-static uint32_t scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad)
+static void scrypt_1024_1_1_256_sp(const char* input, char* output, char* scratchpad)
{
+ uint8_t * B;
uint32_t * V;
- uint32_t X[32];
+ uint32_t * XY;
uint32_t i;
- uint32_t j;
- uint32_t k;
- uint64_t *p1, *p2;
-
- p1 = (uint64_t *)X;
- V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
- PBKDF2_SHA256_80_128(input, X);
+ const uint32_t N = 1024;
+ const uint32_t r = 1;
+ const uint32_t p = 1;
- for (i = 0; i < 1024; i += 2) {
- memcpy(&V[i * 32], X, 128);
+ B = (uint8_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+ XY = (uint32_t *)(B + (128 * r * p));
+ V = (uint32_t *)(B + (128 * r * p) + (256 * r + 64));
- salsa20_8(&X[0], &X[16]);
- salsa20_8(&X[16], &X[0]);
+ /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
+ PBKDF2_SHA256((const uint8_t*)input, 80, (const uint8_t*)input, 80, 1, B, p * 128 * r);
- memcpy(&V[(i + 1) * 32], X, 128);
-
- salsa20_8(&X[0], &X[16]);
- salsa20_8(&X[16], &X[0]);
- }
- for (i = 0; i < 1024; i += 2) {
- j = X[16] & 1023;
- p2 = (uint64_t *)(&V[j * 32]);
- for(k = 0; k < 16; k++)
- p1[k] ^= p2[k];
-
- salsa20_8(&X[0], &X[16]);
- salsa20_8(&X[16], &X[0]);
-
- j = X[16] & 1023;
- p2 = (uint64_t *)(&V[j * 32]);
- for(k = 0; k < 16; k++)
- p1[k] ^= p2[k];
-
- salsa20_8(&X[0], &X[16]);
- salsa20_8(&X[16], &X[0]);
+ /* 2: for i = 0 to p - 1 do */
+ for (i = 0; i < p; i++) {
+ /* 3: B_i <-- MF(B_i, N) */
+ smix(&B[i * 128 * r], r, N, V, XY);
}
- return PBKDF2_SHA256_80_128_32(input, X);
+ /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
+ PBKDF2_SHA256((const uint8_t*)input, 80, B, p * 128 * r, 1, (uint8_t*)output, 32);
}
int scanhash_scrypt(int thr_id, unsigned char *pdata, unsigned char *scratchbuf,
const unsigned char *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
- uint32_t data[20];
- uint32_t tmp_hash7;
+ unsigned char data[80];
+ unsigned char tmp_hash[32];
+ uint32_t *nonce = (uint32_t *)(data + 64 + 12);
uint32_t n = 0;
- uint32_t Htarg = ((const uint32_t *)ptarget)[7];
+ uint32_t Htarg = *(uint32_t *)(ptarget + 28);
int i;
work_restart[thr_id].restart = 0;
- be32enc_vect(data, (const uint32_t *)pdata, 19);
+ for (i = 0; i < 80/4; i++)
+ ((uint32_t *)data)[i] = swab32(((uint32_t *)pdata)[i]);
while(1) {
n++;
- data[19] = n;
- tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
+ *nonce = n;
+ scrypt_1024_1_1_256_sp(data, tmp_hash, scratchbuf);
- if (tmp_hash7 <= Htarg) {
- ((uint32_t *)pdata)[19] = byteswap(n);
+ if (*(uint32_t *)(tmp_hash+28) <= Htarg) {
+ *(uint32_t *)(pdata + 64 + 12) = swab32(n);
*hashes_done = n;
return true;
}