about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Siarhei Siamashka <siarhei.siamashka@gmail.com> 2011-12-23 23:11:15 +0200
committer Siarhei Siamashka <siarhei.siamashka@gmail.com> 2011-12-23 23:11:15 +0200
commit 17c4a816629af7ff77f674096b8ca910d615a742 (patch)
tree f74c4c6cb3fa5a788bb264d1d2287cd9918879d0
parent 9a49455c492594532fcb773a53fde5b77ea10eb4 (diff)
SIMD friendly reordering of data (taken from Tarsnap SSE code)
-rw-r--r-- scrypt.c 44
1 files changed, 26 insertions, 18 deletions
diff --git a/scrypt.c b/scrypt.c
index ce16f91..71dcf31 100644
--- a/scrypt.c
+++ b/scrypt.c
@@ -513,27 +513,27 @@ salsa20_8(uint32_t B[16])
x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9);
x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18);
- x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9);
- x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18);
+ x[ 5] ^= R(x[ 1]+x[13], 7); x[ 9] ^= R(x[ 5]+x[ 1], 9);
+ x[13] ^= R(x[ 9]+x[ 5],13); x[ 1] ^= R(x[13]+x[ 9],18);
- x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9);
- x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18);
+ x[ 6] ^= R(x[ 2]+x[14], 7); x[10] ^= R(x[ 6]+x[ 2], 9);
+ x[14] ^= R(x[10]+x[ 6],13); x[ 2] ^= R(x[14]+x[10],18);
- x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9);
- x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18);
+ x[ 7] ^= R(x[ 3]+x[15], 7); x[11] ^= R(x[ 7]+x[ 3], 9);
+ x[15] ^= R(x[11]+x[ 7],13); x[ 3] ^= R(x[15]+x[11],18);
/* Operate on rows. */
- x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9);
- x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18);
+ x[13] ^= R(x[ 0]+x[ 7], 7); x[10] ^= R(x[13]+x[ 0], 9);
+ x[ 7] ^= R(x[10]+x[13],13); x[ 0] ^= R(x[ 7]+x[10],18);
- x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9);
- x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18);
+ x[14] ^= R(x[ 1]+x[ 4], 7); x[11] ^= R(x[14]+x[ 1], 9);
+ x[ 4] ^= R(x[11]+x[14],13); x[ 1] ^= R(x[ 4]+x[11],18);
- x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9);
- x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18);
+ x[15] ^= R(x[ 2]+x[ 5], 7); x[ 8] ^= R(x[15]+x[ 2], 9);
+ x[ 5] ^= R(x[ 8]+x[15],13); x[ 2] ^= R(x[ 5]+x[ 8],18);
- x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9);
- x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18);
+ x[12] ^= R(x[ 3]+x[ 6], 7); x[ 9] ^= R(x[12]+x[ 3], 9);
+ x[ 6] ^= R(x[ 9]+x[12],13); x[ 3] ^= R(x[ 6]+x[ 9],18);
#undef R
}
for (i = 0; i < 16; i++)
@@ -605,8 +605,12 @@ smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
size_t k;
/* 1: X <-- B */
- for (k = 0; k < 32 * r; k++)
- X[k] = le32dec(&B[4 * k]);
+ for (k = 0; k < 2 * r; k++) {
+ for (i = 0; i < 16; i++) {
+ X[k * 16 + i] =
+ le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
+ }
+ }
/* 2: for i = 0 to N - 1 do */
for (i = 0; i < N; i += 2) {
@@ -641,8 +645,12 @@ smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
}
/* 10: B' <-- X */
- for (k = 0; k < 32 * r; k++)
- le32enc(&B[4 * k], X[k]);
+ for (k = 0; k < 2 * r; k++) {
+ for (i = 0; i < 16; i++) {
+ le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
+ X[k * 16 + i]);
+ }
+ }
}
#if defined(__x86_64__)