aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Siarhei Siamashka <siarhei.siamashka@gmail.com> 2011-12-23 22:30:23 +0200
committer: Siarhei Siamashka <siarhei.siamashka@gmail.com> 2011-12-23 22:30:23 +0200
commit: bd44eb15e08c62f71837cdc5e7f7f92082664da6 (patch)
tree: f1974f56dd637bde78b5bdb81fd1c1ddfc301f9e
parent: e39be36cc0744d084653264191490fb135e81d3d (diff)
Revert "blkcpy -> memcpy"
This reverts commit e39be36cc0744d084653264191490fb135e81d3d. Replacing blkcpy with memcpy causes a performance regression on MIPS and PPC (even though it is faster on x86).
-rw-r--r-- scrypt.c 24
1 files changed, 18 insertions, 6 deletions
diff --git a/scrypt.c b/scrypt.c
index 2504b7b..5dda34a 100644
--- a/scrypt.c
+++ b/scrypt.c
@@ -473,6 +473,18 @@ static uint64_t integerify(void *, size_t);
static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *);
static void
+blkcpy(void * dest, void * src, size_t len)
+{
+ size_t * D = dest;
+ size_t * S = src;
+ size_t L = len / sizeof(size_t);
+ size_t i;
+
+ for (i = 0; i < L; i++)
+ D[i] = S[i];
+}
+
+static void
blkxor(void * dest, void * src, size_t len)
{
size_t * D = dest;
@@ -494,7 +506,7 @@ salsa20_8(uint32_t B[16])
uint32_t x[16];
size_t i;
- memcpy(x, B, 64);
+ blkcpy(x, B, 64);
for (i = 0; i < 8; i += 2) {
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
/* Operate on columns. */
@@ -540,7 +552,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
size_t i;
/* 1: X <-- B_{2r - 1} */
- memcpy(X, &Bin[(2 * r - 1) * 16], 64);
+ blkcpy(X, &Bin[(2 * r - 1) * 16], 64);
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < 2 * r; i += 2) {
@@ -550,7 +562,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
/* 4: Y_i <-- X */
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
- memcpy(&Bout[i * 8], X, 64);
+ blkcpy(&Bout[i * 8], X, 64);
/* 3: X <-- H(X \xor B_i) */
blkxor(X, &Bin[i * 16 + 16], 64);
@@ -558,7 +570,7 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r)
/* 4: Y_i <-- X */
/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
- memcpy(&Bout[i * 8 + r * 16], X, 64);
+ blkcpy(&Bout[i * 8 + r * 16], X, 64);
}
}
@@ -599,13 +611,13 @@ smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY)
/* 2: for i = 0 to N - 1 do */
for (i = 0; i < N; i += 2) {
/* 3: V_i <-- X */
- memcpy(&V[i * (32 * r)], X, 128 * r);
+ blkcpy(&V[i * (32 * r)], X, 128 * r);
/* 4: X <-- H(X) */
blockmix_salsa8(X, Y, Z, r);
/* 3: V_i <-- X */
- memcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);
+ blkcpy(&V[(i + 1) * (32 * r)], Y, 128 * r);
/* 4: X <-- H(X) */
blockmix_salsa8(Y, X, Z, r);