about | summary | refs | log | tree | commit | diff
path: root/scrypt-simd-helpers.h
diff options
context:
space:
mode:
Diffstat (limited to 'scrypt-simd-helpers.h')
-rw-r--r-- scrypt-simd-helpers.h 33
1 files changed, 17 insertions, 16 deletions
diff --git a/scrypt-simd-helpers.h b/scrypt-simd-helpers.h
index 322d718..8dd5681 100644
--- a/scrypt-simd-helpers.h
+++ b/scrypt-simd-helpers.h
@@ -270,16 +270,17 @@ typedef union { uint32x4 q[8]; uint32_t w[32]; } XY;
* All buffers must be aligned at 64 byte boundary.
*/
static inline
-void scrypt_simd_core1(uint8_t databuf[128], void * scratch)
+void scrypt_simd_core1(uint32_t databuf[32], void * scratch)
{
+ uint32_t * databufA = (uint32_t *)&databuf[0];
XY * X = (XY *)((uintptr_t)scratch + 0);
uint32x4 * V = (uint32x4 *)((uintptr_t)scratch + 128);
int i, j;
/* 1: X <-- B */
for (i = 0; i < 16; i++) {
- X->w[i] = le32dec(&databuf[(i * 5 % 16) * 4]);
- X->w[16 + i] = le32dec(&databuf[(16 + (i * 5 % 16)) * 4]);
+ X->w[i] = databufA[i * 5 % 16];
+ X->w[16 + i] = databufA[16 + (i * 5 % 16)];
}
/* 2: for i = 0 to N - 1 do */
@@ -299,8 +300,8 @@ void scrypt_simd_core1(uint8_t databuf[128], void * scratch)
/* 10: B' <-- X */
for (i = 0; i < 16; i++) {
- le32enc(&databuf[(i * 5 % 16) * 4], X->w[i]);
- le32enc(&databuf[(16 + (i * 5 % 16)) * 4], X->w[16 + i]);
+ databufA[i * 5 % 16] = X->w[i];
+ databufA[16 + (i * 5 % 16)] = X->w[16 + i];
}
}
@@ -317,10 +318,10 @@ void scrypt_simd_core1(uint8_t databuf[128], void * scratch)
* All buffers must be aligned at 64 byte boundary.
*/
static inline
-void scrypt_simd_core2(uint8_t databuf[2 * 128], void * scratch)
+void scrypt_simd_core2(uint32_t databuf[2 * 32], void * scratch)
{
- uint8_t * databufA = &databuf[0];
- uint8_t * databufB = &databuf[128];
+ uint32_t * databufA = (uint32_t *)&databuf[0];
+ uint32_t * databufB = (uint32_t *)&databuf[32];
XY * XA = (XY *)((uintptr_t)scratch);
XY * XB = (XY *)((uintptr_t)scratch + 128 + 128 * 1024);
uint32x4 * VA = (uint32x4 *)((uintptr_t)XA + 128);
@@ -329,10 +330,10 @@ void scrypt_simd_core2(uint8_t databuf[2 * 128], void * scratch)
/* 1: X <-- B */
for (i = 0; i < 16; i++) {
- XA->w[i] = le32dec(&databufA[(i * 5 % 16) * 4]);
- XA->w[16 + i] = le32dec(&databufA[(16 + (i * 5 % 16)) * 4]);
- XB->w[i] = le32dec(&databufB[(i * 5 % 16) * 4]);
- XB->w[16 + i] = le32dec(&databufB[(16 + (i * 5 % 16)) * 4]);
+ XA->w[i] = databufA[i * 5 % 16];
+ XA->w[16 + i] = databufA[16 + (i * 5 % 16)];
+ XB->w[i] = databufB[i * 5 % 16];
+ XB->w[16 + i] = databufB[16 + (i * 5 % 16)];
}
/* 2: for i = 0 to N - 1 do */
@@ -355,10 +356,10 @@ void scrypt_simd_core2(uint8_t databuf[2 * 128], void * scratch)
/* 10: B' <-- X */
for (i = 0; i < 16; i++) {
- le32enc(&databufA[(i * 5 % 16) * 4], XA->w[i]);
- le32enc(&databufA[(16 + (i * 5 % 16)) * 4], XA->w[16 + i]);
- le32enc(&databufB[(i * 5 % 16) * 4], XB->w[i]);
- le32enc(&databufB[(16 + (i * 5 % 16)) * 4], XB->w[16 + i]);
+ databufA[i * 5 % 16] = XA->w[i];
+ databufA[16 + (i * 5 % 16)] = XA->w[16 + i];
+ databufB[i * 5 % 16] = XB->w[i];
+ databufB[16 + (i * 5 % 16)] = XB->w[16 + i];
}
}