From 0941296571ed0c2b7b5953d636907b299a1409e1 Mon Sep 17 00:00:00 2001
From: Art Forz
Date: Thu, 6 Oct 2011 22:37:31 +0200
Subject: Rearrange order of operations in scrypt salsa20

3.73kH/s/core on a 3.6GHz PhenomII compiled with gcc 4.6.1 and CFLAGS="-march=amdfam10 -O3"
---
 scrypt.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/scrypt.c b/scrypt.c
index f6778a1..803a45a 100644
--- a/scrypt.c
+++ b/scrypt.c
@@ -328,16 +328,16 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16])
 	for (i = 0; i < 8; i += 2) {
 #define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
 		/* Operate on columns. */
-		x04 ^= R(x00+x12, 7); x08 ^= R(x04+x00, 9); x12 ^= R(x08+x04,13); x00 ^= R(x12+x08,18);
-		x09 ^= R(x05+x01, 7); x13 ^= R(x09+x05, 9); x01 ^= R(x13+x09,13); x05 ^= R(x01+x13,18);
-		x14 ^= R(x10+x06, 7); x02 ^= R(x14+x10, 9); x06 ^= R(x02+x14,13); x10 ^= R(x06+x02,18);
-		x03 ^= R(x15+x11, 7); x07 ^= R(x03+x15, 9); x11 ^= R(x07+x03,13); x15 ^= R(x11+x07,18);
+		x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
+		x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
+		x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
+		x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
 
 		/* Operate on rows. */
-		x01 ^= R(x00+x03, 7); x02 ^= R(x01+x00, 9); x03 ^= R(x02+x01,13); x00 ^= R(x03+x02,18);
-		x06 ^= R(x05+x04, 7); x07 ^= R(x06+x05, 9); x04 ^= R(x07+x06,13); x05 ^= R(x04+x07,18);
-		x11 ^= R(x10+x09, 7); x08 ^= R(x11+x10, 9); x09 ^= R(x08+x11,13); x10 ^= R(x09+x08,18);
-		x12 ^= R(x15+x14, 7); x13 ^= R(x12+x15, 9); x14 ^= R(x13+x12,13); x15 ^= R(x14+x13,18);
+		x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
+		x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
+		x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
+		x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
 #undef R
 	}
 	B[ 0] += x00;
-- 
cgit v1.2.3-18-g5258