From b690870238b0b468355ec0d5b83c5b43b00f8aa6 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Sun, 1 Jan 2012 04:04:00 +0200 Subject: Fix strict aliasing problems Currently the cast between uint32_t and uint64_t pointers breaks strict aliasing rules and needs the -fno-strict-aliasing option as a workaround; otherwise the code gets miscompiled. But -fno-strict-aliasing can seriously limit optimization opportunities. For example, performance of 1 thread on Cell PPU (using Altivec instructions): CFLAGS="-O3 -mcpu=cell -fno-strict-aliasing" - 1.79 khash/sec CFLAGS="-O3 -mcpu=cell -fstrict-aliasing" - 2.60 khash/sec --- Makefile.am | 2 +- scrypt.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile.am b/Makefile.am index a981f2f..47affe0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ EXTRA_DIST = example-cfg.json SUBDIRS = compat -INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) +INCLUDES = $(PTHREAD_FLAGS) $(JANSSON_INCLUDES) bin_PROGRAMS = minerd diff --git a/scrypt.c b/scrypt.c index 3288f60..e26ea3b 100644 --- a/scrypt.c +++ b/scrypt.c @@ -101,8 +101,8 @@ static inline void scrypt_core1(uint32_t *X, uint32_t *V) uint32_t i; uint32_t j; uint32_t k; - uint64_t *p1, *p2; - p1 = (uint64_t *)X; + uint32_t *p1, *p2; + p1 = X; for (i = 0; i < 1024; i += 2) { memcpy(&V[i * 32], X, 128); @@ -116,16 +116,16 @@ static inline void scrypt_core1(uint32_t *X, uint32_t *V) } for (i = 0; i < 1024; i += 2) { j = X[16] & 1023; - p2 = (uint64_t *)(&V[j * 32]); - for(k = 0; k < 16; k++) + p2 = &V[j * 32]; + for(k = 0; k < 32; k++) p1[k] ^= p2[k]; salsa20_8(&X[0], &X[16]); salsa20_8(&X[16], &X[0]); j = X[16] & 1023; - p2 = (uint64_t *)(&V[j * 32]); - for(k = 0; k < 16; k++) + p2 = &V[j * 32]; + for(k = 0; k < 32; k++) p1[k] ^= p2[k]; salsa20_8(&X[0], &X[16]); -- cgit v1.2.3-18-g5258