diff options
author | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2012-01-01 04:04:00 +0200 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2012-01-01 04:04:54 +0200 |
commit | b690870238b0b468355ec0d5b83c5b43b00f8aa6 (patch) | |
tree | e72456da0ad5d0caf9547630c1dcbf74c430704e | |
parent | 6d1e5bf3c2300623153479fd43c08a5874dacbbc (diff) |
Fix strict aliasing problems
Currently, the cast between uint32_t and uint64_t pointers
breaks the strict aliasing rules and needs the -fno-strict-aliasing
option as a workaround; otherwise, the code gets miscompiled.
But -fno-strict-aliasing can seriously inhibit optimization
possibilities. For example, the performance of one thread
on the Cell PPU (using AltiVec instructions):
CFLAGS="-O3 -mcpu=cell -fno-strict-aliasing" - 1.79 khash/sec
CFLAGS="-O3 -mcpu=cell -fstrict-aliasing" - 2.60 khash/sec
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | scrypt.c | 12 |
2 files changed, 7 insertions, 7 deletions
diff --git a/Makefile.am b/Makefile.am index a981f2f..47affe0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ EXTRA_DIST = example-cfg.json SUBDIRS = compat -INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) +INCLUDES = $(PTHREAD_FLAGS) $(JANSSON_INCLUDES) bin_PROGRAMS = minerd @@ -101,8 +101,8 @@ static inline void scrypt_core1(uint32_t *X, uint32_t *V) uint32_t i; uint32_t j; uint32_t k; - uint64_t *p1, *p2; - p1 = (uint64_t *)X; + uint32_t *p1, *p2; + p1 = X; for (i = 0; i < 1024; i += 2) { memcpy(&V[i * 32], X, 128); @@ -116,16 +116,16 @@ static inline void scrypt_core1(uint32_t *X, uint32_t *V) } for (i = 0; i < 1024; i += 2) { j = X[16] & 1023; - p2 = (uint64_t *)(&V[j * 32]); - for(k = 0; k < 16; k++) + p2 = &V[j * 32]; + for(k = 0; k < 32; k++) p1[k] ^= p2[k]; salsa20_8(&X[0], &X[16]); salsa20_8(&X[16], &X[0]); j = X[16] & 1023; - p2 = (uint64_t *)(&V[j * 32]); - for(k = 0; k < 16; k++) + p2 = &V[j * 32]; + for(k = 0; k < 32; k++) p1[k] ^= p2[k]; salsa20_8(&X[0], &X[16]); |