diff options
author | Alon Zakai <alonzakai@gmail.com> | 2011-06-11 19:05:20 -0700 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2011-06-11 19:05:20 -0700 |
commit | 3c8451a078ef0a61143c67590ebdf3e407f29182 (patch) | |
tree | d283c734cdd9e469bc5a8873796f43daf2db3325 | |
parent | 30537ea5fca2ceb0b09dd3e8af2a5443b35a697a (diff) |
optimize memset and memcpy in ta2 (typed arrays mode 2, i.e. USE_TYPED_ARRAYS == 2)
-rw-r--r-- | src/library.js | 1 | ||||
-rw-r--r-- | src/parseTools.js | 80 | ||||
-rw-r--r-- | tests/runner.py | 22 |
3 files changed, 78 insertions, 25 deletions
diff --git a/src/library.js b/src/library.js index 23bda6ce..b0f7ef86 100644 --- a/src/library.js +++ b/src/library.js @@ -622,7 +622,6 @@ var Library = { #if ASSERTIONS assert(num % 1 === 0, 'memcpy given ' + num + ' bytes to copy. Problem with QUANTUM_SIZE=1 corrections perhaps?'); #endif - // TODO: optimize for the typed arrays case // || 0, since memcpy sometimes copies uninitialized areas XXX: Investigate why initializing alloc'ed memory does not fix that too {{{ makeCopyValues('dest', 'src', 'num', 'null', ' || 0') }}}; }, diff --git a/src/parseTools.js b/src/parseTools.js index 34187a68..398c8b92 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -737,22 +737,47 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore) { } function makeSetValues(ptr, pos, value, type, num) { - function safety() { - return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + getFastValue(ptr, '+', pos) + '+$mspi$, ' + type + ', 1)' : ''); + function safety(where) { + where = where || getFastValue(ptr, '+', pos) + '+$mspi$'; + return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + where + ', ' + type + ', 1)' : ''); } if (USE_TYPED_ARRAYS in set(0, 1)) { return 'for (var $mspi$ = 0; $mspi$ < ' + num + '; $mspi$++) {\n' + makeSetValue(ptr, getFastValue(pos, '+', '$mspi$'), value, type) + ';\n}'; } else { // USE_TYPED_ARRAYS == 2 +/* return 'for (var $mspi$ = 0; $mspi$ < ' + num + '; $mspi$++) {\n' + ' HEAP8[' + getFastValue(ptr, '+', pos) + '+$mspi$] = ' + value + safety() + '\n}'; - // TODO: optimize this, setting 4 values at a time etc. 
like makeCopyValues +*/ + return '' + + 'var $dest$, $stop$, $stop4$, $fast$, $value4$;\n' + + '$dest$ = ' + getFastValue(ptr, '+', pos) + ';\n' + + '$stop$ = $dest$ + ' + num + ';\n' + + '$value4$ = ' + value + ';\n' + + 'if ($value4$ < 0) $value4$ += 256;\n' + + '$value4$ = $value4$ + ($value4$<<8) + ($value4$<<16) + ($value4$*16777216);\n' + + 'while ($dest$%4 !== 0 && $dest$ < $stop$) {\n' + + ' ' + safety('$dest$') + '; HEAP8[$dest$++] = ' + value + ';\n' + + '}\n' + + '$dest$ = $dest$ >> 2;\n' + + '$stop4$ = $stop$ >> 2;\n' + + 'while ($dest$ < $stop4$) {\n' + + safety('($dest$<<2)+0', '($src$<<2)+0') + ';' + safety('($dest$<<2)+1', '($src$<<2)+1') + ';' + + safety('($dest$<<2)+2', '($src$<<2)+2') + ';' + safety('($dest$<<2)+3', '($src$<<2)+3') + (SAFE_HEAP ? ';\n' : '') + + ' HEAP32[$dest$++] = $value4$;\n' + // this is the fast inner loop we try hard to stay in + '}\n' + + '$dest$ = $dest$ << 2;\n' + + 'while ($dest$ < $stop$) {\n' + + ' ' + safety('$dest$') + '; HEAP8[$dest$++] = ' + value + ';\n' + + '}' } } function makeCopyValues(dest, src, num, type, modifier) { - function safety() { - return ';' + (SAFE_HEAP ? 'SAFE_HEAP_COPY_HISTORY(' + dest + '+$mcpi$, ' + src + '+$mcpi$)' : ''); + function safety(to, from) { + to = to || (dest + '+' + '$mcpi$'); + from = from || (src + '+' + '$mcpi$'); + return (SAFE_HEAP ? 
'SAFE_HEAP_COPY_HISTORY(' + to + ', ' + from + ')' : ''); } if (USE_TYPED_ARRAYS in set(0, 1)) { return 'for (var $mcpi$ = 0; $mcpi$ < ' + num + '; $mcpi$++) {\n' + @@ -760,31 +785,38 @@ function makeCopyValues(dest, src, num, type, modifier) { : // Null is special-cased: We copy over all heaps makeGetSlabs(dest, 'null', true).map(function(slab) { return slab + '[' + dest + '+$mcpi$]=' + slab + '[' + src + '+$mcpi$]' - }).join('; ') + safety() + }).join('; ') + '; ' + safety() ) + '\n' + '}'; } else { // USE_TYPED_ARRAYS == 2 +/* return 'for (var $mcpi$ = 0; $mcpi$ < ' + num + '; $mcpi$++) {\n' + ' HEAP8[' + dest + '+$mcpi$] = HEAP8[' + src + '+$mcpi$]; ' + safety() + ';\n' + '}'; -/* TODO: rework something like this potential optimizing code - if (isNumber(num) && num < 12) { - } else { // num >= 12 or unknown - } - var stop = src + num; - var fast = dest%4 === src%4; - while (src%4 !== 0 && src < stop) { - HEAP8[dest++] = HEAP8[src++]; - } - while (src+4 <= stop && fast) { - HEAP32[dest] = HEAP32[src]; - src += 4; - dest += 4; - } - while (src < stop) { - HEAP8[dest++] = HEAP8[src++]; - } - } */ + return '' + + 'var $src$, $dest$, $stop$, $stop4$, $fast$;\n' + + '$src$ = ' + src + ';\n' + + '$dest$ = ' + dest + ';\n' + + '$stop$ = $src$ + ' + num + ';\n' + + '$fast$ = ($dest$%4) === ($src$%4);\n' + + 'while ($src$%4 !== 0 && $src$ < $stop$) {\n' + + ' ' + safety('$dest$', '$src$') + '; HEAP8[$dest$++] = HEAP8[$src$++];\n' + + '}\n' + + 'if ($fast$) {\n' + + ' $src$ = $src$ >> 2;\n' + + ' $dest$ = $dest$ >> 2;\n' + + ' $stop4$ = $stop$ >> 2;\n' + + ' while ($src$ < $stop4$) {\n' + + safety('($dest$<<2)+0', '($src$<<2)+0') + ';' + safety('($dest$<<2)+1', '($src$<<2)+1') + ';' + + safety('($dest$<<2)+2', '($src$<<2)+2') + ';' + safety('($dest$<<2)+3', '($src$<<2)+3') + (SAFE_HEAP ? 
';\n' : '') + + ' HEAP32[$dest$++] = HEAP32[$src$++];\n' + // this is the fast inner loop we try hard to stay in + ' }\n' + + ' $src$ = $src$ << 2;\n' + + ' $dest$ = $dest$ << 2;\n' + + '}\n' + + 'while ($src$ < $stop$) {\n' + + ' ' + safety('$dest$', '$src$') + '; HEAP8[$dest$++] = HEAP8[$src$++];\n' + + '}' } return null; } diff --git a/tests/runner.py b/tests/runner.py index 2a7321b7..a7888238 100644 --- a/tests/runner.py +++ b/tests/runner.py @@ -2729,6 +2729,28 @@ else: ''' self.do_benchmark(src, [], 'lastprime: 1297001.') + def test_memops(self): + src = ''' + #include<stdio.h> + #include<string.h> + #include<stdlib.h> + int main() { + int N = 10*1024*1024; + int final = 0; + char *buf = (char*)malloc(N); + for (int t = 0; t < 20; t++) { + for (int i = 0; i < N; i++) + buf[i] = (i*i)%256; + memcpy(buf, buf+N/2, N/2); + for (int i = 0; i < N; i++) + final += buf[i] & 1; + } + printf("final: %d.\\n", final); + return 1; + } + ''' + self.do_benchmark(src, [], 'final: 104857600.') + def test_fannkuch(self): src = open(path_from_root('tests', 'fannkuch.cpp'), 'r').read() self.do_benchmark(src, ['10'], 'Pfannkuchen(10) = 38.') |