about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/library.js 1
-rw-r--r-- src/parseTools.js 80
-rw-r--r-- tests/runner.py 22
3 files changed, 78 insertions, 25 deletions
diff --git a/src/library.js b/src/library.js
index 23bda6ce..b0f7ef86 100644
--- a/src/library.js
+++ b/src/library.js
@@ -622,7 +622,6 @@ var Library = {
#if ASSERTIONS
assert(num % 1 === 0, 'memcpy given ' + num + ' bytes to copy. Problem with QUANTUM_SIZE=1 corrections perhaps?');
#endif
- // TODO: optimize for the typed arrays case
// || 0, since memcpy sometimes copies uninitialized areas XXX: Investigate why initializing alloc'ed memory does not fix that too
{{{ makeCopyValues('dest', 'src', 'num', 'null', ' || 0') }}};
},
diff --git a/src/parseTools.js b/src/parseTools.js
index 34187a68..398c8b92 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -737,22 +737,47 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore) {
}
function makeSetValues(ptr, pos, value, type, num) {
- function safety() {
- return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + getFastValue(ptr, '+', pos) + '+$mspi$, ' + type + ', 1)' : '');
+ function safety(where) {
+ where = where || getFastValue(ptr, '+', pos) + '+$mspi$';
+ return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + where + ', ' + type + ', 1)' : '');
}
if (USE_TYPED_ARRAYS in set(0, 1)) {
return 'for (var $mspi$ = 0; $mspi$ < ' + num + '; $mspi$++) {\n' +
makeSetValue(ptr, getFastValue(pos, '+', '$mspi$'), value, type) + ';\n}';
} else { // USE_TYPED_ARRAYS == 2
+/*
return 'for (var $mspi$ = 0; $mspi$ < ' + num + '; $mspi$++) {\n' +
' HEAP8[' + getFastValue(ptr, '+', pos) + '+$mspi$] = ' + value + safety() + '\n}';
- // TODO: optimize this, setting 4 values at a time etc. like makeCopyValues
+*/
+ return '' + // emitted fill: HEAP8 head until $dest$ is 4-aligned, HEAP32 word stores, then HEAP8 tail
+ 'var $dest$, $stop$, $stop4$, $value4$;\n' +
+ '$dest$ = ' + getFastValue(ptr, '+', pos) + ';\n' +
+ '$stop$ = $dest$ + ' + num + ';\n' +
+ '$value4$ = ' + value + ';\n' +
+ 'if ($value4$ < 0) $value4$ += 256;\n' +
+ '$value4$ = $value4$ + ($value4$<<8) + ($value4$<<16) + ($value4$*16777216);\n' +
+ 'while ($dest$%4 !== 0 && $dest$ < $stop$) {\n' +
+ ' ' + safety('$dest$') + '; HEAP8[$dest$++] = ' + value + ';\n' +
+ '}\n' +
+ 'if ($dest$ < $stop$) {\n' + '$dest$ = $dest$ >> 2;\n' + // bytes remain, so the head loop stopped on a 4-aligned $dest$ and the shifts are lossless
+ '$stop4$ = $stop$ >> 2;\n' +
+ 'while ($dest$ < $stop4$) {\n' +
+ safety('($dest$<<2)+0') + ';' + safety('($dest$<<2)+1') + ';' +
+ safety('($dest$<<2)+2') + ';' + safety('($dest$<<2)+3') + (SAFE_HEAP ? ';\n' : '') +
+ ' HEAP32[$dest$++] = $value4$;\n' + // this is the fast inner loop we try hard to stay in
+ '}\n' +
+ '$dest$ = $dest$ << 2;\n' + '}\n' + // skipped when the head loop already hit $stop$ unaligned; shifting then would rewind $dest$ below the range start
+ 'while ($dest$ < $stop$) {\n' +
+ ' ' + safety('$dest$') + '; HEAP8[$dest$++] = ' + value + ';\n' +
+ '}'
}
}
function makeCopyValues(dest, src, num, type, modifier) {
- function safety() {
- return ';' + (SAFE_HEAP ? 'SAFE_HEAP_COPY_HISTORY(' + dest + '+$mcpi$, ' + src + '+$mcpi$)' : '');
+ function safety(to, from) {
+ to = to || (dest + '+' + '$mcpi$');
+ from = from || (src + '+' + '$mcpi$');
+ return (SAFE_HEAP ? 'SAFE_HEAP_COPY_HISTORY(' + to + ', ' + from + ')' : '');
}
if (USE_TYPED_ARRAYS in set(0, 1)) {
return 'for (var $mcpi$ = 0; $mcpi$ < ' + num + '; $mcpi$++) {\n' +
@@ -760,31 +785,38 @@ function makeCopyValues(dest, src, num, type, modifier) {
: // Null is special-cased: We copy over all heaps
makeGetSlabs(dest, 'null', true).map(function(slab) {
return slab + '[' + dest + '+$mcpi$]=' + slab + '[' + src + '+$mcpi$]'
- }).join('; ') + safety()
+ }).join('; ') + '; ' + safety()
) + '\n' + '}';
} else { // USE_TYPED_ARRAYS == 2
+/*
return 'for (var $mcpi$ = 0; $mcpi$ < ' + num + '; $mcpi$++) {\n' +
' HEAP8[' + dest + '+$mcpi$] = HEAP8[' + src + '+$mcpi$]; ' + safety() + ';\n' +
'}';
-/* TODO: rework something like this potential optimizing code
- if (isNumber(num) && num < 12) {
- } else { // num >= 12 or unknown
- }
- var stop = src + num;
- var fast = dest%4 === src%4;
- while (src%4 !== 0 && src < stop) {
- HEAP8[dest++] = HEAP8[src++];
- }
- while (src+4 <= stop && fast) {
- HEAP32[dest] = HEAP32[src];
- src += 4;
- dest += 4;
- }
- while (src < stop) {
- HEAP8[dest++] = HEAP8[src++];
- }
- }
*/
+ return '' + // emitted copy: HEAP8 head until $src$ is 4-aligned, HEAP32 word copies when both pointers share alignment, then HEAP8 tail
+ 'var $src$, $dest$, $stop$, $stop4$, $fast$;\n' +
+ '$src$ = ' + src + ';\n' +
+ '$dest$ = ' + dest + ';\n' +
+ '$stop$ = $src$ + ' + num + ';\n' +
+ '$fast$ = ($dest$%4) === ($src$%4);\n' +
+ 'while ($src$%4 !== 0 && $src$ < $stop$) {\n' +
+ ' ' + safety('$dest$', '$src$') + '; HEAP8[$dest$++] = HEAP8[$src$++];\n' +
+ '}\n' +
+ 'if ($fast$ && $src$ < $stop$) {\n' + // enter only with bytes left ($src$ is then 4-aligned); otherwise the >>2/<<2 round-trip rewinds both pointers and the tail loop overwrites bytes before dest
+ ' $src$ = $src$ >> 2;\n' +
+ ' $dest$ = $dest$ >> 2;\n' +
+ ' $stop4$ = $stop$ >> 2;\n' +
+ ' while ($src$ < $stop4$) {\n' +
+ safety('($dest$<<2)+0', '($src$<<2)+0') + ';' + safety('($dest$<<2)+1', '($src$<<2)+1') + ';' +
+ safety('($dest$<<2)+2', '($src$<<2)+2') + ';' + safety('($dest$<<2)+3', '($src$<<2)+3') + (SAFE_HEAP ? ';\n' : '') +
+ ' HEAP32[$dest$++] = HEAP32[$src$++];\n' + // this is the fast inner loop we try hard to stay in
+ ' }\n' +
+ ' $src$ = $src$ << 2;\n' +
+ ' $dest$ = $dest$ << 2;\n' +
+ '}\n' +
+ 'while ($src$ < $stop$) {\n' +
+ ' ' + safety('$dest$', '$src$') + '; HEAP8[$dest$++] = HEAP8[$src$++];\n' +
+ '}'
}
return null;
}
diff --git a/tests/runner.py b/tests/runner.py
index 2a7321b7..a7888238 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -2729,6 +2729,28 @@ else:
'''
self.do_benchmark(src, [], 'lastprime: 1297001.')
+ def test_memops(self): # Benchmark: 20 rounds of filling a 10 MB buffer, memcpy of its upper half onto the lower half, and summing the low bit of every byte
+ src = '''
+ #include<stdio.h>
+ #include<string.h>
+ #include<stdlib.h>
+ int main() {
+ int N = 10*1024*1024;
+ int final = 0;
+ char *buf = (char*)malloc(N);
+ for (int t = 0; t < 20; t++) {
+ for (int i = 0; i < N; i++)
+ buf[i] = (i*i)%256;
+ memcpy(buf, buf+N/2, N/2);
+ for (int i = 0; i < N; i++)
+ final += buf[i] & 1;
+ }
+ printf("final: %d.\\n", final);
+ return 1;
+ }
+ '''
+ self.do_benchmark(src, [], 'final: 104857600.')
+
def test_fannkuch(self):
src = open(path_from_root('tests', 'fannkuch.cpp'), 'r').read()
self.do_benchmark(src, ['10'], 'Pfannkuchen(10) = 38.')