aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alon Zakai <alonzakai@gmail.com> 2012-01-08 17:08:28 -0800
committer Alon Zakai <alonzakai@gmail.com> 2012-01-08 17:08:28 -0800
commit c64c1afb59280a80c11238ee24006c92b598ca8c (patch)
tree 44eced2d3c702a73e8ab95d0251380f62c93db21
parent af942f1c89bb5d35e3641da46c39c6f27b8ffce4 (diff)
support alignment in memset and memcpy in ta2
-rw-r--r-- src/jsifier.js | 2
-rw-r--r-- src/library.js | 104
-rw-r--r-- src/library_sdl.js | 2
-rw-r--r-- src/parseTools.js | 170
-rw-r--r-- src/utility.js | 4
5 files changed, 152 insertions, 130 deletions
diff --git a/src/jsifier.js b/src/jsifier.js
index 8a75e49f..f8fa0e0f 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -520,7 +520,7 @@ function JSify(data, functionsOnly, givenFunctions) {
var type = removePointing(param.type);
var typeInfo = Types.types[type];
func.JS += ' var tempParam = ' + param.ident + '; ' + param.ident + ' = ' + RuntimeGenerator.stackAlloc(typeInfo.flatSize) + ';' +
- makeCopyValues(param.ident, 'tempParam', typeInfo.flatSize, 'null') + ';\n';
+ makeCopyValues(param.ident, 'tempParam', typeInfo.flatSize, 'null', null, 1) + ';\n';
}
});
diff --git a/src/library.js b/src/library.js
index fb36c4a5..5a77315c 100644
--- a/src/library.js
+++ b/src/library.js
@@ -3676,19 +3676,58 @@ LibraryManager.library = {
// FIXME: memcpy, memmove and memset should all return their destination pointers.
- memcpy__inline: function (dest, src, num, idunno) {
+ memcpy__inline: function (dest, src, num, align) {
var ret = '';
#if ASSERTIONS
ret += "assert(" + num + " % 1 === 0, 'memcpy given ' + " + num + " + ' bytes to copy. Problem with quantum=1 corrections perhaps?');";
#endif
- ret += makeCopyValues(dest, src, num, 'null');
+ ret += makeCopyValues(dest, src, num, 'null', null, align);
return ret;
},
- memcpy: function (dest, src, num, idunno) {
+ memcpy: function (dest, src, num, align) {
#if ASSERTIONS
assert(num % 1 === 0, 'memcpy given ' + num + ' bytes to copy. Problem with quantum=1 corrections perhaps?');
#endif
- {{{ makeCopyValues('dest', 'src', 'num', 'null') }}};
+#if USE_TYPED_ARRAYS == 2
+ if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}} && src % 2 == dest % 2) {
+ // This is unaligned, but quite large, and potentially alignable, so work hard to get to aligned settings
+ if (src % 4 == dest % 4) {
+ var stop = src + num;
+ while (src % 4) { // no need to check for stop, since we have large num
+ HEAP8[dest++] = HEAP8[src++];
+ }
+ var src4 = src >> 2, dest4 = dest >> 2, stop4 = stop >> 2;
+ while (src4 < stop4) {
+ HEAP32[dest4++] = HEAP32[src4++];
+ }
+ src = src4 << 2;
+ dest = dest4 << 2;
+ while (src < stop) {
+ HEAP8[dest++] = HEAP8[src++];
+ }
+ } else {
+ var stop = src + num;
+ if (src % 2) { // no need to check for stop, since we have large num
+ HEAP8[dest++] = HEAP8[src++];
+ }
+ var src2 = src >> 1, dest2 = dest >> 1, stop2 = stop >> 1;
+ while (src2 < stop2) {
+ HEAP16[dest2++] = HEAP16[src2++];
+ }
+ src = src2 << 1;
+ dest = dest2 << 1;
+ if (src < stop) {
+ HEAP8[dest++] = HEAP8[src++];
+ }
+ }
+ } else {
+ while (num--) {
+ HEAP8[dest++] = HEAP8[src++];
+ }
+ }
+#else
+ {{{ makeCopyValues('dest', 'src', 'num', 'null', null, 'align') }}};
+#endif
},
llvm_memcpy_i32: 'memcpy',
@@ -3696,7 +3735,8 @@ LibraryManager.library = {
llvm_memcpy_p0i8_p0i8_i32: 'memcpy',
llvm_memcpy_p0i8_p0i8_i64: 'memcpy',
- memmove: function(dest, src, num, idunno) {
+ memmove__deps: ['memcpy'],
+ memmove: function(dest, src, num, align) {
if (src < dest && dest < src + num) {
// Copy backwards in a safe manner
src += num;
@@ -3704,10 +3744,10 @@ LibraryManager.library = {
while (num--) {
dest--;
src--;
- {{{ makeCopyValues('dest', 'src', 1, 'null') }}};
+ {{{ makeCopyValues('dest', 'src', 1, 'null', null, 1) }}};
}
} else {
- {{{ makeCopyValues('dest', 'src', 'num', 'null') }}};
+ _memcpy(dest, src, num, align);
}
},
llvm_memmove_i32: 'memmove',
@@ -3715,11 +3755,35 @@ LibraryManager.library = {
llvm_memmove_p0i8_p0i8_i32: 'memmove',
llvm_memmove_p0i8_p0i8_i64: 'memmove',
- memset__inline: function(ptr, value, num) {
- return makeSetValues(ptr, 0, value, 'null', num);
- },
- memset: function(ptr, value, num) {
- {{{ makeSetValues('ptr', '0', 'value', 'null', 'num') }}}
+ memset__inline: function(ptr, value, num, align) {
+ return makeSetValues(ptr, 0, value, 'null', num, align);
+ },
+ memset: function(ptr, value, num, align) {
+#if USE_TYPED_ARRAYS == 2
+ // TODO: make these settings, and in memcpy, {{'s
+ if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) {
+ // This is unaligned, but quite large, so work hard to get to aligned settings
+ var stop = ptr + num;
+ while (ptr % 4) { // no need to check for stop, since we have large num
+ HEAP8[ptr++] = value;
+ }
+ if (value < 0) value += 256; // make it unsigned
+ var ptr4 = ptr >> 2, stop4 = stop >> 2, value4 = value | (value << 8) | (value << 16) | (value << 24);
+ while (ptr4 < stop4) {
+ HEAP32[ptr4++] = value4;
+ }
+ ptr = ptr4 << 2;
+ while (ptr < stop) {
+ HEAP8[ptr++] = value;
+ }
+ } else {
+ while (num--) {
+ HEAP8[ptr++] = value;
+ }
+ }
+#else
+ {{{ makeSetValues('ptr', '0', 'value', 'null', 'num', 'align') }}};
+#endif
},
llvm_memset_i32: 'memset',
llvm_memset_p0i8_i32: 'memset',
@@ -3753,7 +3817,7 @@ LibraryManager.library = {
strcpy: function(pdest, psrc) {
var i = 0;
do {
- {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8') }}}
+ {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8', null, 1) }}};
i ++;
} while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0);
return pdest;
@@ -3761,7 +3825,7 @@ LibraryManager.library = {
stpcpy: function(pdest, psrc) {
var i = 0;
do {
- {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8') }}}
+ {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8', null, 1) }}};
i ++;
} while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0);
return pdest + i - 1;
@@ -3782,7 +3846,7 @@ LibraryManager.library = {
var len = _strlen(pdest);
var i = 0;
do {
- {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8') }}}
+ {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8', null, 1) }}};
i ++;
} while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0);
return pdest;
@@ -3793,7 +3857,7 @@ LibraryManager.library = {
var len = _strlen(pdest);
var i = 0;
while(1) {
- {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8') }}}
+ {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8', null, 1) }}};
if ({{{ makeGetValue('pdest', 'len+i', 'i8') }}} == 0) break;
i ++;
if (i == num) {
@@ -3901,7 +3965,7 @@ LibraryManager.library = {
strdup: function(ptr) {
var len = String_len(ptr);
var newStr = _malloc(len + 1);
- {{{ makeCopyValues('newStr', 'ptr', 'len', 'null') }}};
+ {{{ makeCopyValues('newStr', 'ptr', 'len', 'null', null, 1) }}};
{{{ makeSetValue('newStr', 'len', '0', 'i8') }}};
return newStr;
},
@@ -4161,7 +4225,7 @@ LibraryManager.library = {
llvm_va_end: function() {},
llvm_va_copy: function(ppdest, ppsrc) {
- {{{ makeCopyValues('ppdest', 'ppsrc', Runtime.QUANTUM_SIZE, 'null') }}}
+ {{{ makeCopyValues('ppdest', 'ppsrc', Runtime.QUANTUM_SIZE, 'null', null, 1) }}};
/* Alternate implementation that copies the actual DATA; it assumes the va_list is prefixed by its size
var psrc = IHEAP[ppsrc]-1;
var num = IHEAP[psrc]; // right before the data, is the number of (flattened) values
@@ -5079,13 +5143,13 @@ LibraryManager.library = {
// ==========================================================================
__tms_struct_layout: Runtime.generateStructInfo(null, '%struct.tms'),
- times__deps: ['__tms_struct_layout'],
+ times__deps: ['__tms_struct_layout', 'memset'],
times: function(buffer) {
// clock_t times(struct tms *buffer);
// http://pubs.opengroup.org/onlinepubs/009695399/functions/times.html
// NOTE: This is fake, since we can't calculate real CPU time usage in JS.
if (buffer !== 0) {
- {{{ makeSetValues('buffer', '0', '0', 'null', '___tms_struct_layout.__size__') }}}
+ _memset(buffer, 0, ___tms_struct_layout.__size__); // library functions are called by their '_'-prefixed name (cf. _memcpy, _strlen, _malloc)
}
return 0;
},
diff --git a/src/library_sdl.js b/src/library_sdl.js
index 5797ccec..a2028a7a 100644
--- a/src/library_sdl.js
+++ b/src/library_sdl.js
@@ -387,7 +387,7 @@ mergeInto(LibraryManager.library, {
var srcData = SDL.surfaces[src];
var dstData = SDL.surfaces[dst];
assert(srcData.width === dstData.width && srcData.height === dstData.height);
- {{{ makeCopyValues('dstData.buffer', 'srcData.buffer', 'srcData.width*srcData.height*4', 'i8') }}}
+ {{{ makeCopyValues('dstData.buffer', 'srcData.buffer', 'srcData.width*srcData.height*4', 'i8', null, 1) }}}
return 0;
},
diff --git a/src/parseTools.js b/src/parseTools.js
index c0a1879a..719fe458 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -1030,130 +1030,84 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe)
}
}
+var SEEK_OPTIMAL_ALIGN_MIN = 40;
var UNROLL_LOOP_MAX = 5;
-var ZERO_ONE = set(0, 1);
-
-function makeSetValues(ptr, pos, value, type, num) {
- function safety(where) {
- where = where || getFastValue(ptr, '+', pos) + '+mspi';
- return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + where + ', ' + type + ', 1)' : '');
+function makeSetValues(ptr, pos, value, type, num, align) { // returns generated JS source that fills `num` units starting at ptr+pos with `value`
+ function unroll(type, num, jump, unitValue) { // one makeSetValue per unit, spaced `jump` apart, each storing `unitValue`
+ jump = jump || 1;
+ return range(num).map(function(i) {
+ return makeSetValue(ptr, getFastValue(pos, '+', i*jump), unitValue, type);
+ }).join('; ');
}
- if (USE_TYPED_ARRAYS in ZERO_ONE) {
- if (isNumber(num)) {
- if (parseInt(num) <= UNROLL_LOOP_MAX) {
- return range(num).map(function(i) {
- return makeSetValue(ptr, getFastValue(pos, '+', i), value, type);
- }).join('; ');
- }
+ if (USE_TYPED_ARRAYS <= 1) {
+ if (isNumber(num) && parseInt(num) <= UNROLL_LOOP_MAX) {
+ return unroll(type, num, 1, value);
}
- return 'for (var mspi = 0; mspi < ' + num + '; mspi++) {\n' +
- makeSetValue(ptr, getFastValue(pos, '+', 'mspi'), value, type) + '\n}';
+ return 'for (var $$i = 0; $$i < ' + num + '; $$i++) {\n' +
+ makeSetValue(ptr, getFastValue(pos, '+', '$$i'), value, type) + '\n}';
} else { // USE_TYPED_ARRAYS == 2
-/*
- return 'for (var mspi = 0; mspi < ' + num + '; mspi++) {\n' +
- ' HEAP8[' + getFastValue(ptr, '+', pos) + '+mspi] = ' + value + safety() + '\n}';
-*/
- return '' +
- 'var dest_, stop_, stop4_, fast_, value_;\n' +
- 'dest_ = ' + getFastValue(ptr, '+', pos) + ';\n' +
- 'stop_ = dest_ + ' + num + ';\n' +
- 'value_ = ' + value + ';\n' +
- 'if (value_ < 0) value_ += 256;\n' +
- 'value_ = value_ + (value_<<8) + (value_<<16) + (value_*16777216);\n' +
- 'while (dest_%4 !== 0 && dest_ < stop_) {\n' +
- ' ' + safety('dest_') + '; HEAP8[dest_++] = ' + value + ';\n' +
- '}\n' +
- 'dest_ >>= 2;\n' +
- 'stop4_ = stop_ >> 2;\n' +
- 'while (dest_ < stop4_) {\n' +
- safety('(dest_<<2)+0', '(src_<<2)+0') + ';' + safety('(dest_<<2)+1', '(src_<<2)+1') + ';' +
- safety('(dest_<<2)+2', '(src_<<2)+2') + ';' + safety('(dest_<<2)+3', '(src_<<2)+3') + (SAFE_HEAP ? ';\n' : '') +
- ' HEAP32[dest_++] = value_;\n' + // this is the fast inner loop we try hard to stay in
- '}\n' +
- 'dest_ <<= 2;\n' +
- 'while (dest_ < stop_) {\n' +
- ' ' + safety('dest_') + '; HEAP8[dest_++] = ' + value + ';\n' +
- '}'
+ // If num or value is not known at compile-time, or handling it is best done in a large amount of code, call memset
+ if (!isNumber(num) || !isNumber(value) || (align < 4 && parseInt(num) >= SEEK_OPTIMAL_ALIGN_MIN)) {
+ return '_memset(' + getFastValue(ptr, '+', pos) + ', ' + value + ', ' + num + ', ' + align + ')';
+ }
+ num = parseInt(num);
+ var ret = [];
+ [4, 2, 1].forEach(function(possibleAlign) {
+ if (num == 0 || !(align >= possibleAlign)) return;
+ var wide = ((parseInt(value) & 255) * [0, 1, 0x0101, 0, 0x01010101][possibleAlign]) | 0; // fill byte replicated across the 1/2/4-byte unit
+ if (num <= UNROLL_LOOP_MAX*possibleAlign) {
+ ret.push(unroll('i' + (possibleAlign*8), Math.floor(num/possibleAlign), possibleAlign, wide));
+ } else {
+ ret.push('for (var $$i = 0, $$base = ' + getFastValue(ptr, '+', pos) + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') +
+ '; $$i < ' + Math.floor(num/possibleAlign) + '; $$i++) {\n' +
+ ' HEAP' + (possibleAlign*8) + '[$$base+$$i] = ' + wide + '\n}');
+ }
+ pos = getFastValue(pos, '+', num - num%possibleAlign); // narrower units must start after the bytes just written
+ num %= possibleAlign;
+ });
+ return ret.join('; ');
}
}
var TYPED_ARRAY_SET_MIN = Infinity; // .set() as memcpy seems to just slow us down
-function makeCopyValues(dest, src, num, type, modifier) {
- function safety(to, from) {
- to = to || (dest + '+' + 'mcpi');
- from = from || (src + '+' + 'mcpi');
- return (SAFE_HEAP ? 'SAFE_HEAP_COPY_HISTORY(' + to + ', ' + from + ')' : '');
+function makeCopyValues(dest, src, num, type, modifier, align) { // returns generated JS source that copies `num` units from src to dest
+ function unroll(type, num, jump) { // one get/set pair per unit, spaced `jump` apart
+ jump = jump || 1;
+ return range(num).map(function(i) {
+ return makeSetValue(dest, i*jump, makeGetValue(src, i*jump, type), type);
+ }).join('; ');
}
if (USE_TYPED_ARRAYS <= 1) {
- if (isNumber(num)) {
- if (parseInt(num) <= UNROLL_LOOP_MAX) {
- return range(num).map(function(i) {
- return type !== 'null' ? makeSetValue(dest, i, makeGetValue(src, i, type) + (modifier || ''), type)
- : // Null is special-cased: We copy over all heaps
- makeGetSlabs(dest, 'null', true).map(function(slab) {
- return slab + '[' + getFastValue(dest, '+', i) + ']=' + slab + '[' + getFastValue(src, '+', i) + ']';
- }).join('; ') + '; ' + safety(dest + '+' + i, src + '+' + i)
- }).join('; ');
- }
- }
- if (SAFE_HEAP) {
- return 'for (var mcpi = 0; mcpi < ' + num + '; mcpi++) {\n' +
- (type !== 'null' ? makeSetValue(dest, 'mcpi', makeGetValue(src, 'mcpi', type) + (modifier || ''), type)
- : // Null is special-cased: We copy over all heaps
- makeGetSlabs(dest, 'null', true).map(function(slab) {
- return slab + '[' + dest + '+mcpi]=' + slab + '[' + src + '+mcpi]'
- }).join('; ') + '; ' + safety()
- ) + '\n' + '}';
- }
- if (USE_TYPED_ARRAYS == 0) {
- return 'for (var mcpi_s=' + src + ',mcpi_e=' + src + '+' + num + ',mcpi_d=' + dest + '; mcpi_s<mcpi_e; mcpi_s++, mcpi_d++) {\n' +
- ' HEAP[mcpi_d] = HEAP[mcpi_s];\n' +
- '}';
- } else { // USE_TYPED_ARRAYS == 1
- if (isNumber(num) && parseInt(num) >= TYPED_ARRAY_SET_MIN) {
- return 'IHEAP.set(IHEAP.subarray(' + src + ',' + src + '+' + num + '), ' + dest + '); ' +
- 'FHEAP.set(FHEAP.subarray(' + src + ',' + src + '+' + num + '), ' + dest + ')';
- }
- return 'for (var mcpi_s=' + src + ',mcpi_e=' + src + '+' + num + ',mcpi_d=' + dest + '; mcpi_s<mcpi_e; mcpi_s++, mcpi_d++) {\n' +
- ' IHEAP[mcpi_d] = IHEAP[mcpi_s];' + (USE_FHEAP ? ' FHEAP[mcpi_d] = FHEAP[mcpi_s];' : '') + '\n' +
- '}';
+ if (isNumber(num) && parseInt(num) <= UNROLL_LOOP_MAX) {
+ return unroll(type, num);
}
+ return 'for (var $$i = 0; $$i < ' + num + '; $$i++) {\n' +
+ makeSetValue(dest, '$$i', makeGetValue(src, '$$i', type), type) + '\n}';
} else { // USE_TYPED_ARRAYS == 2
- // TODO: optimize, add unrolling, etc.
- var ret = '' +
- 'var src_, dest_, stop_, stop4_;\n' +
- 'src_ = ' + src + ';\n' +
- 'dest_ = ' + dest + ';\n' +
- 'stop_ = src_ + ' + num + ';\n' +
- 'if ((dest_%4) == (src_%4) && ' + num + ' > 8) {\n' +
- ' while (src_%4 !== 0 && src_ < stop_) {\n' +
- ' ' + safety('dest_', 'src_') + '; HEAP8[dest_++] = HEAP8[src_++];\n' +
- ' }\n';
- if (SAFE_HEAP || !(isNumber(num) && parseInt(num) >= TYPED_ARRAY_SET_MIN)) {
- ret += ' src_ >>= 2;\n' +
- ' dest_ >>= 2;\n' +
- ' stop4_ = stop_ >> 2;\n' +
- ' while (src_ < stop4_) {\n' +
- safety('(dest_<<2)+0', '(src_<<2)+0') + ';' + safety('(dest_<<2)+1', '(src_<<2)+1') + ';' +
- safety('(dest_<<2)+2', '(src_<<2)+2') + ';' + safety('(dest_<<2)+3', '(src_<<2)+3') + (SAFE_HEAP ? ';\n' : '') +
- ' HEAP32[dest_++] = HEAP32[src_++];\n' +
- ' }\n' +
- ' src_ <<= 2;\n' +
- ' dest_ <<= 2;\n';
- } else {
- ret += ' var src4_ = src_ >> 2, stop4_ = stop_ >> 2, num4_ = (stop4_ - src4_) << 2;\n' +
- ' HEAP32.set(HEAP32.subarray(src4_, stop4_), dest_ >> 2);\n' +
- ' src_ += num4_; dest_ += num4_;\n';
+ // If we don't know how to handle this at compile-time, or handling it is best done in a large amount of code, call memcpy
+ if (!isNumber(num) || (align < 4 && parseInt(num) >= SEEK_OPTIMAL_ALIGN_MIN)) {
+ return '_memcpy(' + dest + ', ' + src + ', ' + num + ', ' + align + ')';
}
- ret += '}' +
- 'while (src_ < stop_) {\n' +
- ' ' + safety('dest_', 'src_') + '; HEAP8[dest_++] = HEAP8[src_++];\n' +
- '}';
- return ret;
+ num = parseInt(num);
+ var ret = [];
+ [4, 2, 1].forEach(function(possibleAlign) {
+ if (num == 0 || !(align >= possibleAlign)) return;
+ if (num <= UNROLL_LOOP_MAX*possibleAlign) {
+ ret.push(unroll('i' + (possibleAlign*8), Math.floor(num/possibleAlign), possibleAlign));
+ } else {
+ ret.push('for (var $$i = 0, $$src = ' + src + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') + ', ' +
+ '$$dest = ' + dest + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') + '; ' +
+ '$$i < ' + Math.floor(num/possibleAlign) + '; $$i++) {\n' +
+ ' HEAP' + (possibleAlign*8) + '[$$dest+$$i] = HEAP' + (possibleAlign*8) + '[$$src+$$i]\n}');
+ }
+ src = getFastValue(src, '+', num - num%possibleAlign); // narrower units must continue after the bytes just copied
+ dest = getFastValue(dest, '+', num - num%possibleAlign);
+ num %= possibleAlign;
+ });
+ return ret.join('; ');
}
- return null;
}
var PLUS_MUL = set('+', '*');
diff --git a/src/utility.js b/src/utility.js
index 40b16a97..e49c8509 100644
--- a/src/utility.js
+++ b/src/utility.js
@@ -302,3 +302,7 @@ function sleep(secs) {
while (Date.now() - start < secs*1000) {};
}
+function log2(x) { // base-2 logarithm of x, computed by change of base: ln(x) / ln(2)
+ return Math.log(x)/Math.LN2;
+}
+