diff options
author | Alon Zakai <alonzakai@gmail.com> | 2012-01-08 17:08:28 -0800 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2012-01-08 17:08:28 -0800 |
commit | c64c1afb59280a80c11238ee24006c92b598ca8c (patch) | |
tree | 44eced2d3c702a73e8ab95d0251380f62c93db21 | |
parent | af942f1c89bb5d35e3641da46c39c6f27b8ffce4 (diff) |
support alignment in memset and memcpy in ta2
-rw-r--r-- | src/jsifier.js | 2 | ||||
-rw-r--r-- | src/library.js | 104 | ||||
-rw-r--r-- | src/library_sdl.js | 2 | ||||
-rw-r--r-- | src/parseTools.js | 170 | ||||
-rw-r--r-- | src/utility.js | 4 |
5 files changed, 152 insertions, 130 deletions
diff --git a/src/jsifier.js b/src/jsifier.js index 8a75e49f..f8fa0e0f 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -520,7 +520,7 @@ function JSify(data, functionsOnly, givenFunctions) { var type = removePointing(param.type); var typeInfo = Types.types[type]; func.JS += ' var tempParam = ' + param.ident + '; ' + param.ident + ' = ' + RuntimeGenerator.stackAlloc(typeInfo.flatSize) + ';' + - makeCopyValues(param.ident, 'tempParam', typeInfo.flatSize, 'null') + ';\n'; + makeCopyValues(param.ident, 'tempParam', typeInfo.flatSize, 'null', null, 1) + ';\n'; } }); diff --git a/src/library.js b/src/library.js index fb36c4a5..5a77315c 100644 --- a/src/library.js +++ b/src/library.js @@ -3676,19 +3676,58 @@ LibraryManager.library = { // FIXME: memcpy, memmove and memset should all return their destination pointers. - memcpy__inline: function (dest, src, num, idunno) { + memcpy__inline: function (dest, src, num, align) { var ret = ''; #if ASSERTIONS ret += "assert(" + num + " % 1 === 0, 'memcpy given ' + " + num + " + ' bytes to copy. Problem with quantum=1 corrections perhaps?');"; #endif - ret += makeCopyValues(dest, src, num, 'null'); + ret += makeCopyValues(dest, src, num, 'null', null, align); return ret; }, - memcpy: function (dest, src, num, idunno) { + memcpy: function (dest, src, num, align) { #if ASSERTIONS assert(num % 1 === 0, 'memcpy given ' + num + ' bytes to copy. Problem with quantum=1 corrections perhaps?'); #endif - {{{ makeCopyValues('dest', 'src', 'num', 'null') }}}; +#if USE_TYPED_ARRAYS == 2 + if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}} && src % 2 == dest % 2) { + // This is unaligned, but quite large, and potentially alignable, so work hard to get to aligned settings + if (src % 4 == dest % 4) { + var stop = src + num; + while (src % 4) { // no need to check for stop, since we have large num + HEAP8[dest++] = HEAP8[src++]; + } + var src4 = src >> 2, dest4 = dest >> 2, stop4 = stop >> 2; + while (src4 < stop4) { + HEAP32[dest4++] = HEAP32[src4++]; + } + src = src4 << 2; + dest = dest4 << 2; + while (src < stop) { + HEAP8[dest++] = HEAP8[src++]; + } + } else { + var stop = src + num; + if (src % 2) { // no need to check for stop, since we have large num + HEAP8[dest++] = HEAP8[src++]; + } + var src2 = src >> 1, dest2 = dest >> 1, stop2 = stop >> 1; + while (src2 < stop2) { + HEAP16[dest2++] = HEAP16[src2++]; + } + src = src2 << 1; + dest = dest2 << 1; + if (src < stop) { + HEAP8[dest++] = HEAP8[src++]; + } + } + } else { + while (num--) { + HEAP8[dest++] = HEAP8[src++]; + } + } +#else + {{{ makeCopyValues('dest', 'src', 'num', 'null', null, 'align') }}}; +#endif }, llvm_memcpy_i32: 'memcpy', @@ -3696,7 +3735,8 @@ LibraryManager.library = { llvm_memcpy_p0i8_p0i8_i32: 'memcpy', llvm_memcpy_p0i8_p0i8_i64: 'memcpy', - memmove: function(dest, src, num, idunno) { + memmove__deps: ['memcpy'], + memmove: function(dest, src, num, align) { if (src < dest && dest < src + num) { // Copy backwards in a safe manner src += num; @@ -3704,10 +3744,10 @@ LibraryManager.library = { while (num--) { dest--; src--; - {{{ makeCopyValues('dest', 'src', 1, 'null') }}}; + {{{ makeCopyValues('dest', 'src', 1, 'null', null, 1) }}}; } } else { - {{{ makeCopyValues('dest', 'src', 'num', 'null') }}}; + _memcpy(dest, src, num, align); } }, llvm_memmove_i32: 'memmove', @@ -3715,11 +3755,35 @@ LibraryManager.library = { llvm_memmove_p0i8_p0i8_i32: 'memmove', llvm_memmove_p0i8_p0i8_i64: 'memmove', - memset__inline: function(ptr, value, num) { - return makeSetValues(ptr, 0, value, 'null', num); - }, - memset: function(ptr, value, num) { - {{{ makeSetValues('ptr', '0', 'value', 'null', 'num') }}} + memset__inline: function(ptr, value, num, align) { + return makeSetValues(ptr, 0, value, 'null', num, align); + }, + memset: function(ptr, value, num, align) { +#if USE_TYPED_ARRAYS == 2 + // TODO: make these settings, and in memcpy, {{'s + if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) { + // This is unaligned, but quite large, so work hard to get to aligned settings + var stop = ptr + num; + while (ptr % 4) { // no need to check for stop, since we have large num + HEAP8[ptr++] = value; + } + if (value < 0) value += 256; // make it unsigned + var ptr4 = ptr >> 2, stop4 = stop >> 2, value4 = value | (value << 8) | (value << 16) | (value << 24); + while (ptr4 < stop4) { + HEAP32[ptr4++] = value4; + } + ptr = ptr4 << 2; + while (ptr < stop) { + HEAP8[ptr++] = value; + } + } else { + while (num--) { + HEAP8[ptr++] = value; + } + } +#else + {{{ makeSetValues('ptr', '0', 'value', 'null', 'num', 'align') }}}; +#endif }, llvm_memset_i32: 'memset', llvm_memset_p0i8_i32: 'memset', @@ -3753,7 +3817,7 @@ LibraryManager.library = { strcpy: function(pdest, psrc) { var i = 0; do { - {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8') }}} + {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8', null, 1) }}}; i ++; } while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0); return pdest; @@ -3761,7 +3825,7 @@ LibraryManager.library = { stpcpy: function(pdest, psrc) { var i = 0; do { - {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8') }}} + {{{ makeCopyValues('pdest+i', 'psrc+i', 1, 'i8', null, 1) }}}; i ++; } while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0); return pdest + i - 1; @@ -3782,7 +3846,7 @@ LibraryManager.library = { var len = _strlen(pdest); var i = 0; do { - {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8') }}} + {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8', null, 1) }}}; i ++; } while ({{{ makeGetValue('psrc', 'i-1', 'i8') }}} != 0); return pdest; @@ -3793,7 +3857,7 @@ LibraryManager.library = { var len = _strlen(pdest); var i = 0; while(1) { - {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8') }}} + {{{ makeCopyValues('pdest+len+i', 'psrc+i', 1, 'i8', null, 1) }}}; if ({{{ makeGetValue('pdest', 'len+i', 'i8') }}} == 0) break; i ++; if (i == num) { @@ -3901,7 +3965,7 @@ LibraryManager.library = { strdup: function(ptr) { var len = String_len(ptr); var newStr = _malloc(len + 1); - {{{ makeCopyValues('newStr', 'ptr', 'len', 'null') }}}; + {{{ makeCopyValues('newStr', 'ptr', 'len', 'null', null, 1) }}}; {{{ makeSetValue('newStr', 'len', '0', 'i8') }}}; return newStr; }, @@ -4161,7 +4225,7 @@ LibraryManager.library = { llvm_va_end: function() {}, llvm_va_copy: function(ppdest, ppsrc) { - {{{ makeCopyValues('ppdest', 'ppsrc', Runtime.QUANTUM_SIZE, 'null') }}} + {{{ makeCopyValues('ppdest', 'ppsrc', Runtime.QUANTUM_SIZE, 'null', null, 1) }}}; /* Alternate implementation that copies the actual DATA; it assumes the va_list is prefixed by its size var psrc = IHEAP[ppsrc]-1; var num = IHEAP[psrc]; // right before the data, is the number of (flattened) values @@ -5079,13 +5143,13 @@ LibraryManager.library = { // ========================================================================== __tms_struct_layout: Runtime.generateStructInfo(null, '%struct.tms'), - times__deps: ['__tms_struct_layout'], + times__deps: ['__tms_struct_layout', 'memset'], times: function(buffer) { // clock_t times(struct tms *buffer); // http://pubs.opengroup.org/onlinepubs/009695399/functions/times.html // NOTE: This is fake, since we can't calculate real CPU time usage in JS. if (buffer !== 0) { - {{{ makeSetValues('buffer', '0', '0', 'null', '___tms_struct_layout.__size__') }}} + memset(buffer, 0, ___tms_struct_layout.__size__); } return 0; }, diff --git a/src/library_sdl.js b/src/library_sdl.js index 5797ccec..a2028a7a 100644 --- a/src/library_sdl.js +++ b/src/library_sdl.js @@ -387,7 +387,7 @@ mergeInto(LibraryManager.library, { var srcData = SDL.surfaces[src]; var dstData = SDL.surfaces[dst]; assert(srcData.width === dstData.width && srcData.height === dstData.height); - {{{ makeCopyValues('dstData.buffer', 'srcData.buffer', 'srcData.width*srcData.height*4', 'i8') }}} + {{{ makeCopyValues('dstData.buffer', 'srcData.buffer', 'srcData.width*srcData.height*4', 'i8', null, 1) }}} return 0; }, diff --git a/src/parseTools.js b/src/parseTools.js index c0a1879a..719fe458 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -1030,130 +1030,84 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe) } } +var SEEK_OPTIMAL_ALIGN_MIN = 40; var UNROLL_LOOP_MAX = 5; -var ZERO_ONE = set(0, 1); - -function makeSetValues(ptr, pos, value, type, num) { - function safety(where) { - where = where || getFastValue(ptr, '+', pos) + '+mspi'; - return ';' + (SAFE_HEAP ? 'SAFE_HEAP_ACCESS(' + where + ', ' + type + ', 1)' : ''); +function makeSetValues(ptr, pos, value, type, num, align) { + function unroll(type, num, jump) { + jump = jump || 1; + return range(num).map(function(i) { + return makeSetValue(ptr, getFastValue(pos, '+', i*jump), value, type); + }).join('; '); } - if (USE_TYPED_ARRAYS in ZERO_ONE) { - if (isNumber(num)) { - if (parseInt(num) <= UNROLL_LOOP_MAX) { - return range(num).map(function(i) { - return makeSetValue(ptr, getFastValue(pos, '+', i), value, type); - }).join('; '); - } + if (USE_TYPED_ARRAYS <= 1) { + if (isNumber(num) && parseInt(num) <= UNROLL_LOOP_MAX) { + return unroll(type, num); } - return 'for (var mspi = 0; mspi < ' + num + '; mspi++) {\n' + - makeSetValue(ptr, getFastValue(pos, '+', 'mspi'), value, type) + '\n}'; + return 'for (var $$i = 0; $$i < ' + num + '; $$i++) {\n' + + makeSetValue(ptr, getFastValue(pos, '+', '$$i'), value, type) + '\n}'; } else { // USE_TYPED_ARRAYS == 2 -/* - return 'for (var mspi = 0; mspi < ' + num + '; mspi++) {\n' + - ' HEAP8[' + getFastValue(ptr, '+', pos) + '+mspi] = ' + value + safety() + '\n}'; -*/ - return '' + - 'var dest_, stop_, stop4_, fast_, value_;\n' + - 'dest_ = ' + getFastValue(ptr, '+', pos) + ';\n' + - 'stop_ = dest_ + ' + num + ';\n' + - 'value_ = ' + value + ';\n' + - 'if (value_ < 0) value_ += 256;\n' + - 'value_ = value_ + (value_<<8) + (value_<<16) + (value_*16777216);\n' + - 'while (dest_%4 !== 0 && dest_ < stop_) {\n' + - ' ' + safety('dest_') + '; HEAP8[dest_++] = ' + value + ';\n' + - '}\n' + - 'dest_ >>= 2;\n' + - 'stop4_ = stop_ >> 2;\n' + - 'while (dest_ < stop4_) {\n' + - safety('(dest_<<2)+0', '(src_<<2)+0') + ';' + safety('(dest_<<2)+1', '(src_<<2)+1') + ';' + - safety('(dest_<<2)+2', '(src_<<2)+2') + ';' + safety('(dest_<<2)+3', '(src_<<2)+3') + (SAFE_HEAP ? ';\n' : '') + - ' HEAP32[dest_++] = value_;\n' + // this is the fast inner loop we try hard to stay in - '}\n' + - 'dest_ <<= 2;\n' + - 'while (dest_ < stop_) {\n' + - ' ' + safety('dest_') + '; HEAP8[dest_++] = ' + value + ';\n' + - '}' + // If we don't know how to handle this at compile-time, or handling it is best done in a large amount of code, call memset + if (!isNumber(num) || (align < 4 && parseInt(num) >= SEEK_OPTIMAL_ALIGN_MIN)) { + return '_memset(' + getFastValue(ptr, '+', pos) + ', ' + value + ', ' + num + ', ' + align + ')'; + } + num = parseInt(num); + var ret = []; + [4, 2, 1].forEach(function(possibleAlign) { + if (num == 0) return; + if (align >= possibleAlign) { + if (num <= UNROLL_LOOP_MAX*possibleAlign) { + ret.push(unroll('i' + (possibleAlign*8), Math.floor(num/possibleAlign), possibleAlign)); + } else { + ret.push('for (var $$i = 0, $$base = ' + getFastValue(ptr, '+', pos) + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') + + '; $$i < ' + Math.floor(num/possibleAlign) + '; $$i++) {\n' + + ' HEAP' + (possibleAlign*8) + '[$$base+$$i] = ' + value + '\n}'); + } + num %= possibleAlign; + } + }); + return ret.join('; '); } } var TYPED_ARRAY_SET_MIN = Infinity; // .set() as memcpy seems to just slow us down -function makeCopyValues(dest, src, num, type, modifier) { - function safety(to, from) { - to = to || (dest + '+' + 'mcpi'); - from = from || (src + '+' + 'mcpi'); - return (SAFE_HEAP ? 'SAFE_HEAP_COPY_HISTORY(' + to + ', ' + from + ')' : ''); +function makeCopyValues(dest, src, num, type, modifier, align) { + function unroll(type, num, jump) { + jump = jump || 1; + return range(num).map(function(i) { + return makeSetValue(dest, i*jump, makeGetValue(src, i*jump, type), type); + }).join('; '); } if (USE_TYPED_ARRAYS <= 1) { - if (isNumber(num)) { - if (parseInt(num) <= UNROLL_LOOP_MAX) { - return range(num).map(function(i) { - return type !== 'null' ? makeSetValue(dest, i, makeGetValue(src, i, type) + (modifier || ''), type) - : // Null is special-cased: We copy over all heaps - makeGetSlabs(dest, 'null', true).map(function(slab) { - return slab + '[' + getFastValue(dest, '+', i) + ']=' + slab + '[' + getFastValue(src, '+', i) + ']'; - }).join('; ') + '; ' + safety(dest + '+' + i, src + '+' + i) - }).join('; '); - } - } - if (SAFE_HEAP) { - return 'for (var mcpi = 0; mcpi < ' + num + '; mcpi++) {\n' + - (type !== 'null' ? makeSetValue(dest, 'mcpi', makeGetValue(src, 'mcpi', type) + (modifier || ''), type) - : // Null is special-cased: We copy over all heaps - makeGetSlabs(dest, 'null', true).map(function(slab) { - return slab + '[' + dest + '+mcpi]=' + slab + '[' + src + '+mcpi]' - }).join('; ') + '; ' + safety() - ) + '\n' + '}'; - } - if (USE_TYPED_ARRAYS == 0) { - return 'for (var mcpi_s=' + src + ',mcpi_e=' + src + '+' + num + ',mcpi_d=' + dest + '; mcpi_s<mcpi_e; mcpi_s++, mcpi_d++) {\n' + - ' HEAP[mcpi_d] = HEAP[mcpi_s];\n' + - '}'; - } else { // USE_TYPED_ARRAYS == 1 - if (isNumber(num) && parseInt(num) >= TYPED_ARRAY_SET_MIN) { - return 'IHEAP.set(IHEAP.subarray(' + src + ',' + src + '+' + num + '), ' + dest + '); ' + - 'FHEAP.set(FHEAP.subarray(' + src + ',' + src + '+' + num + '), ' + dest + ')'; - } - return 'for (var mcpi_s=' + src + ',mcpi_e=' + src + '+' + num + ',mcpi_d=' + dest + '; mcpi_s<mcpi_e; mcpi_s++, mcpi_d++) {\n' + - ' IHEAP[mcpi_d] = IHEAP[mcpi_s];' + (USE_FHEAP ? ' FHEAP[mcpi_d] = FHEAP[mcpi_s];' : '') + '\n' + - '}'; + if (isNumber(num) && parseInt(num) <= UNROLL_LOOP_MAX) { + return unroll(type, num); } + return 'for (var $$i = 0; $$i < ' + num + '; $$i++) {\n' + + makeSetValue(dest, '$$i', makeGetValue(src, i*jump, type), type) + '\n}'; } else { // USE_TYPED_ARRAYS == 2 - // TODO: optimize, add unrolling, etc. - var ret = '' + - 'var src_, dest_, stop_, stop4_;\n' + - 'src_ = ' + src + ';\n' + - 'dest_ = ' + dest + ';\n' + - 'stop_ = src_ + ' + num + ';\n' + - 'if ((dest_%4) == (src_%4) && ' + num + ' > 8) {\n' + - ' while (src_%4 !== 0 && src_ < stop_) {\n' + - ' ' + safety('dest_', 'src_') + '; HEAP8[dest_++] = HEAP8[src_++];\n' + - ' }\n'; - if (SAFE_HEAP || !(isNumber(num) && parseInt(num) >= TYPED_ARRAY_SET_MIN)) { - ret += ' src_ >>= 2;\n' + - ' dest_ >>= 2;\n' + - ' stop4_ = stop_ >> 2;\n' + - ' while (src_ < stop4_) {\n' + - safety('(dest_<<2)+0', '(src_<<2)+0') + ';' + safety('(dest_<<2)+1', '(src_<<2)+1') + ';' + - safety('(dest_<<2)+2', '(src_<<2)+2') + ';' + safety('(dest_<<2)+3', '(src_<<2)+3') + (SAFE_HEAP ? ';\n' : '') + - ' HEAP32[dest_++] = HEAP32[src_++];\n' + - ' }\n' + - ' src_ <<= 2;\n' + - ' dest_ <<= 2;\n'; - } else { - ret += ' var src4_ = src_ >> 2, stop4_ = stop_ >> 2, num4_ = (stop4_ - src4_) << 2;\n' + - ' HEAP32.set(HEAP32.subarray(src4_, stop4_), dest_ >> 2);\n' + - ' src_ += num4_; dest_ += num4_;\n'; + // If we don't know how to handle this at compile-time, or handling it is best done in a large amount of code, call memset + if (!isNumber(num) || (align < 4 && parseInt(num) >= SEEK_OPTIMAL_ALIGN_MIN)) { + return '_memcpy(' + dest + ', ' + src + ', ' + num + ', ' + align + ')'; } - ret += '}' + - 'while (src_ < stop_) {\n' + - ' ' + safety('dest_', 'src_') + '; HEAP8[dest_++] = HEAP8[src_++];\n' + - '}'; - return ret; + num = parseInt(num); + var ret = []; + [4, 2, 1].forEach(function(possibleAlign) { + if (num == 0) return; + if (align >= possibleAlign) { + if (num <= UNROLL_LOOP_MAX*possibleAlign) { + ret.push(unroll('i' + (possibleAlign*8), Math.floor(num/possibleAlign), possibleAlign)); + } else { + ret.push('for (var $$i = 0, $$src = ' + src + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') + ', ' + + '$$dest = ' + dest + (possibleAlign > 1 ? '>>' + log2(possibleAlign) : '') + '; ' + + '$$i < ' + Math.floor(num/possibleAlign) + '; $$i++) {\n' + + ' HEAP' + (possibleAlign*8) + '[$$dest+$$i] = HEAP' + (possibleAlign*8) + '[$$src+$$i]\n}'); + } + num %= possibleAlign; + } + }); + return ret.join('; '); } - return null; } var PLUS_MUL = set('+', '*'); diff --git a/src/utility.js b/src/utility.js index 40b16a97..e49c8509 100644 --- a/src/utility.js +++ b/src/utility.js @@ -302,3 +302,7 @@ function sleep(secs) { while (Date.now() - start < secs*1000) {}; } +function log2(x) { + return Math.log(x)/Math.LN2; +} + |