diff options
author | Alon Zakai <alonzakai@gmail.com> | 2013-01-24 14:07:29 -0800 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2013-01-24 14:07:29 -0800 |
commit | 823c8534b9712810bae2bd9bbe1c7f89cd062cc2 (patch) | |
tree | 83d7370c6187963fbfaf9cc7ecba61aa324005b9 /src | |
parent | af99c279356f4703562633214142e4cccd558a2e (diff) |
optimize memset
Diffstat (limited to 'src')
-rw-r--r-- | src/library.js | 43 | ||||
-rw-r--r-- | src/parseTools.js | 10 | ||||
-rw-r--r-- | src/preamble.js | 8 |
3 files changed, 38 insertions, 23 deletions
diff --git a/src/library.js b/src/library.js
index acbda42b..a069b6ab 100644
--- a/src/library.js
+++ b/src/library.js
@@ -4261,31 +4261,36 @@ LibraryManager.library = {
   memset__inline: function(ptr, value, num, align) {
     return makeSetValues(ptr, 0, value, 'null', num, align);
   },
-  memset: function(ptr, value, num, align) {
+  memset__sig: 'viii',
+  memset__asm: true,
+  memset: function(ptr, value, num) { // fills num bytes starting at ptr with the low byte of value
 #if USE_TYPED_ARRAYS == 2
-    // TODO: make these settings, and in memcpy, {{'s
-    if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) {
+    ptr = ptr|0; value = value|0; num = num|0;
+    var stop = 0, value4 = 0, stop4 = 0, unaligned = 0;
+    stop = (ptr + num)|0;
+    if ((num|0) >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) { // parenthesised: >= binds tighter than |, so a bare num|0 >= N is true for every non-zero num
       // This is unaligned, but quite large, so work hard to get to aligned settings
-      var stop = ptr + num;
-      while (ptr % 4) { // no need to check for stop, since we have large num
-        HEAP8[ptr++] = value;
-      }
-      if (value < 0) value += 256; // make it unsigned
-      var ptr4 = ptr >> 2, stop4 = stop >> 2, value4 = value | (value << 8) | (value << 16) | (value << 24);
-      while (ptr4 < stop4) {
-        HEAP32[ptr4++] = value4;
-      }
-      ptr = ptr4 << 2;
-      while (ptr < stop) {
-        HEAP8[ptr++] = value;
+      unaligned = ptr & 3;
+      value = value & 0xff; value4 = value | (value << 8) | (value << 16) | (value << 24); // mask first: a negative value would otherwise smear sign bits over the upper bytes
+      stop4 = stop & ~3;
+      if (unaligned) {
+        unaligned = (ptr + 4 - unaligned)|0;
+        while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
+          {{{ makeSetValueAsm('ptr', 0, 'value', 'i8') }}};
+          ptr = (ptr+1)|0;
+        }
       }
-    } else {
-      while (num--) {
-        HEAP8[ptr++] = value;
+      while ((ptr|0) < (stop4|0)) {
+        {{{ makeSetValueAsm('ptr', 0, 'value4', 'i32') }}}; // 4-byte store to match the ptr+4 stride; an 'i8' store here would set only one byte in four
+        ptr = (ptr+4)|0;
       }
     }
+    while ((ptr|0) < (stop|0)) {
+      {{{ makeSetValueAsm('ptr', 0, 'value', 'i8') }}};
+      ptr = (ptr+1)|0;
+    }
 #else
-    {{{ makeSetValues('ptr', '0', 'value', 'null', 'num', 'align') }}};
+    {{{ makeSetValues('ptr', '0', 'value', 'null', 'num') }}};
 #endif
   },
   llvm_memset_i32: 'memset',
diff --git a/src/parseTools.js
b/src/parseTools.js index f3ae7a22..3e361731 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -1174,7 +1174,7 @@ function indexizeFunctions(value, type) { //! 'null' means, in the context of SAFE_HEAP, that we should accept all types; //! which means we should write to all slabs, ignore type differences if any on reads, etc. //! @param noNeedFirst Whether to ignore the offset in the pointer itself. -function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign) { +function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign, forceAsm) { /* forceAsm: handed on as the third argument of getHeapOffset() in the final else branch; callers that omit it pass undefined */ if (UNALIGNED_MEMORY && !forcedAlign) align = 1; sep = sep || ';'; if (isStructType(type)) { @@ -1236,12 +1236,14 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, if (type[0] === '#') type = type.substr(1); return 'SAFE_HEAP_STORE(' + offset + ', ' + value + ', ' + type + ', ' + ((!checkSafeHeap() || ignore)|0) + ')'; } else { - return makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type) + ']=' + value }).join(sep); - //return '(print("set:"+(' + value + ')+":"+(' + getHeapOffset(offset, type) + ')),' + - // makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type) + ']=' + value }).join('; ') + ')'; + return makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type, forceAsm) + ']=' + value }).join(sep); /* builds one "slab[offset]=value" string per slab returned by makeGetSlabs() and joins them with sep */ } } +function makeSetValueAsm(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign) { /* forwards all ten arguments unchanged to makeSetValue() and passes true for forceAsm */ + return makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign, true); +} + var SEEK_OPTIMAL_ALIGN_MIN = 20; var UNROLL_LOOP_MAX = 8; diff --git a/src/preamble.js b/src/preamble.js index 52e6a7ca..a677dfaa 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -473,6 +473,14 @@ Module['ALLOC_STACK'] = ALLOC_STACK; Module['ALLOC_STATIC'] = ALLOC_STATIC; 
Module['ALLOC_NONE'] = ALLOC_NONE; +// Simple unoptimized memset - necessary during startup +var _memset = function(ptr, value, num) { /* stores value as an 'i8' at each address from ptr up to, but not including, ptr + num */ + var stop = ptr + num; + while (ptr < stop) { + {{{ makeSetValueAsm('ptr++', 0, 'value', 'i8') }}}; /* NOTE(review): 'ptr++' is spliced textually into the generated store and is the loop's only increment; makeSetValue emits one store per slab, so this presumably relies on exactly one slab for 'i8' - confirm for every heap mode */ + } +} + // allocate(): This is for internal use. You can use it yourself as well, but the interface // is a little tricky (see docs right below). The reason is that it is optimized // for multiple syntaxes to save space in generated code. So you should |