diff options
| author | Alon Zakai <alonzakai@gmail.com> | 2013-01-24 14:07:29 -0800 | 
|---|---|---|
| committer | Alon Zakai <alonzakai@gmail.com> | 2013-01-24 14:07:29 -0800 | 
| commit | 823c8534b9712810bae2bd9bbe1c7f89cd062cc2 (patch) | |
| tree | 83d7370c6187963fbfaf9cc7ecba61aa324005b9 /src | |
| parent | af99c279356f4703562633214142e4cccd558a2e (diff) | |
optimize memset
Diffstat (limited to 'src')
| -rw-r--r-- | src/library.js | 43 | ||||
| -rw-r--r-- | src/parseTools.js | 10 | ||||
| -rw-r--r-- | src/preamble.js | 8 | 
3 files changed, 38 insertions, 23 deletions
```diff
diff --git a/src/library.js b/src/library.js
index acbda42b..a069b6ab 100644
--- a/src/library.js
+++ b/src/library.js
@@ -4261,31 +4261,36 @@ LibraryManager.library = {
   memset__inline: function(ptr, value, num, align) {
     return makeSetValues(ptr, 0, value, 'null', num, align);
   },
-  memset: function(ptr, value, num, align) {
+  memset__sig: 'viii',
+  memset__asm: true,
+  memset: function(ptr, value, num) {
 #if USE_TYPED_ARRAYS == 2
-    // TODO: make these settings, and in memcpy, {{'s
-    if (num >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) {
+    ptr = ptr|0; value = value|0; num = num|0;
+    var stop = 0, value4 = 0, stop4 = 0, unaligned = 0;
+    stop = (ptr + num)|0;
+    if (num|0 >= {{{ SEEK_OPTIMAL_ALIGN_MIN }}}) {
       // This is unaligned, but quite large, so work hard to get to aligned settings
-      var stop = ptr + num;
-      while (ptr % 4) { // no need to check for stop, since we have large num
-        HEAP8[ptr++] = value;
-      }
-      if (value < 0) value += 256; // make it unsigned
-      var ptr4 = ptr >> 2, stop4 = stop >> 2, value4 = value | (value << 8) | (value << 16) | (value << 24);
-      while (ptr4 < stop4) {
-        HEAP32[ptr4++] = value4;
-      }
-      ptr = ptr4 << 2;
-      while (ptr < stop) {
-        HEAP8[ptr++] = value;
+      unaligned = ptr & 3;
+      value4 = value | (value << 8) | (value << 16) | (value << 24);
+      stop4 = stop & ~3;
+      if (unaligned) {
+        unaligned = (ptr + 4 - unaligned)|0;
+        while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
+          {{{ makeSetValueAsm('ptr', 0, 'value', 'i8') }}};
+          ptr = (ptr+1)|0;
+        }
       }
-    } else {
-      while (num--) {
-        HEAP8[ptr++] = value;
+      while ((ptr|0) < (stop4|0)) {
+        {{{ makeSetValueAsm('ptr', 0, 'value4', 'i8') }}};
+        ptr = (ptr+4)|0;
       }
     }
+    while ((ptr|0) < (stop|0)) {
+      {{{ makeSetValueAsm('ptr', 0, 'value', 'i8') }}};
+      ptr = (ptr+1)|0;
+    }
 #else
-    {{{ makeSetValues('ptr', '0', 'value', 'null', 'num', 'align') }}};
+    {{{ makeSetValues('ptr', '0', 'value', 'null', 'num') }}};
 #endif
   },
   llvm_memset_i32: 'memset',
diff --git a/src/parseTools.js b/src/parseTools.js
index f3ae7a22..3e361731 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -1174,7 +1174,7 @@ function indexizeFunctions(value, type) {
 //!             'null' means, in the context of SAFE_HEAP, that we should accept all types;
 //!             which means we should write to all slabs, ignore type differences if any on reads, etc.
 //! @param noNeedFirst Whether to ignore the offset in the pointer itself.
-function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign) {
+function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign, forceAsm) {
   if (UNALIGNED_MEMORY && !forcedAlign) align = 1;
   sep = sep || ';';
   if (isStructType(type)) {
@@ -1236,12 +1236,14 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe,
     if (type[0] === '#') type = type.substr(1);
     return 'SAFE_HEAP_STORE(' + offset + ', ' + value + ', ' + type + ', ' + ((!checkSafeHeap() || ignore)|0) + ')';
   } else {
-    return makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type) + ']=' + value }).join(sep);
-    //return '(print("set:"+(' + value + ')+":"+(' + getHeapOffset(offset, type) + ')),' + 
-    //        makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type) + ']=' + value }).join('; ') + ')';
+    return makeGetSlabs(ptr, type, true).map(function(slab) { return slab + '[' + getHeapOffset(offset, type, forceAsm) + ']=' + value }).join(sep);
   }
 }
 
+function makeSetValueAsm(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign) {
+  return makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe, sep, forcedAlign, true);
+}
+
 var SEEK_OPTIMAL_ALIGN_MIN = 20;
 var UNROLL_LOOP_MAX = 8;
 
diff --git a/src/preamble.js b/src/preamble.js
index 52e6a7ca..a677dfaa 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -473,6 +473,14 @@ Module['ALLOC_STACK'] = ALLOC_STACK;
 Module['ALLOC_STATIC'] = ALLOC_STATIC;
 Module['ALLOC_NONE'] = ALLOC_NONE;
 
+// Simple unoptimized memset - necessary during startup
+var _memset = function(ptr, value, num) {
+  var stop = ptr + num;
+  while (ptr < stop) {
+    {{{ makeSetValueAsm('ptr++', 0, 'value', 'i8') }}};
+  }
+}
+
 // allocate(): This is for internal use. You can use it yourself as well, but the interface
 //             is a little tricky (see docs right below). The reason is that it is optimized
 //             for multiple syntaxes to save space in generated code. So you should
```
