author     Alon Zakai <alonzakai@gmail.com>  2011-11-16 12:49:07 -0800
committer  Alon Zakai <alonzakai@gmail.com>  2011-11-16 12:49:07 -0800
commit     f240c68de7fc6399af7b7ea08a42d9ee7c8d2748 (patch)
tree       696507c4fe88bd65f0ef3f0cdd58c00e43a5acb9
parent     1e3a60f206041ec004ef9755a673103c62e3c213 (diff)
initial support for unaligned reads/writes in t2
-rw-r--r--  src/intertyper.js  | 15
-rw-r--r--  src/jsifier.js     |  6
-rw-r--r--  src/parseTools.js  | 70
-rw-r--r--  src/preamble.js    |  4
-rw-r--r--  src/settings.js    |  7
-rw-r--r--  tests/runner.py    | 75
-rw-r--r--  tools/shared.py    | 14
7 files changed, 152 insertions(+), 39 deletions(-)
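In USE_TYPED_ARRAYS == 2, loads and stores compile to typed-array element accesses, which silently mask off the low pointer bits; this patch instead emits byte-by-byte accesses when LLVM marks a load or store with an alignment smaller than its type. A minimal sketch of the byte-wise idea (illustrative only, not actual compiler output; HEAPU8 is a stand-in unsigned byte view):

    // Read a 32-bit little-endian value from an arbitrary (possibly
    // unaligned) byte address, one byte at a time.
    var HEAPU8 = new Uint8Array(16);
    function unalignedLoadI32(ptr) {
      return HEAPU8[ptr] |
             (HEAPU8[ptr + 1] << 8) |
             (HEAPU8[ptr + 2] << 16) |
             (HEAPU8[ptr + 3] << 24); // '|' folds the result to a signed 32-bit int
    }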
diff --git a/src/intertyper.js b/src/intertyper.js
index 30fd7250..c1b9b78e 100644
--- a/src/intertyper.js
+++ b/src/intertyper.js
@@ -546,7 +546,14 @@ function intertyper(data, parseFunctions, baseLineNum) {
item.valueType = item.type = removePointing(item.pointerType);
Types.needAnalysis[item.type] = 0;
var last = getTokenIndexByText(item.tokens, ';');
- item.pointer = parseLLVMSegment(item.tokens.slice(1, last)); // TODO: Use this everywhere else too
+ var segments = splitTokenList(item.tokens.slice(1, last));
+ item.pointer = parseLLVMSegment(segments[0]);
+ if (segments.length > 1) {
+ assert(segments[1][0].text == 'align');
+ item.align = parseInt(segments[1][1].text) || QUANTUM_SIZE; // 0 means preferred arch align
+ } else {
+ item.align = QUANTUM_SIZE;
+ }
item.ident = item.pointer.ident || null;
this.forwardItem(item, 'Reintegrator');
}
@@ -792,6 +799,12 @@ function intertyper(data, parseFunctions, baseLineNum) {
ret.ident = toNiceIdent(ret.pointer.ident);
ret.pointerType = ret.pointer.type;
Types.needAnalysis[ret.pointerType] = 0;
+ if (segments.length > 2) {
+ assert(segments[2][0].text == 'align');
+ ret.align = parseInt(segments[2][1].text) || QUANTUM_SIZE; // 0 means preferred arch align
+ } else {
+ ret.align = QUANTUM_SIZE;
+ }
return [ret];
}
});
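In the LLVM IR this parses (2011-era typed-pointer syntax), the alignment arrives as a trailing token, e.g. "load i16* %p, align 1". After the change, a Load item therefore carries roughly this shape (simplified sketch; other fields elided):

    var item = {
      pointer: { intertype: 'value', ident: '%p' }, // parsed first segment (simplified)
      align: 1,   // parseInt of the token after 'align'; 0 or a missing token => QUANTUM_SIZE
      ident: '%p'
    };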
diff --git a/src/jsifier.js b/src/jsifier.js
index adf465ed..5f75b374 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -645,9 +645,9 @@ function JSify(data, functionsOnly, givenFunctions, givenGlobalVariables) {
break;
case VAR_EMULATED:
if (item.pointer.intertype == 'value') {
- return makeSetValue(item.ident, 0, value, item.valueType) + ';';
+ return makeSetValue(item.ident, 0, value, item.valueType, 0, 0, item.align) + ';';
} else {
- return makeSetValue(0, finalizeLLVMParameter(item.pointer), value, item.valueType) + ';';
+ return makeSetValue(0, finalizeLLVMParameter(item.pointer), value, item.valueType, 0, 0, item.align) + ';';
}
break;
default:
@@ -784,7 +784,7 @@ function JSify(data, functionsOnly, givenFunctions, givenGlobalVariables) {
case VAR_NATIVIZED: {
return value; // We have the actual value here
}
- case VAR_EMULATED: return makeGetValue(value, 0, item.type, 0, item.unsigned);
+ case VAR_EMULATED: return makeGetValue(value, 0, item.type, 0, item.unsigned, 0, item.align);
default: throw "unknown [load] impl: " + impl;
}
});
diff --git a/src/parseTools.js b/src/parseTools.js
index dc6f597b..423ed0cb 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -531,8 +531,11 @@ function makeI64(low, high) {
return '[' + low + ',' + (high || '0') + ']';
// FIXME with this? return '[unSign(' + low + ',32),' + (high ? ('unSign(' + high + ',32)') : '0') + ']';
} else {
- assert(!high);
- return low;
+ var ret = low;
+ if (high) {
+ ret = '(' + low + '+(4294967296*' + high + '))';
+ }
+ return ret;
}
}
@@ -540,13 +543,20 @@ function makeI64(low, high) {
// Will suffer from rounding. mergeI64 does the opposite.
function splitI64(value) {
assert(I64_MODE == 1);
- return '(tempInt=' + value + ',' + makeI64('tempInt|0', 'Math.floor(tempInt/4294967296)') + ')';
+ return '(tempInt=' + value + ',' + makeI64('tempInt>>>0', 'Math.floor(tempInt/4294967296)') + ')';
}
function mergeI64(value) {
assert(I64_MODE == 1);
return '(tempI64=' + value + ',tempI64[0]+tempI64[1]*4294967296)';
}
+// Takes an i64 value and changes it into the [low, high] form used in i64 mode 1. In that
+// mode, this is a no-op
+function ensureI64_1(value) {
+ if (I64_MODE == 1) return value;
+ return '(tempInt=' + value + ',[tempInt>>>0, Math.floor(tempInt/4294967296)])';
+}
+
function makeCopyI64(value) {
assert(I64_MODE == 1);
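A worked example of the split/merge pair, and of why the patch switches the low word from '|0' to '>>>0' (a sketch; plain JS numbers, so exact only below 2^53):

    var tempInt = 4294967301;                      // 2^32 + 5
    var low  = tempInt >>> 0;                      // 5; '>>>0' wraps unsigned mod 2^32
    var high = Math.floor(tempInt / 4294967296);   // 1
    // mergeI64 recombines the pair:
    console.log(low + high * 4294967296);          // 4294967301
    // The old 'tempInt|0' goes wrong once bit 31 is set:
    console.log(4294967295 | 0, 4294967295 >>> 0); // -1 vs 4294967295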
@@ -687,7 +697,7 @@ function getLabelIds(labels) {
return labels.map(function(label) { return label.ident });
}
-//! Returns the size of a type, as C/C++ would have it (in 32-bit, for now).
+//! Returns the size of a type, as C/C++ would have it (in 32-bit, for now), in bytes.
//! @param type The type, by name.
function getNativeTypeSize(type) {
if (QUANTUM_SIZE == 1) return 1;
@@ -851,7 +861,7 @@ function getHeapOffset(offset, type) {
}
// See makeSetValue
-function makeGetValue(ptr, pos, type, noNeedFirst, unsigned, ignore) {
+function makeGetValue(ptr, pos, type, noNeedFirst, unsigned, ignore, align) {
if (isStructType(type)) {
var typeData = Types.types[type];
var ret = [];
@@ -861,6 +871,29 @@ function makeGetValue(ptr, pos, type, noNeedFirst, unsigned, ignore) {
return '{ ' + ret.join(', ') + ' }';
}
+ if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+ // Alignment is important here. May need to split this up
+ var bytes = getNativeTypeSize(type);
+ if (bytes > align) {
+ var ret = '/* unaligned */(';
+ if (bytes <= 4) {
+ for (var i = 0; i < bytes; i++) {
+ ret += 'tempInt' + (i == 0 ? '=' : (i < bytes-1 ? '+=((' : '+(('));
+ ret += makeSignOp(makeGetValue(ptr, getFastValue(pos, '+', i), 'i8', noNeedFirst, unsigned, ignore), 'i8', 'un', true);
+ if (i > 0) ret += ')<<' + (8*i) + ')';
+ if (i < bytes-1) ret += ',';
+ }
+ } else {
+ assert(bytes == 8);
+ ret += 'tempBigInt=' + makeGetValue(ptr, pos, 'i32', noNeedFirst, true, ignore, align) + ',';
+ ret += 'tempBigInt2=' + makeGetValue(ptr, getFastValue(pos, '+', getNativeTypeSize('i32')), 'i32', noNeedFirst, true, ignore, align) + ',';
+ ret += makeI64('tempBigInt', 'tempBigInt2');
+ }
+ ret += ')';
+ return ret;
+ }
+ }
+
if (type == 'i64' && I64_MODE == 1) {
return '[' + makeGetValue(ptr, pos, 'i32', noNeedFirst, unsigned, ignore) + ','
+ makeGetValue(ptr, getFastValue(pos, '+', getNativeTypeSize('i32')), 'i32', noNeedFirst, unsigned, ignore) + ']';
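Concretely, for an unaligned i32 load at pointer p the loop above builds an expression along these lines (reformatted for readability; HEAPU8 stands in for the unsigned i8 reads that makeSignOp forces). The i64 case (bytes == 8) instead recurses into two aligned-style i32 reads into tempBigInt/tempBigInt2 and pairs them with makeI64:

    /* unaligned */ (tempInt  =   HEAPU8[p],
                     tempInt += ((HEAPU8[p+1]) << 8),
                     tempInt += ((HEAPU8[p+2]) << 16),
                     tempInt +  ((HEAPU8[p+3]) << 24))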
@@ -899,7 +932,7 @@ function indexizeFunctions(value, type) {
//! 'null' means, in the context of SAFE_HEAP, that we should accept all types;
//! which means we should write to all slabs, ignore type differences if any on reads, etc.
//! @param noNeedFirst Whether to ignore the offset in the pointer itself.
-function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore) {
+function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align) {
if (isStructType(type)) {
var typeData = Types.types[type];
var ret = [];
@@ -914,6 +947,27 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore) {
return ret.join('; ');
}
+ if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+ // Alignment is important here. May need to split this up
+ var bytes = getNativeTypeSize(type);
+ if (bytes > align) {
+ var ret = '/* unaligned */';
+ if (bytes <= 4) {
+ ret += 'tempInt=' + value + ';';
+ for (var i = 0; i < bytes; i++) {
+ ret += makeSetValue(ptr, getFastValue(pos, '+', i), 'tempInt&0xff', 'i8', noNeedFirst, ignore) + ';';
+ if (i < bytes-1) ret += 'tempInt>>=8;';
+ }
+ } else {
+ assert(bytes == 8);
+ ret += 'tempPair=' + ensureI64_1(value) + ';';
+ ret += makeSetValue(ptr, pos, 'tempPair[0]', 'i32', noNeedFirst, ignore, align) + ';';
+ ret += makeSetValue(ptr, getFastValue(pos, '+', getNativeTypeSize('i32')), 'tempPair[1]', 'i32', noNeedFirst, ignore, align) + ';';
+ }
+ return ret;
+ }
+ }
+
if (type == 'i64' && I64_MODE == 1) {
return '(' + makeSetValue(ptr, pos, value + '[0]', 'i32', noNeedFirst, ignore) + ','
+ makeSetValue(ptr, getFastValue(pos, '+', getNativeTypeSize('i32')), value + '[1]', 'i32', noNeedFirst, ignore) + ')';
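The store side similarly decomposes into byte writes; for an unaligned i32 store of value v at p the emitted statements look roughly like this (HEAP8 writes stand in for the emitted i8 setValue calls). An i64 store first normalizes the value into a [low, high] pair via ensureI64_1 (held in tempPair) and recurses into two i32 stores:

    /* unaligned */ tempInt = v;
    HEAP8[p]   = tempInt & 0xff; tempInt >>= 8;
    HEAP8[p+1] = tempInt & 0xff; tempInt >>= 8;
    HEAP8[p+2] = tempInt & 0xff; tempInt >>= 8;
    HEAP8[p+3] = tempInt & 0xff;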
@@ -1338,7 +1392,7 @@ function finalizeLLVMParameter(param, noIndexizeFunctions) {
return ret;
}
-function makeSignOp(value, type, op) {
+function makeSignOp(value, type, op, force) {
if (isPointerType(type)) type = 'i32'; // Pointers are treated as 32-bit ints
if (!value) return value;
var bits, full;
@@ -1352,7 +1406,7 @@ function makeSignOp(value, type, op) {
return eval(full).toString();
}
}
- if (!correctSigns() && !CHECK_SIGNS) return value;
+ if (!correctSigns() && !CHECK_SIGNS && !force) return value;
if (type in Runtime.INT_TYPES) {
// shortcuts
if (!CHECK_SIGNS) {
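The new force parameter exists for the byte loads above: when neither CORRECT_SIGNS nor CHECK_SIGNS is active, makeSignOp would otherwise return the value untouched, and a sign-extended i8 would corrupt the reassembled word. A small illustration:

    var b = -1;                          // an i8 read of byte 0xff, sign-extended
    console.log(b + (1 << 8));           // 255, wrong contribution from the low byte
    console.log((b & 0xff) + (1 << 8));  // 511 (0x1ff), what forcing 'un' guarantees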
diff --git a/src/preamble.js b/src/preamble.js
index 7aa07cc1..4bdb4333 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -66,7 +66,7 @@ var warned64 = false;
function warn64() {
if (!warned64) {
__ATEXIT__.push({ func: function() {
- print('Warning: using a 64-bit type with USE_TYPED_ARRAYS == 2. This is emulated as a 32-bit value, and will likely fail horribly.');
+ print('Warning: using a 64-bit type with USE_TYPED_ARRAYS == 2. Depending on I64_MODE this may be problematic.');
} });
warned64 = true;
}
@@ -381,7 +381,7 @@ var __ATEXIT__ = [];
var ABORT = false;
var undef = 0;
-var tempValue, tempInt, tempBigInt;
+var tempValue, tempInt, tempBigInt, tempInt2, tempBigInt2, tempPair;
#if I64_MODE == 1
var tempI64, tempI64b;
#endif
diff --git a/src/settings.js b/src/settings.js
index 092822e9..7e076288 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -56,6 +56,13 @@ I64_MODE = 0; // How to implement 64-bit integers:
// use doubles for addition etc., like mode 0. This mode is slower than
// mode 0, so its only benefit is proper support for 64 bit bitops.
// TODO: Full bignum support
+EMULATE_UNALIGNED_ACCESSES = 1; // If set, the compiler will 'emulate' loads and stores that are not known to
+ // be sufficiently aligned, by working on individual bytes. This can be
+ // important in USE_TYPED_ARRAYS == 2, where unaligned accesses do not work,
+ // specifically in the case where unsafe LLVM optimizations have generated possibly
+ // unaligned code. (Without unsafe LLVM optimizations, there should be no
+ // need for this option.)
+ // Currently this only works for integers, not doubles and floats.
SKIP_STACK_IN_SMALL = 1; // When enabled, does not push/pop the stack at all in
// functions that have no basic stack usage. But, they
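The reason t2 needs this at all: in USE_TYPED_ARRAYS == 2 a load compiles to an element access like HEAP32[ptr >> 2], so the low pointer bits are shifted away and an unaligned address silently reads or writes the wrong slot (SAFE_HEAP turns this into the 'must be aligned' error the new test checks for). A sketch:

    var buffer = new ArrayBuffer(16);
    var HEAP32 = new Int32Array(buffer);
    var ptr = 1;                  // odd byte address
    HEAP32[ptr >> 2] = 515559;    // 1 >> 2 === 0: lands at byte 0, not byte 1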
diff --git a/tests/runner.py b/tests/runner.py
index 6ab84708..f3a938db 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -60,12 +60,10 @@ class RunnerCore(unittest.TestCase):
return self.working_dir
# Similar to LLVM::createStandardModulePasses()
- def pick_llvm_opts(self, optimization_level):
+ def pick_llvm_opts(self, optimization_level, safe=True):
global LLVM_OPT_OPTS
- # TODO: TA2 should be able to withstand unsafe opts, and we do use I64_MODE = 1 there
- LLVM_OPT_OPTS = pick_llvm_opts(optimization_level, safe=True)
- #LLVM_OPT_OPTS = pick_llvm_opts(optimization_level, safe=Settings.USE_TYPED_ARRAYS != 2)
+ LLVM_OPT_OPTS = pick_llvm_opts(optimization_level, safe)
def prep_ll_run(self, filename, ll_file, force_recompile=False, build_ll_hook=None):
if ll_file.endswith(('.bc', '.o')):
@@ -463,6 +461,41 @@ if 'benchmark' not in str(sys.argv):
'*18446744073709552000*\n*576460752303423500*\n' +
'm1: 127\n*123*\n*127*\n')
+ def test_unaligned(self):
+ if Settings.QUANTUM_SIZE == 1: return self.skip('No meaning to unaligned addresses in q1')
+ if Settings.USE_TYPED_ARRAYS != 2: return self.skip('No meaning to unaligned addresses without t2')
+
+ src = r'''
+ #include <stdio.h>
+
+ int main()
+ {
+ int x[10];
+ char *p = (char*)&x[0];
+ p++;
+ short *q = (short*)p;
+ *q = 300;
+ printf("*%d:%d*\n", *q, ((int)q)%2);
+ int *r = (int*)p;
+ *r = 515559;
+ printf("*%d*\n", *r);
+ long long *t = (long long*)p;
+ *t = 42949672960;
+ printf("*%Ld*\n", *t);
+ return 0;
+ }
+ '''
+
+ Settings.EMULATE_UNALIGNED_ACCESSES = 0
+
+ try:
+ self.do_run(src, '*300:1*\n*515559*\n*42949672960*\n')
+ except Exception, e:
+ assert 'must be aligned' in str(e), e # expected to fail without emulation
+
+ # XXX TODO Settings.EMULATE_UNALIGNED_ACCESSES = 1
+ #self.do_run(src, '*300:1*\n*515559*\n*42949672960*\n') # but succeeds with it
+
def test_unsigned(self):
Settings.CORRECT_SIGNS = 1 # We test for exactly this sort of thing here
Settings.CHECK_SIGNS = 0
@@ -4314,7 +4347,7 @@ class %s(T):
super(%s, self).setUp()
Building.COMPILER = %r
- llvm_opts = %d
+ llvm_opts = %d # 1 is yes, 2 is yes and unsafe
embetter = %d
quantum_size = %d
Settings.USE_TYPED_ARRAYS = %d
@@ -4336,6 +4369,7 @@ class %s(T):
Settings.DISABLE_EXCEPTION_CATCHING = 0
Settings.PROFILE = 0
Settings.TOTAL_MEMORY = Settings.FAST_MEMORY = None
+ Settings.EMULATE_UNALIGNED_ACCESSES = Settings.USE_TYPED_ARRAYS == 2 and Building.LLVM_OPTS == 2
if Settings.USE_TYPED_ARRAYS == 2:
Settings.I64_MODE = 1
Settings.SAFE_HEAP = 1 # only checks for alignment problems, which is very important with unsafe opts
@@ -4344,7 +4378,7 @@ class %s(T):
Settings.RELOOP = 0 # XXX Would be better to use this, but it isn't really what we test in these cases, and is very slow
if Building.LLVM_OPTS:
- self.pick_llvm_opts(3)
+ self.pick_llvm_opts(3, safe=Building.LLVM_OPTS != 2)
Building.COMPILER_TEST_OPTS = ['-g']
@@ -4354,18 +4388,23 @@ TT = %s
''' % (fullname, fullname, fullname, compiler, llvm_opts, embetter, quantum_size, typed_arrays, fullname))
return TT
- for llvm_opts in [0,1]:
- for name, compiler, quantum, embetter, typed_arrays in [
- ('clang', CLANG, 1, 0, 0),
- ('clang', CLANG, 4, 0, 0),
- ('clang', CLANG, 1, 1, 1),
- ('clang', CLANG, 4, 1, 1),
- ('clang', CLANG, 4, 1, 2),
- ]:
- fullname = '%s_%d_%d%s%s' % (
- name, llvm_opts, embetter, '' if quantum == 4 else '_q' + str(quantum), '' if typed_arrays in [0, 1] else '_t' + str(typed_arrays)
- )
- exec('%s = make_run(%r,%r,%d,%d,%d,%d)' % (fullname, fullname, compiler, llvm_opts, embetter, quantum, typed_arrays))
+ for name, compiler, quantum, embetter, typed_arrays, llvm_opts in [
+ ('clang', CLANG, 1, 0, 0, 0),
+ ('clang', CLANG, 1, 0, 0, 1),
+ ('clang', CLANG, 4, 0, 0, 0),
+ ('clang', CLANG, 4, 0, 0, 1),
+ ('clang', CLANG, 1, 1, 1, 0),
+ ('clang', CLANG, 1, 1, 1, 1),
+ ('clang', CLANG, 4, 1, 1, 0),
+ ('clang', CLANG, 4, 1, 1, 1),
+ ('clang', CLANG, 4, 1, 2, 0),
+ ('clang', CLANG, 4, 1, 2, 1),
+ #('clang', CLANG, 4, 1, 2, 2),
+ ]:
+ fullname = '%s_%d_%d%s%s' % (
+ name, llvm_opts, embetter, '' if quantum == 4 else '_q' + str(quantum), '' if typed_arrays in [0, 1] else '_t' + str(typed_arrays)
+ )
+ exec('%s = make_run(%r,%r,%d,%d,%d,%d)' % (fullname, fullname, compiler, llvm_opts, embetter, quantum, typed_arrays))
del T # T is just a shape for the specific subclasses, we don't test it itself
diff --git a/tools/shared.py b/tools/shared.py
index 4bbfe81c..d3cc330b 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -295,12 +295,12 @@ class Building:
# Emscripten optimizations that we run on the .ll file
@staticmethod
def ll_opts(filename):
- # Remove target info. This helps LLVM opts, if we run them later
- cleaned = filter(lambda line: not line.startswith('target datalayout = ') and not line.startswith('target triple = '),
- open(filename + '.o.ll', 'r').readlines())
- os.unlink(filename + '.o.ll')
- open(filename + '.o.ll.orig', 'w').write(''.join(cleaned))
-
+ ## Remove target info. This helps LLVM opts, if we run them later
+ #cleaned = filter(lambda line: not line.startswith('target datalayout = ') and not line.startswith('target triple = '),
+ # open(filename + '.o.ll', 'r').readlines())
+ #os.unlink(filename + '.o.ll')
+ #open(filename + '.o.ll.orig', 'w').write(''.join(cleaned))
+ shutil.move(filename + '.o.ll', filename + '.o.ll.orig')
output = Popen(['python', DFE, filename + '.o.ll.orig', filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
assert os.path.exists(filename + '.o.ll'), 'Failed to run ll optimizations'
@@ -340,7 +340,7 @@ class Building:
# Run Emscripten
exported_settings = {}
- for setting in ['QUANTUM_SIZE', 'RELOOP', 'OPTIMIZE', 'ASSERTIONS', 'USE_TYPED_ARRAYS', 'SAFE_HEAP', 'CHECK_OVERFLOWS', 'CORRECT_OVERFLOWS', 'CORRECT_SIGNS', 'CHECK_SIGNS', 'CORRECT_OVERFLOWS_LINES', 'CORRECT_SIGNS_LINES', 'CORRECT_ROUNDINGS', 'CORRECT_ROUNDINGS_LINES', 'INVOKE_RUN', 'SAFE_HEAP_LINES', 'INIT_STACK', 'AUTO_OPTIMIZE', 'EXPORTED_FUNCTIONS', 'EXPORTED_GLOBALS', 'BUILD_AS_SHARED_LIB', 'INCLUDE_FULL_LIBRARY', 'RUNTIME_TYPE_INFO', 'DISABLE_EXCEPTION_CATCHING', 'TOTAL_MEMORY', 'FAST_MEMORY', 'EXCEPTION_DEBUG', 'PROFILE', 'I64_MODE']:
+ for setting in ['QUANTUM_SIZE', 'RELOOP', 'OPTIMIZE', 'ASSERTIONS', 'USE_TYPED_ARRAYS', 'SAFE_HEAP', 'CHECK_OVERFLOWS', 'CORRECT_OVERFLOWS', 'CORRECT_SIGNS', 'CHECK_SIGNS', 'CORRECT_OVERFLOWS_LINES', 'CORRECT_SIGNS_LINES', 'CORRECT_ROUNDINGS', 'CORRECT_ROUNDINGS_LINES', 'INVOKE_RUN', 'SAFE_HEAP_LINES', 'INIT_STACK', 'AUTO_OPTIMIZE', 'EXPORTED_FUNCTIONS', 'EXPORTED_GLOBALS', 'BUILD_AS_SHARED_LIB', 'INCLUDE_FULL_LIBRARY', 'RUNTIME_TYPE_INFO', 'DISABLE_EXCEPTION_CATCHING', 'TOTAL_MEMORY', 'FAST_MEMORY', 'EXCEPTION_DEBUG', 'PROFILE', 'I64_MODE', 'EMULATE_UNALIGNED_ACCESSES']:
try:
value = eval('Settings.' + setting)
if value is not None: