Clean up SIMD code, fix uint32x4 heap offsets, and add tests

author: Alon Zakai <alonzakai@gmail.com> 2013-10-29 17:58:09 -0700
committer: Alon Zakai <alonzakai@gmail.com> 2013-10-29 17:58:09 -0700
commit: c45cd7c72a019f54b0f6ee873641200ecb043a25 (patch)
tree: c33e9da60e6c7e64a68e2cf8e5f70a6c4d250a74
parent: 4af62232d03bd71db0f1ec907ff2a52ecef2f5d9 (diff)
3 files changed, 51 insertions, 31 deletions
diff --git a/src/jsifier.js b/src/jsifier.js
index b36e11ed..ec7ad1c2 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -948,11 +948,12 @@ function JSify(data, functionsOnly, givenFunctions) {
     }
     if (item.valueType[item.valueType.length-1] === '>') {
       // vector store TODO: move to makeSetValue?
-      var base = getVectorBaseType(item.valueType);
-      return '(' + makeSetValue(item.ident,  0, value + '.x', base, 0, 0, item.align) + ',' +
-                   makeSetValue(item.ident,  4, value + '.y', base, 0, 0, item.align) + ',' +
-                   makeSetValue(item.ident,  8, value + '.z', base, 0, 0, item.align) + ',' +
-                   makeSetValue(item.ident, 12, value + '.w', base, 0, 0, item.align) + ')';
+      var native = getVectorNativeType(item.valueType);
+      var base = getSIMDName(native);
+      return '(' + makeSetValue(item.ident,  0, value + '.x', native, 0, 0, item.align) + ',' +
+                   makeSetValue(item.ident,  4, value + '.y', native, 0, 0, item.align) + ',' +
+                   makeSetValue(item.ident,  8, value + '.z', native, 0, 0, item.align) + ',' +
+                   makeSetValue(item.ident, 12, value + '.w', native, 0, 0, item.align) + ')';
     }
     switch (impl) {
       case VAR_NATIVIZED:
@@ -1323,11 +1324,12 @@ function JSify(data, functionsOnly, givenFunctions) {
     var value = finalizeLLVMParameter(item.pointer);
     if (item.valueType[item.valueType.length-1] === '>') {
       // vector load
-      var base = getVectorBaseType(item.valueType);
-      return base + '32x4(' + makeGetValue(value,  0, base, 0, item.unsigned, 0, item.align) + ',' +
-                              makeGetValue(value,  4, base, 0, item.unsigned, 0, item.align) + ',' +
-                              makeGetValue(value,  8, base, 0, item.unsigned, 0, item.align) + ',' +
-                              makeGetValue(value, 12, base, 0, item.unsigned, 0, item.align) + ')';
+      var native = getVectorNativeType(item.valueType);
+      var base = getSIMDName(native);
+      return base + '32x4(' + makeGetValue(value,  0, native, 0, item.unsigned, 0, item.align) + ',' +
+                              makeGetValue(value,  4, native, 0, item.unsigned, 0, item.align) + ',' +
+                              makeGetValue(value,  8, native, 0, item.unsigned, 0, item.align) + ',' +
+                              makeGetValue(value, 12, native, 0, item.unsigned, 0, item.align) + ')';
     }
     var impl = item.ident ? getVarImpl(item.funcData, item.ident) : VAR_EMULATED;
     switch (impl) {
diff --git a/src/parseTools.js b/src/parseTools.js
index 223adbbf..16f4058c 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -328,28 +328,29 @@ function getVectorSize(type) {
   return parseInt(type.substring(1, type.indexOf(' ')));
 }
 
-function getVectorBaseType(type) {
+function getVectorNativeType(type) {
   Types.usesSIMD = true;
   switch (type) {
     case '<2 x float>':
     case '<4 x float>': return 'float';
     case '<2 x i32>':
-    case '<4 x i32>': return 'uint';
+    case '<4 x i32>': return 'i32';
     default: throw 'unknown vector type ' + type;
   }
 }
 
-function getVectorNativeType(type) {
-  Types.usesSIMD = true;
+function getSIMDName(type) {
   switch (type) {
-    case '<2 x float>':
-    case '<4 x float>': return 'float';
-    case '<2 x i32>':
-    case '<4 x i32>': return 'i32';
-    default: throw 'unknown vector type ' + type;
+    case 'i32': return 'uint';
+    case 'float': return 'float';
+    default: throw 'getSIMDName ' + type;
   }
 }
 
+function getVectorBaseType(type) {
+  return getSIMDName(getVectorNativeType(type));
+}
+
 function addIdent(token) {
   token.ident = token.text;
   return token;
@@ -1807,7 +1808,7 @@ function makeGetSlabs(ptr, type, allowMultiple, unsigned) {
     switch(type) {
       case 'i1': case 'i8': return [unsigned ? 'HEAPU8' : 'HEAP8']; break;
       case 'i16': return [unsigned ? 'HEAPU16' : 'HEAP16']; break;
-      case '<4 x i32>': case 'uint':
+      case '<4 x i32>':
       case 'i32': case 'i64': return [unsigned ? 'HEAPU32' : 'HEAP32']; break;
       case 'double': {
         if (TARGET_LE32) return ['HEAPF64']; // in le32, we do have the ability to assume 64-bit alignment
diff --git a/tests/test_other.py b/tests/test_other.py
index e251da5d..185e83d1 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -2022,6 +2022,12 @@ a(int [32], char [5]*)
 
 #include <emscripten/vector.h>
 
+static inline float32x4 __attribute__((always_inline))
+_mm_set_ps(const float __Z, const float __Y, const float __X, const float __W)
+{
+  return (float32x4){ __W, __X, __Y, __Z };
+}
+
 int main(int argc, char **argv) {
   float data[8];
   for (int i = 0; i < 32; i++) data[i] = (1+i+argc)*(2+i+argc*argc);
@@ -2031,11 +2037,11 @@ int main(int argc, char **argv) {
     float32x4 c, d;
     c = *a;
     d = *b;
-    printf("floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
+    printf("1floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
     c = c+d;
-    printf("floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
+    printf("2floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
     d = c*d;
-    printf("floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
+    printf("3floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
   }
   {
     uint32x4 *a = (uint32x4*)&data[0];
@@ -2043,19 +2049,30 @@ int main(int argc, char **argv) {
     uint32x4 c, d, e, f;
     c = *a;
     d = *b;
-    printf("uints! %d, %d, %d, %d   %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]);
+    printf("4uints! %d, %d, %d, %d   %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]);
     e = c+d;
     f = c-d;
-    printf("uints! %d, %d, %d, %d   %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
+    printf("5uints! %d, %d, %d, %d   %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
+  }
+  {
+    float32x4 c, d, e, f;
+    c = _mm_set_ps(9.0, 4.0, 0, -9.0);
+    d = _mm_set_ps(10.0, 14.0, -12, -2.0);
+    printf("6floats! %d, %d, %d, %d   %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3], (int)d[0], (int)d[1], (int)d[2], (int)d[3]);
   }
+
   return 0;
 }
     ''')
-    Popen([PYTHON, EMCC, 'src.cpp', '-O2']).communicate()
-    self.assertContained('''floats! 6, 12, 20, 30   42, 56, 72, 90
-floats! 48, 68, 92, 120   42, 56, 72, 90
-floats! 48, 68, 92, 120   2016, 3808, 6624, 10800
-uints! 1086324736, 1094713344, 1101004800, 1106247680   1109917696, 1113587712, 1116733440, 1119092736
-uints! -2098724864, -2086666240, -2077229056, -2069626880   -23592960, -18874368, -15728640, -12845056
+
+    for opts in [[], ['-O1'], ['-O2']]:
+      print opts
+      Popen([PYTHON, EMCC, 'src.cpp'] + opts).communicate()
+      self.assertContained('''1floats! 6, 12, 20, 30   42, 56, 72, 90
+2floats! 48, 68, 92, 120   42, 56, 72, 90
+3floats! 48, 68, 92, 120   2016, 3808, 6624, 10800
+4uints! 1086324736, 1094713344, 1101004800, 1106247680   1109917696, 1113587712, 1116733440, 1119092736
+5uints! -2098724864, -2086666240, -2077229056, -2069626880   -23592960, -18874368, -15728640, -12845056
+6floats! -9, 0, 4, 9   -2, -12, 14, 10
 ''', run_js('a.out.js'))
author	Alon Zakai <alonzakai@gmail.com>	2013-10-29 17:58:09 -0700
committer	Alon Zakai <alonzakai@gmail.com>	2013-10-29 17:58:09 -0700
commit	c45cd7c72a019f54b0f6ee873641200ecb043a25 (patch)
tree	c33e9da60e6c7e64a68e2cf8e5f70a6c4d250a74
parent	4af62232d03bd71db0f1ec907ff2a52ecef2f5d9 (diff)