summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS1
-rw-r--r--src/jsifier.js3
-rw-r--r--src/library.js68
-rw-r--r--src/parseTools.js29
-rw-r--r--src/simd.js1031
-rw-r--r--system/include/emscripten/emmintrin.h87
-rw-r--r--system/include/emscripten/vector.h20
-rw-r--r--system/include/emscripten/xmmintrin.h131
-rw-r--r--tests/test_core.py496
9 files changed, 1406 insertions, 460 deletions
diff --git a/AUTHORS b/AUTHORS
index 2b2b21ed..b03bfe7b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -110,3 +110,4 @@ a license to everyone to use it as detailed in LICENSE.)
* John Vilk <jvilk@cs.umass.edu>
* Daniel Baulig <dbaulig@fb.com> (copyright owned by Facebook, Inc.)
* Lu Wang <coolwanglu@gmail.com>
+* Heidi Pan <heidi.pan@intel.com> (copyright owned by Intel)
diff --git a/src/jsifier.js b/src/jsifier.js
index cb753e57..fb6c5ba8 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -1373,8 +1373,9 @@ function JSify(data, functionsOnly, givenFunctions) {
function insertelementHandler(item) {
var base = getVectorBaseType(item.type);
var ident = ensureVector(item.ident, base);
+ var laneOp = ((base == 'float') ? 'SIMD.float32x4.with' : 'SIMD.int32x4.with');
//return ident + '.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + finalizeLLVMParameter(item.value) + ')';
- return 'SIMD.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')';
+ return laneOp + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')';
}
function extractelementHandler(item) {
var base = getVectorBaseType(item.type);
diff --git a/src/library.js b/src/library.js
index 128bb211..faca945c 100644
--- a/src/library.js
+++ b/src/library.js
@@ -8736,8 +8736,72 @@ LibraryManager.library = {
// emscripten vector ops
//============================
- emscripten_float32x4_signmask__inline: function(x) {
- return x + '.signMask()';
+ emscripten_float32x4_signmask__inline: function(a) {
+ return 'SIMD.float32x4.bitsToInt32x4(' + a + ').signMask';
+ },
+
+ emscripten_float32x4_min__inline: function(a, b) {
+ return 'SIMD.float32x4.min(' + a + ', ' + b + ')';
+ },
+
+ emscripten_float32x4_max__inline: function(a, b) {
+ return 'SIMD.float32x4.max(' + a + ', ' + b + ')';
+ },
+
+ emscripten_float32x4_sqrt__inline: function(a) {
+ return 'SIMD.float32x4.sqrt(' + a + ')';
+ },
+
+ emscripten_float32x4_lessThan__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThan(' + a + ', ' + b + '))';
+ },
+
+ emscripten_float32x4_lessThanOrEqual__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThanOrEqual(' + a + ', ' + b + '))';
+ },
+
+ emscripten_float32x4_equal__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.equal(' + a + ', ' + b + '))';
+ },
+
+ emscripten_float32x4_greaterThanOrEqual__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThanOrEqual(' + a + ', ' + b + '))';
+ },
+
+ emscripten_float32x4_greaterThan__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThan(' + a + ', ' + b + '))';
+ },
+
+ emscripten_float32x4_and__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))';
+ },
+
+ emscripten_float32x4_andNot__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.int32x4.not(SIMD.float32x4.bitsToInt32x4(' + a + ')), SIMD.float32x4.bitsToInt32x4(' + b + ')))';
+ },
+
+ emscripten_float32x4_or__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))';
+ },
+
+ emscripten_float32x4_xor__inline: function(a, b) {
+ return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.xor(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))';
+ },
+
+ emscripten_int32x4_bitsToFloat32x4__inline: function(a) {
+ return 'SIMD.int32x4.bitsToFloat32x4(' + a + ')';
+ },
+
+ emscripten_int32x4_toFloat32x4__inline: function(a) {
+ return 'SIMD.int32x4.toFloat32x4(' + a + ')';
+ },
+
+ emscripten_float32x4_bitsToInt32x4__inline: function(a) {
+ return 'SIMD.float32x4.bitsToInt32x4(' + a + ')';
+ },
+
+ emscripten_float32x4_toInt32x4__inline: function(a) {
+ return 'SIMD.float32x4.toInt32x4(' + a + ')';
},
//============================
diff --git a/src/parseTools.js b/src/parseTools.js
index 134cb89a..ffd7c758 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -362,7 +362,7 @@ function getVectorNativeType(type) {
function getSIMDName(type) {
switch (type) {
- case 'i32': return 'uint';
+ case 'i32': return 'int';
case 'float': return 'float';
default: throw 'getSIMDName ' + type;
}
@@ -2372,29 +2372,28 @@ function processMathop(item) {
// vector/SIMD operation
Types.usesSIMD = true;
switch (op) {
- case 'fadd': return 'SIMD.add(' + idents[0] + ',' + idents[1] + ')';
- case 'fsub': return 'SIMD.sub(' + idents[0] + ',' + idents[1] + ')';
- case 'fmul': return 'SIMD.mul(' + idents[0] + ',' + idents[1] + ')';
- case 'fdiv': return 'SIMD.div(' + idents[0] + ',' + idents[1] + ')';
- case 'add' : return 'SIMD.addu32(' + idents[0] + ',' + idents[1] + ')';
- case 'sub' : return 'SIMD.subu32(' + idents[0] + ',' + idents[1] + ')';
- case 'mul' : return 'SIMD.mulu32(' + idents[0] + ',' + idents[1] + ')';
- case 'udiv': return 'SIMD.divu32(' + idents[0] + ',' + idents[1] + ')';
+ case 'fadd': return 'SIMD.float32x4.add(' + idents[0] + ',' + idents[1] + ')';
+ case 'fsub': return 'SIMD.float32x4.sub(' + idents[0] + ',' + idents[1] + ')';
+ case 'fmul': return 'SIMD.float32x4.mul(' + idents[0] + ',' + idents[1] + ')';
+ case 'fdiv': return 'SIMD.float32x4.div(' + idents[0] + ',' + idents[1] + ')';
+ case 'add' : return 'SIMD.int32x4.add(' + idents[0] + ',' + idents[1] + ')';
+ case 'sub' : return 'SIMD.int32x4.sub(' + idents[0] + ',' + idents[1] + ')';
+ case 'mul' : return 'SIMD.int32x4.mul(' + idents[0] + ',' + idents[1] + ')';
case 'bitcast': {
var inType = item.params[0].type;
var outType = item.type;
if (inType === '<4 x float>') {
assert(outType === '<4 x i32>');
- return 'SIMD.float32x4BitsToUint32x4(' + idents[0] + ')';
+ return 'SIMD.float32x4.bitsToInt32x4(' + idents[0] + ')';
} else {
assert(inType === '<4 x i32>');
assert(outType === '<4 x float>');
- return 'SIMD.uint32x4BitsToFloat32x4(' + idents[0] + ')';
+ return 'SIMD.int32x4.bitsToFloat32x4(' + idents[0] + ')';
}
}
- case 'and': return 'SIMD.and(' + idents[0] + ',' + idents[1] + ')';
- case 'or': return 'SIMD.or(' + idents[0] + ',' + idents[1] + ')';
- case 'xor': return 'SIMD.xor(' + idents[0] + ',' + idents[1] + ')';
+ case 'and': return 'SIMD.int32x4.and(' + idents[0] + ',' + idents[1] + ')';
+ case 'or': return 'SIMD.int32x4.or(' + idents[0] + ',' + idents[1] + ')';
+ case 'xor': return 'SIMD.int32x4.xor(' + idents[0] + ',' + idents[1] + ')';
default: throw 'vector op todo: ' + dump(item);
}
}
@@ -2698,7 +2697,7 @@ var simdLane = ['x', 'y', 'z', 'w'];
function ensureVector(ident, base) {
Types.usesSIMD = true;
- return ident == 0 ? base + '32x4.zero()' : ident;
+ return ident == 0 ? base + '32x4.splat(0)' : ident;
}
function ensureValidFFIType(type) {
diff --git a/src/simd.js b/src/simd.js
index bbb12d0a..c7f5ff48 100644
--- a/src/simd.js
+++ b/src/simd.js
@@ -20,8 +20,10 @@
https://github.com/johnmccutchan/ecmascript_simd/blob/master/src/ecmascript_simd.js
*/
+"use strict";
+
/**
- * Construct a new instance of a float32x4 number.
+ * Construct a new instance of float32x4 number.
* @param {double} value used for x lane.
* @param {double} value used for y lane.
* @param {double} value used for z lane.
@@ -40,7 +42,7 @@ function float32x4(x, y, z, w) {
}
/**
- * Construct a new instance of a float32x4 number with 0.0 in all lanes.
+ * Construct a new instance of float32x4 number with 0.0 in all lanes.
* @constructor
*/
float32x4.zero = function() {
@@ -48,7 +50,7 @@ float32x4.zero = function() {
}
/**
- * Construct a new instance of a float32x4 number with the same value
+ * Construct a new instance of float32x4 number with the same value
* in all lanes.
* @param {double} value used for all lanes.
* @constructor
@@ -87,18 +89,18 @@ Object.defineProperty(float32x4.prototype, 'signMask', {
});
/**
- * Construct a new instance of a uint32x4 number.
+ * Construct a new instance of int32x4 number.
* @param {integer} 32-bit unsigned value used for x lane.
* @param {integer} 32-bit unsigned value used for y lane.
* @param {integer} 32-bit unsigned value used for z lane.
* @param {integer} 32-bit unsigned value used for w lane.
* @constructor
*/
-function uint32x4(x, y, z, w) {
- if (!(this instanceof uint32x4)) {
- return new uint32x4(x, y, z, w);
+function int32x4(x, y, z, w) {
+ if (!(this instanceof int32x4)) {
+ return new int32x4(x, y, z, w);
}
- this.storage_ = new Uint32Array(4);
+ this.storage_ = new Int32Array(4);
this.storage_[0] = x;
this.storage_[1] = y;
this.storage_[2] = z;
@@ -106,7 +108,7 @@ function uint32x4(x, y, z, w) {
}
/**
- * Construct a new instance of a uint32x4 number with 0xFFFFFFFF or 0x0 in each
+ * Construct a new instance of int32x4 number with 0xFFFFFFFF or 0x0 in each
* lane, depending on the truth value in x, y, z, and w.
* @param {boolean} flag used for x lane.
* @param {boolean} flag used for y lane.
@@ -114,59 +116,59 @@ function uint32x4(x, y, z, w) {
* @param {boolean} flag used for w lane.
* @constructor
*/
-uint32x4.bool = function(x, y, z, w) {
- return uint32x4(x ? 0xFFFFFFFF : 0x0,
- y ? 0xFFFFFFFF : 0x0,
- z ? 0xFFFFFFFF : 0x0,
- w ? 0xFFFFFFFF : 0x0);
+int32x4.bool = function(x, y, z, w) {
+ return int32x4(x ? -1 : 0x0,
+ y ? -1 : 0x0,
+ z ? -1 : 0x0,
+ w ? -1 : 0x0);
}
/**
- * Construct a new instance of a uint32x4 number with the same value
+ * Construct a new instance of int32x4 number with the same value
* in all lanes.
* @param {integer} value used for all lanes.
* @constructor
*/
-uint32x4.splat = function(s) {
- return uint32x4(s, s, s, s);
+int32x4.splat = function(s) {
+ return int32x4(s, s, s, s);
}
-Object.defineProperty(uint32x4.prototype, 'x', {
+Object.defineProperty(int32x4.prototype, 'x', {
get: function() { return this.storage_[0]; }
});
-Object.defineProperty(uint32x4.prototype, 'y', {
+Object.defineProperty(int32x4.prototype, 'y', {
get: function() { return this.storage_[1]; }
});
-Object.defineProperty(uint32x4.prototype, 'z', {
+Object.defineProperty(int32x4.prototype, 'z', {
get: function() { return this.storage_[2]; }
});
-Object.defineProperty(uint32x4.prototype, 'w',
+Object.defineProperty(int32x4.prototype, 'w',
{ get: function() { return this.storage_[3]; }
});
-Object.defineProperty(uint32x4.prototype, 'flagX', {
+Object.defineProperty(int32x4.prototype, 'flagX', {
get: function() { return this.storage_[0] != 0x0; }
});
-Object.defineProperty(uint32x4.prototype, 'flagY', {
+Object.defineProperty(int32x4.prototype, 'flagY', {
get: function() { return this.storage_[1] != 0x0; }
});
-Object.defineProperty(uint32x4.prototype, 'flagZ', {
+Object.defineProperty(int32x4.prototype, 'flagZ', {
get: function() { return this.storage_[2] != 0x0; }
});
-Object.defineProperty(uint32x4.prototype, 'flagW',
+Object.defineProperty(int32x4.prototype, 'flagW',
{ get: function() { return this.storage_[3] != 0x0; }
});
/**
* Extract the sign bit from each lane return them in the first 4 bits.
*/
-Object.defineProperty(uint32x4.prototype, 'signMask', {
+Object.defineProperty(int32x4.prototype, 'signMask', {
get: function() {
var mx = (this.storage_[0] & 0x80000000) >>> 31;
var my = (this.storage_[1] & 0x80000000) >>> 31;
@@ -287,414 +289,580 @@ Float32x4Array.prototype.setAt = function(i, v) {
this.storage_[i*4+3] = v.w;
}
+
+function Int32x4Array(a, b, c) {
+
+ function isNumber(o) {
+ return typeof o == "number" || (typeof o == "object" && o.constructor === Number);
+ }
+
+ function isTypedArray(o) {
+ return (o instanceof Int8Array) ||
+ (o instanceof Uint8Array) ||
+ (o instanceof Uint8ClampedArray) ||
+ (o instanceof Int16Array) ||
+ (o instanceof Uint16Array) ||
+ (o instanceof Int32Array) ||
+ (o instanceof Uint32Array) ||
+ (o instanceof Float32Array) ||
+ (o instanceof Float64Array) ||
+ (o instanceof Int32x4Array) ||
+ (o instanceof Float32x4Array);
+ }
+
+ function isArrayBuffer(o) {
+ return (o instanceof ArrayBuffer);
+ }
+
+ if (isNumber(a)) {
+ this.storage_ = new Int32Array(a*4);
+ this.length_ = a;
+ this.byteOffset_ = 0;
+ return;
+ } else if (isTypedArray(a)) {
+ if (!(a instanceof Int32x4Array)) {
+ throw "Copying typed array of non-Int32x4Array is unimplemented.";
+ }
+ this.storage_ = new Int32Array(a.length * 4);
+ this.length_ = a.length;
+ this.byteOffset_ = 0;
+ // Copy the 32-bit integer lanes (4 per element) from the source array.
+ for (var i = 0; i < a.length*4; i++) {
+ this.storage_[i] = a.storage_[i];
+ }
+ } else if (isArrayBuffer(a)) {
+ if ((b != undefined) && (b % Int32x4Array.BYTES_PER_ELEMENT) != 0) {
+ throw "byteOffset must be a multiple of 16.";
+ }
+ if (c != undefined) {
+ c *= 4;
+ this.storage_ = new Int32Array(a, b, c);
+ }
+ else {
+ // Note: new Int32Array(a, b) is NOT equivalent to new Int32Array(a, b, undefined)
+ this.storage_ = new Int32Array(a, b);
+ }
+ this.length_ = this.storage_.length / 4;
+ this.byteOffset_ = b != undefined ? b : 0;
+ } else {
+ throw "Unknown type of first argument.";
+ }
+}
+
+Object.defineProperty(Int32x4Array.prototype, 'length',
+ { get: function() { return this.length_; }
+});
+
+Object.defineProperty(Int32x4Array.prototype, 'byteLength',
+ { get: function() { return this.length_ * Int32x4Array.BYTES_PER_ELEMENT; }
+});
+
+Object.defineProperty(Int32x4Array, 'BYTES_PER_ELEMENT',
+ { get: function() { return 16; }
+});
+
+Object.defineProperty(Int32x4Array.prototype, 'BYTES_PER_ELEMENT',
+ { get: function() { return 16; }
+});
+
+Object.defineProperty(Int32x4Array.prototype, 'byteOffset',
+ { get: function() { return this.byteOffset_; }
+});
+
+Object.defineProperty(Int32x4Array.prototype, 'buffer',
+ { get: function() { return this.storage_.buffer; }
+});
+
+Int32x4Array.prototype.getAt = function(i) {
+ if (i < 0) {
+ throw "Index must be >= 0.";
+ }
+ if (i >= this.length) {
+ throw "Index out of bounds.";
+ }
+ var x = this.storage_[i*4+0];
+ var y = this.storage_[i*4+1];
+ var z = this.storage_[i*4+2];
+ var w = this.storage_[i*4+3];
+ return int32x4(x, y, z, w); // lanes come from Int32Array storage; setAt only accepts int32x4
+}
+
+Int32x4Array.prototype.setAt = function(i, v) {
+ if (i < 0) {
+ throw "Index must be >= 0.";
+ }
+ if (i >= this.length) {
+ throw "Index out of bounds.";
+ }
+ if (!(v instanceof int32x4)) {
+ throw "Value is not a int32x4.";
+ }
+ this.storage_[i*4+0] = v.x;
+ this.storage_[i*4+1] = v.y;
+ this.storage_[i*4+2] = v.z;
+ this.storage_[i*4+3] = v.w;
+}
+
var SIMD = (function () {
return {
- /**
- * @return {float32x4} New instance of float32x4 with absolute values of
- * t.
- */
- abs: function(t) {
- return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z),
- Math.abs(t.w));
- },
- /**
- * @return {float32x4} New instance of float32x4 with negated values of
- * t.
- */
- neg: function(t) {
- return new float32x4(-t.x, -t.y, -t.z, -t.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with a + b.
- */
- add: function(a, b) {
- return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with a - b.
- */
- sub: function(a, b) {
- return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with a * b.
- */
- mul: function(a, b) {
- return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with a / b.
- */
- div: function(a, b) {
- return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with t's values clamped
- * between lowerLimit and upperLimit.
- */
- clamp: function(t, lowerLimit, upperLimit) {
- var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x;
- var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y;
- var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z;
- var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w;
- cx = cx > upperLimit.x ? upperLimit.x : cx;
- cy = cy > upperLimit.y ? upperLimit.y : cy;
- cz = cz > upperLimit.z ? upperLimit.z : cz;
- cw = cw > upperLimit.w ? upperLimit.w : cw;
- return new float32x4(cx, cy, cz, cw);
- },
- /**
- * @return {float32x4} New instance of float32x4 with the minimum value of
- * t and other.
- */
- min: function(t, other) {
- var cx = t.x > other.x ? other.x : t.x;
- var cy = t.y > other.y ? other.y : t.y;
- var cz = t.z > other.z ? other.z : t.z;
- var cw = t.w > other.w ? other.w : t.w;
- return new float32x4(cx, cy, cz, cw);
- },
- /**
- * @return {float32x4} New instance of float32x4 with the maximum value of
- * t and other.
- */
- max: function(t, other) {
- var cx = t.x < other.x ? other.x : t.x;
- var cy = t.y < other.y ? other.y : t.y;
- var cz = t.z < other.z ? other.z : t.z;
- var cw = t.w < other.w ? other.w : t.w;
- return new float32x4(cx, cy, cz, cw);
- },
- /**
- * @return {float32x4} New instance of float32x4 with reciprocal value of
- * t.
- */
- reciprocal: function(t) {
- return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with square root of the
- * reciprocal value of t.
- */
- reciprocalSqrt: function(t) {
- return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y),
- Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w));
- },
- /**
- * @return {float32x4} New instance of float32x4 with values of t
- * scaled by s.
- */
- scale: function(t, s) {
- return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w);
- },
- /**
- * @return {float32x4} New instance of float32x4 with square root of
- * values of t.
- */
- sqrt: function(t) {
- return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y),
- Math.sqrt(t.z), Math.sqrt(t.w));
- },
- /**
- * @param {float32x4} t An instance of float32x4 to be shuffled.
- * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX.
- * @return {float32x4} New instance of float32x4 with lanes shuffled.
- */
- shuffle: function(t, mask) {
- var _x = (mask) & 0x3;
- var _y = (mask >> 2) & 0x3;
- var _z = (mask >> 4) & 0x3;
- var _w = (mask >> 6) & 0x3;
- return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z],
- t.storage_[_w]);
+ float32x4: {
+ /**
+ * @return {float32x4} New instance of float32x4 with absolute values of
+ * t.
+ */
+ abs: function(t) {
+ return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z),
+ Math.abs(t.w));
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with negated values of
+ * t.
+ */
+ neg: function(t) {
+ return new float32x4(-t.x, -t.y, -t.z, -t.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with a + b.
+ */
+ add: function(a, b) {
+ return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with a - b.
+ */
+ sub: function(a, b) {
+ return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with a * b.
+ */
+ mul: function(a, b) {
+ return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with a / b.
+ */
+ div: function(a, b) {
+ return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with t's values clamped
+ * between lowerLimit and upperLimit.
+ */
+ clamp: function(t, lowerLimit, upperLimit) {
+ var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x;
+ var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y;
+ var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z;
+ var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w;
+ cx = cx > upperLimit.x ? upperLimit.x : cx;
+ cy = cy > upperLimit.y ? upperLimit.y : cy;
+ cz = cz > upperLimit.z ? upperLimit.z : cz;
+ cw = cw > upperLimit.w ? upperLimit.w : cw;
+ return new float32x4(cx, cy, cz, cw);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with the minimum value of
+ * t and other.
+ */
+ min: function(t, other) {
+ var cx = t.x > other.x ? other.x : t.x;
+ var cy = t.y > other.y ? other.y : t.y;
+ var cz = t.z > other.z ? other.z : t.z;
+ var cw = t.w > other.w ? other.w : t.w;
+ return new float32x4(cx, cy, cz, cw);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with the maximum value of
+ * t and other.
+ */
+ max: function(t, other) {
+ var cx = t.x < other.x ? other.x : t.x;
+ var cy = t.y < other.y ? other.y : t.y;
+ var cz = t.z < other.z ? other.z : t.z;
+ var cw = t.w < other.w ? other.w : t.w;
+ return new float32x4(cx, cy, cz, cw);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with reciprocal value of
+ * t.
+ */
+ reciprocal: function(t) {
+ return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with square root of the
+ * reciprocal value of t.
+ */
+ reciprocalSqrt: function(t) {
+ return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y),
+ Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w));
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with values of t
+ * scaled by s.
+ */
+ scale: function(t, s) {
+ return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w);
+ },
+ /**
+ * @return {float32x4} New instance of float32x4 with square root of
+ * values of t.
+ */
+ sqrt: function(t) {
+ return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y),
+ Math.sqrt(t.z), Math.sqrt(t.w));
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4 to be shuffled.
+ * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX.
+ * @return {float32x4} New instance of float32x4 with lanes shuffled.
+ */
+ shuffle: function(t, mask) {
+ var _x = (mask) & 0x3;
+ var _y = (mask >> 2) & 0x3;
+ var _z = (mask >> 4) & 0x3;
+ var _w = (mask >> 6) & 0x3;
+ return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z],
+ t.storage_[_w]);
+ },
+ /**
+ * @param {float32x4} t1 An instance of float32x4 to be shuffled. XY lanes in result
+ * @param {float32x4} t2 An instance of float32x4 to be shuffled. ZW lanes in result
+ * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX.
+ * @return {float32x4} New instance of float32x4 with lanes shuffled.
+ */
+ shuffleMix: function(t1, t2, mask) {
+ var _x = (mask) & 0x3;
+ var _y = (mask >> 2) & 0x3;
+ var _z = (mask >> 4) & 0x3;
+ var _w = (mask >> 6) & 0x3;
+ return new float32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z],
+ t2.storage_[_w]);
+ },
+ /**
+ * @param {double} value used for x lane.
+ * @return {float32x4} New instance of float32x4 with the values in t and
+ * x replaced with {x}.
+ */
+ withX: function(t, x) {
+ return new float32x4(x, t.y, t.z, t.w);
+ },
+ /**
+ * @param {double} value used for y lane.
+ * @return {float32x4} New instance of float32x4 with the values in t and
+ * y replaced with {y}.
+ */
+ withY: function(t, y) {
+ return new float32x4(t.x, y, t.z, t.w);
+ },
+ /**
+ * @param {double} value used for z lane.
+ * @return {float32x4} New instance of float32x4 with the values in t and
+ * z replaced with {z}.
+ */
+ withZ: function(t, z) {
+ return new float32x4(t.x, t.y, z, t.w);
+ },
+ /**
+ * @param {double} value used for w lane.
+ * @return {float32x4} New instance of float32x4 with the values in t and
+ * w replaced with {w}.
+ */
+ withW: function(t, w) {
+ return new float32x4(t.x, t.y, t.z, w);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t < other.
+ */
+ lessThan: function(t, other) {
+ var cx = t.x < other.x;
+ var cy = t.y < other.y;
+ var cz = t.z < other.z;
+ var cw = t.w < other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t <= other.
+ */
+ lessThanOrEqual: function(t, other) {
+ var cx = t.x <= other.x;
+ var cy = t.y <= other.y;
+ var cz = t.z <= other.z;
+ var cw = t.w <= other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t == other.
+ */
+ equal: function(t, other) {
+ var cx = t.x == other.x;
+ var cy = t.y == other.y;
+ var cz = t.z == other.z;
+ var cw = t.w == other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t != other.
+ */
+ notEqual: function(t, other) {
+ var cx = t.x != other.x;
+ var cy = t.y != other.y;
+ var cz = t.z != other.z;
+ var cw = t.w != other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t >= other.
+ */
+ greaterThanOrEqual: function(t, other) {
+ var cx = t.x >= other.x;
+ var cy = t.y >= other.y;
+ var cz = t.z >= other.z;
+ var cw = t.w >= other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @param {float32x4} other An instance of float32x4.
+ * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on
+ * the result of t > other.
+ */
+ greaterThan: function(t, other) {
+ var cx = t.x > other.x;
+ var cy = t.y > other.y;
+ var cz = t.z > other.z;
+ var cw = t.w > other.w;
+ return int32x4.bool(cx, cy, cz, cw);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @return {int32x4} a bit-wise copy of t as a int32x4.
+ */
+ bitsToInt32x4: function(t) {
+ var alias = new Int32Array(t.storage_.buffer);
+ return new int32x4(alias[0], alias[1], alias[2], alias[3]);
+ },
+ /**
+ * @param {float32x4} t An instance of float32x4.
+ * @return {int32x4} with a float to integer conversion of t.
+ */
+ toInt32x4: function(t) {
+ var a = new int32x4(t.storage_[0], t.storage_[1], t.storage_[2],
+ t.storage_[3]);
+ return a;
+ }
},
- /**
- * @param {double} value used for x lane.
- * @return {float32x4} New instance of float32x4 with the values in t and
- * x replaced with {x}.
- */
- withX: function(t, x) {
- return new float32x4(x, t.y, t.z, t.w);
- },
- /**
- * @param {double} value used for y lane.
- * @return {float32x4} New instance of float32x4 with the values in t and
- * y replaced with {y}.
- */
- withY: function(t, y) {
- return new float32x4(t.x, y, t.z, t.w);
- },
- /**
- * @param {double} value used for z lane.
- * @return {float32x4} New instance of float32x4 with the values in t and
- * z replaced with {z}.
- */
- withZ: function(t, z) {
- return new float32x4(t.x, t.y, z, t.w);
- },
- /**
- * @param {double} value used for w lane.
- * @return {float32x4} New instance of float32x4 with the values in t and
- * w replaced with {w}.
- */
- withW: function(t, w) {
- return new float32x4(t.x, t.y, t.z, w);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t < other.
- */
- lessThan: function(t, other) {
- var cx = t.x < other.x;
- var cy = t.y < other.y;
- var cz = t.z < other.z;
- var cw = t.w < other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t <= other.
- */
- lessThanOrEqual: function(t, other) {
- var cx = t.x <= other.x;
- var cy = t.y <= other.y;
- var cz = t.z <= other.z;
- var cw = t.w <= other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t == other.
- */
- equal: function(t, other) {
- var cx = t.x == other.x;
- var cy = t.y == other.y;
- var cz = t.z == other.z;
- var cw = t.w == other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t != other.
- */
- notEqual: function(t, other) {
- var cx = t.x != other.x;
- var cy = t.y != other.y;
- var cz = t.z != other.z;
- var cw = t.w != other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t >= other.
- */
- greaterThanOrEqual: function(t, other) {
- var cx = t.x >= other.x;
- var cy = t.y >= other.y;
- var cz = t.z >= other.z;
- var cw = t.w >= other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @param {float32x4} other An instance of a float32x4.
- * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on
- * the result of t > other.
- */
- greaterThan: function(t, other) {
- var cx = t.x > other.x;
- var cy = t.y > other.y;
- var cz = t.z > other.z;
- var cw = t.w > other.w;
- return uint32x4.bool(cx, cy, cz, cw);
- },
- /**
- * @param {uint32x4} a An instance of a uint32x4.
- * @param {uint32x4} b An instance of a uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a & b.
- */
- and: function(a, b) {
- return new uint32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
- },
- /**
- * @param {uint32x4} a An instance of a uint32x4.
- * @param {uint32x4} b An instance of a uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a | b.
- */
- or: function(a, b) {
- return new uint32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
- },
- /**
- * @param {uint32x4} a An instance of a uint32x4.
- * @param {uint32x4} b An instance of a uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a ^ b.
- */
- xor: function(a, b) {
- return new uint32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of ~a
- */
- negu32: function(t) {
- return new uint32x4(~t.x, ~t.y, ~t.z, ~t.w);
- },
- /**
- * @param {uint32x4} a An instance of uint32x4.
- * @param {uint32x4} b An instance of uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a + b.
- */
- addu32: function(a, b) {
- return new uint32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
- },
- /**
- * @param {uint32x4} a An instance of uint32x4.
- * @param {uint32x4} b An instance of uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a - b.
- */
- subu32: function(a, b) {
- return new uint32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
- },
- /**
- * @param {uint32x4} a An instance of uint32x4.
- * @param {uint32x4} b An instance of uint32x4.
- * @return {uint32x4} New instance of uint32x4 with values of a * b.
- */
- mulu32: function(a, b) {
- return new uint32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y),
- Math.imul(a.z, b.z), Math.imul(a.w, b.w));
- },
- /**
- * @param {float32x4}
- */
- select: function(t, trueValue, falseValue) {
- var tv = SIMD.float32x4BitsToUint32x4(trueValue);
- var fv = SIMD.float32x4BitsToUint32x4(falseValue);
- var tr = SIMD.and(t, tv);
- var fr = SIMD.and(SIMD.negu32(t), fv);
- return SIMD.uint32x4BitsToFloat32x4(SIMD.or(tr, fr));
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {integer} 32-bit value used for x lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * x lane replaced with {x}.
- */
- withXu32: function(t, x) {
- return new uint32x4(x, t.y, t.z, t.w);
- },
- /**
- * param {uint32x4} t An instance of a uint32x4.
- * @param {integer} 32-bit value used for y lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * y lane replaced with {y}.
- */
- withYu32: function(t, y) {
- return new uint32x4(t.x, y, t.z, t.w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {integer} 32-bit value used for z lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * z lane replaced with {z}.
- */
- withZu32: function(t, z) {
- return new uint32x4(t.x, t.y, z, t.w);
- },
- /**
- * @param {integer} 32-bit value used for w lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * w lane replaced with {w}.
- */
- withWu32: function(t, w) {
- return new uint32x4(t.x, t.y, t.z, w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {boolean} x flag used for x lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * x lane replaced with {x}.
- */
- withFlagX: function(t, flagX) {
- var x = flagX ? 0xFFFFFFFF : 0x0;
- return new uint32x4(x, t.y, t.z, t.w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {boolean} y flag used for y lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * y lane replaced with {y}.
- */
- withFlagY: function(t, flagY) {
- var y = flagY ? 0xFFFFFFFF : 0x0;
- return new uint32x4(t.x, y, t.z, t.w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {boolean} z flag used for z lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * z lane replaced with {z}.
- */
- withFlagZ: function(t, flagZ) {
- var z = flagZ ? 0xFFFFFFFF : 0x0;
- return new uint32x4(t.x, t.y, z, t.w);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @param {boolean} w flag used for w lane.
- * @return {uint32x4} New instance of uint32x4 with the values in t and
- * w lane replaced with {w}.
- */
- withFlagW: function(t, flagW) {
- var w = flagW ? 0xFFFFFFFF : 0x0;
- return new uint32x4(t.x, t.y, t.z, w);
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @return {uint32x4} a bit-wise copy of t as a uint32x4.
- */
- float32x4BitsToUint32x4: function(t) {
- var alias = new Uint32Array(t.storage_.buffer);
- return new uint32x4(alias[0], alias[1], alias[2], alias[3]);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @return {float32x4} a bit-wise copy of t as a float32x4.
- */
- uint32x4BitsToFloat32x4: function(t) {
- var alias = new Float32Array(t.storage_.buffer);
- return new float32x4(alias[0], alias[1], alias[2], alias[3]);
- },
- /**
- * @param {uint32x4} t An instance of a uint32x4.
- * @return {float32x4} with a float to integer conversion copy of t.
- */
- uint32x4ToFloat32x4: function(t) {
- var a = float32x4.zero();
- a.storage_[0] = t.storage_[0];
- a.storage_[1] = t.storage_[1];
- a.storage_[2] = t.storage_[2];
- a.storage_[3] = t.storage_[3];
- return a;
- },
- /**
- * @param {float32x4} t An instance of a float32x4.
- * @return {uint32x4} with a integer to float conversion of t.
- */
- float32x4ToUint32x4: function(t) {
- var a = new uint32x4(t.storage_[0], t.storage_[1], t.storage_[2],
- t.storage_[3]);
- return a;
+ int32x4: { // Operations whose first operand is an int32x4.
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise values of a & b.
+ */
+ and: function(a, b) {
+ return new int32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
+ },
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise values of a | b.
+ */
+ or: function(a, b) {
+ return new int32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
+ },
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise values of a ^ b.
+ */
+ xor: function(a, b) {
+ return new int32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with values of ~t (bitwise complement).
+ */
+ not: function(t) {
+ return new int32x4(~t.x, ~t.y, ~t.z, ~t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with values of -t (arithmetic negation).
+ */
+ neg: function(t) {
+ return new int32x4(-t.x, -t.y, -t.z, -t.w);
+ },
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise values of a + b.
+ */
+ add: function(a, b) {
+ return new int32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+ },
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise values of a - b.
+ */
+ sub: function(a, b) {
+ return new int32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+ },
+ /**
+ * @param {int32x4} a An instance of int32x4.
+ * @param {int32x4} b An instance of int32x4.
+ * @return {int32x4} New instance of int32x4 with lane-wise Math.imul(a, b).
+ */
+ mul: function(a, b) {
+ return new int32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y),
+ Math.imul(a.z, b.z), Math.imul(a.w, b.w));
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4 to be shuffled.
+ * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX.
+ * @return {int32x4} New instance of int32x4 with lanes shuffled.
+ */
+ shuffle: function(t, mask) {
+ var _x = (mask) & 0x3; // bits 0-1: source lane for x
+ var _y = (mask >> 2) & 0x3; // bits 2-3: source lane for y
+ var _z = (mask >> 4) & 0x3; // bits 4-5: source lane for z
+ var _w = (mask >> 6) & 0x3; // bits 6-7: source lane for w
+ return new int32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z],
+ t.storage_[_w]);
+ },
+ /**
+ * @param {int32x4} t1 An instance of int32x4 to be shuffled. XY lanes in result
+ * @param {int32x4} t2 An instance of int32x4 to be shuffled. ZW lanes in result
+ * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX.
+ * @return {int32x4} New instance of int32x4 with lanes shuffled.
+ */
+ shuffleMix: function(t1, t2, mask) {
+ var _x = (mask) & 0x3; // bits 0-1: lane of t1 used for x
+ var _y = (mask >> 2) & 0x3; // bits 2-3: lane of t1 used for y
+ var _z = (mask >> 4) & 0x3; // bits 4-5: lane of t2 used for z
+ var _w = (mask >> 6) & 0x3; // bits 6-7: lane of t2 used for w
+ return new int32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z],
+ t2.storage_[_w]);
+ },
+ /**
+ * @param {int32x4} t Bit mask. Returns a float32x4 taking trueValue's bits where t's bits are set and falseValue's bits elsewhere.
+ */
+ select: function(t, trueValue, falseValue) {
+ var tv = SIMD.float32x4.bitsToInt32x4(trueValue);
+ var fv = SIMD.float32x4.bitsToInt32x4(falseValue);
+ var tr = SIMD.int32x4.and(t, tv);
+ var fr = SIMD.int32x4.and(SIMD.int32x4.not(t), fv);
+ return SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(tr, fr));
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {integer} x 32-bit value used for x lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * x lane replaced with {x}.
+ */
+ withX: function(t, x) {
+ return new int32x4(x, t.y, t.z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {integer} y 32-bit value used for y lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * y lane replaced with {y}.
+ */
+ withY: function(t, y) {
+ return new int32x4(t.x, y, t.z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {integer} z 32-bit value used for z lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * z lane replaced with {z}.
+ */
+ withZ: function(t, z) {
+ return new int32x4(t.x, t.y, z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {integer} w 32-bit value used for w lane.
+ * @return {int32x4} New int32x4 with the values in t and w lane replaced with {w}.
+ */
+ withW: function(t, w) {
+ return new int32x4(t.x, t.y, t.z, w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {boolean} flagX flag used for x lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * x lane replaced with 0xFFFFFFFF (flag truthy) or 0x0.
+ */
+ withFlagX: function(t, flagX) {
+ var x = flagX ? 0xFFFFFFFF : 0x0;
+ return new int32x4(x, t.y, t.z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {boolean} flagY flag used for y lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * y lane replaced with 0xFFFFFFFF (flag truthy) or 0x0.
+ */
+ withFlagY: function(t, flagY) {
+ var y = flagY ? 0xFFFFFFFF : 0x0;
+ return new int32x4(t.x, y, t.z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {boolean} flagZ flag used for z lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * z lane replaced with 0xFFFFFFFF (flag truthy) or 0x0.
+ */
+ withFlagZ: function(t, flagZ) {
+ var z = flagZ ? 0xFFFFFFFF : 0x0;
+ return new int32x4(t.x, t.y, z, t.w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @param {boolean} flagW flag used for w lane.
+ * @return {int32x4} New instance of int32x4 with the values in t and
+ * w lane replaced with 0xFFFFFFFF (flag truthy) or 0x0.
+ */
+ withFlagW: function(t, flagW) {
+ var w = flagW ? 0xFFFFFFFF : 0x0;
+ return new int32x4(t.x, t.y, t.z, w);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @return {float32x4} a bit-wise copy of t as a float32x4.
+ */
+ bitsToFloat32x4: function(t) {
+ var alias = new Float32Array(t.storage_.buffer); // float view of t.storage_.buffer (assumes storage_ is a typed array - TODO confirm)
+ return new float32x4(alias[0], alias[1], alias[2], alias[3]);
+ },
+ /**
+ * @param {int32x4} t An instance of int32x4.
+ * @return {float32x4} with an integer to float conversion copy of t.
+ */
+ toFloat32x4: function(t) {
+ var a = float32x4.zero();
+ a.storage_[0] = t.storage_[0];
+ a.storage_[1] = t.storage_[1];
+ a.storage_[2] = t.storage_[2];
+ a.storage_[3] = t.storage_[3];
+ return a;
+ }
 }
}
})();
@@ -955,4 +1123,3 @@ Object.defineProperty(SIMD, 'WWWX', { get: function() { return 0x3F; } });
Object.defineProperty(SIMD, 'WWWY', { get: function() { return 0x7F; } });
Object.defineProperty(SIMD, 'WWWZ', { get: function() { return 0xBF; } });
Object.defineProperty(SIMD, 'WWWW', { get: function() { return 0xFF; } });
-
diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h
new file mode 100644
index 00000000..31265db8
--- /dev/null
+++ b/system/include/emscripten/emmintrin.h
@@ -0,0 +1,103 @@
+/*
+ * Subset of the SSE2 integer intrinsics for Emscripten, built on the int32x4
+ * vector type that <xmmintrin.h> brings in.
+ */
+#ifndef __emscripten_emmintrin_h__
+#define __emscripten_emmintrin_h__
+
+#include <xmmintrin.h>
+
+typedef int32x4 __m128i;
+
+/* Lane 0 receives the last argument (w); lane 3 receives the first (z). */
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_set_epi32(int z, int y, int x, int w)
+{
+ return (__m128i){ w, x, y, z };
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_set1_epi32(int w)
+{
+ return (__m128i){ w, w, w, w };
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_setzero_si128(void)
+{
+ return (__m128i){ 0, 0, 0, 0 };
+}
+
+static __inline__ void __attribute__((__always_inline__))
+_mm_store_si128(__m128i *p, __m128i a)
+{
+ *p = a;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_and_si128(__m128i a, __m128i b)
+{
+ return a & b;
+}
+
+/* Note the operand order: the FIRST argument is complemented, i.e. (~a) & b. */
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_andnot_si128(__m128i a, __m128i b)
+{
+ return ~a & b;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_or_si128(__m128i a, __m128i b)
+{
+ return a | b;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_xor_si128(__m128i a, __m128i b)
+{
+ return a ^ b;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_add_epi32(__m128i a, __m128i b)
+{
+ return a + b;
+}
+
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_sub_epi32(__m128i a, __m128i b)
+{
+ return a - b;
+}
+
+/* Reinterprets the bits of a as floats; delegates to the bitsTo* helper. */
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_castsi128_ps(__m128i a)
+{
+ return emscripten_int32x4_bitsToFloat32x4(a);
+}
+
+/* Converts each integer lane to its float value; delegates to the to* helper. */
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cvtepi32_ps(__m128i a)
+{
+ return emscripten_int32x4_toFloat32x4(a);
+}
+
+/* Reinterprets the bits of a as integers; delegates to the bitsTo* helper. */
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_castps_si128(__m128 a)
+{
+ return emscripten_float32x4_bitsToInt32x4(a);
+}
+
+/* Converts each float lane to an integer. NOTE(review): SSE rounds to nearest
+ * here; confirm what emscripten_float32x4_toInt32x4 does for non-integers. */
+static __inline__ __m128i __attribute__((__always_inline__))
+_mm_cvtps_epi32(__m128 a)
+{
+ return emscripten_float32x4_toInt32x4(a);
+}
+
+#endif /* __emscripten_emmintrin_h__ */
diff --git a/system/include/emscripten/vector.h b/system/include/emscripten/vector.h
index 938f2369..cf26a5d6 100644
--- a/system/include/emscripten/vector.h
+++ b/system/include/emscripten/vector.h
@@ -2,7 +2,7 @@
// Support for the JS SIMD API proposal, https://github.com/johnmccutchan/ecmascript_simd
typedef float float32x4 __attribute__((__vector_size__(16)));
-typedef unsigned int uint32x4 __attribute__((__vector_size__(16)));
+typedef unsigned int int32x4 __attribute__((__vector_size__(16))); // NOTE(review): lanes are still unsigned although the type was renamed from uint32x4 to int32x4 - confirm this is intended.
#ifdef __cplusplus
extern "C" {
@@ -10,6 +10,24 @@ extern "C" {
unsigned int emscripten_float32x4_signmask(float32x4 x);
+float32x4 emscripten_float32x4_min(float32x4 a, float32x4 b); // backs _mm_min_ps
+float32x4 emscripten_float32x4_max(float32x4 a, float32x4 b); // backs _mm_max_ps
+float32x4 emscripten_float32x4_sqrt(float32x4 a); // backs _mm_sqrt_ps
+float32x4 emscripten_float32x4_lessThan(float32x4 a, float32x4 b); // comparisons return their result in a float32x4; test_simd3 expects lanes of 0xffffffff or 0
+float32x4 emscripten_float32x4_lessThanOrEqual(float32x4 a, float32x4 b);
+float32x4 emscripten_float32x4_equal(float32x4 a, float32x4 b);
+float32x4 emscripten_float32x4_greaterThanOrEqual(float32x4 a, float32x4 b);
+float32x4 emscripten_float32x4_greaterThan(float32x4 a, float32x4 b);
+float32x4 emscripten_float32x4_and(float32x4 a, float32x4 b); // backs _mm_and_ps
+float32x4 emscripten_float32x4_andNot(float32x4 a, float32x4 b); // backs _mm_andnot_ps; test_simd3 expects (~a) & b
+float32x4 emscripten_float32x4_or(float32x4 a, float32x4 b); // backs _mm_or_ps
+float32x4 emscripten_float32x4_xor(float32x4 a, float32x4 b); // backs _mm_xor_ps
+
+float32x4 emscripten_int32x4_bitsToFloat32x4(int32x4 a); // backs _mm_castsi128_ps (bit pattern kept)
+float32x4 emscripten_int32x4_toFloat32x4(int32x4 a); // backs _mm_cvtepi32_ps (numeric conversion)
+int32x4 emscripten_float32x4_bitsToInt32x4(float32x4 a); // backs _mm_castps_si128 (bit pattern kept)
+int32x4 emscripten_float32x4_toInt32x4(float32x4 a); // backs _mm_cvtps_epi32 (numeric conversion)
+
#ifdef __cplusplus
}
#endif
diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h
new file mode 100644
index 00000000..1b9108fa
--- /dev/null
+++ b/system/include/emscripten/xmmintrin.h
@@ -0,0 +1,142 @@
+/*
+ * Subset of the SSE float intrinsics for Emscripten, built on the float32x4
+ * vector type and the emscripten_float32x4_* helpers declared in <vector.h>.
+ */
+#ifndef __emscripten_xmmintrin_h__
+#define __emscripten_xmmintrin_h__
+
+#include <vector.h>
+
+typedef float32x4 __m128;
+
+/* Lane 0 receives the last argument (w); lane 3 receives the first (z). */
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_set_ps(float z, float y, float x, float w)
+{
+ return (__m128){ w, x, y, z };
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_set1_ps(float w)
+{
+ return (__m128){ w, w, w, w };
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_setzero_ps(void)
+{
+ return (__m128){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+/* Stores all four lanes through p, which is accessed as a float32x4. */
+static __inline__ void __attribute__((__always_inline__))
+_mm_store_ps(float *p, __m128 a)
+{
+ *(__m128 *)p = a;
+}
+
+static __inline__ int __attribute__((__always_inline__))
+_mm_movemask_ps(__m128 a)
+{
+ return emscripten_float32x4_signmask(a);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_add_ps(__m128 a, __m128 b)
+{
+ return a + b;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_sub_ps(__m128 a, __m128 b)
+{
+ return a - b;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_mul_ps(__m128 a, __m128 b)
+{
+ return a * b;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_div_ps(__m128 a, __m128 b)
+{
+ return a / b;
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_min_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_min(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_max_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_max(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_sqrt_ps(__m128 a)
+{
+ return emscripten_float32x4_sqrt(a);
+}
+
+/* TODO: shuffles */
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cmplt_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_lessThan(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cmple_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_lessThanOrEqual(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cmpeq_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_equal(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cmpge_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_greaterThanOrEqual(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_cmpgt_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_greaterThan(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_and_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_and(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_andnot_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_andNot(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_or_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_or(a, b);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__))
+_mm_xor_ps(__m128 a, __m128 b)
+{
+ return emscripten_float32x4_xor(a, b);
+}
+
+#endif /* __emscripten_xmmintrin_h__ */
diff --git a/tests/test_core.py b/tests/test_core.py
index cbde794f..ec00c0a5 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -8811,20 +8811,20 @@ int main(int argc, char **argv) {
printf("zeros %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3]);
}
{
- uint32x4 *a = (uint32x4*)&data[0];
- uint32x4 *b = (uint32x4*)&data[4];
- uint32x4 c, d, e, f;
+ int32x4 *a = (int32x4*)&data[0];
+ int32x4 *b = (int32x4*)&data[4];
+ int32x4 c, d, e, f;
c = *a;
d = *b;
- printf("4uints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]);
+ printf("4ints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]);
e = c+d;
f = c-d;
- printf("5uints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
+ printf("5ints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
e = c&d;
f = c|d;
e = ~c&d;
f = c^d;
- printf("5uintops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
+ printf("5intops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]);
}
{
float32x4 c, d, e, f;
@@ -8842,9 +8842,9 @@ int main(int argc, char **argv) {
2floats! 48, 68, 92, 120 42, 56, 72, 90
3floats! 48, 68, 92, 120 2016, 3808, 6624, 10800
zeros 0, 0, 0, 0
-4uints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736
-5uints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056
-5uintops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096
+4ints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736
+5ints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056
+5intops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096
6floats! -9, 0, 4, 9 -2, -12, 14, 10
''')
@@ -8895,6 +8895,484 @@ zeros 0, 0, 0, 0
16.000000
''')
+ def test_simd3(self): # Runs a C++ program that asserts on the <emmintrin.h> intrinsics and expects the output 'DONE'.
+ if Settings.USE_TYPED_ARRAYS != 2: return self.skip('needs ta2')
+ if Settings.ASM_JS: Settings.ASM_JS = 2 # does not validate
+ src = r'''
+ #include <iostream>
+ #include <emmintrin.h>
+ #include <assert.h>
+ #include <stdint.h>
+ #include <bitset>
+
+ using namespace std;
+
+ void testSetPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 4.0);
+ assert(ar[1] == 3.0);
+ assert(ar[2] == 2.0);
+ assert(ar[3] == 1.0);
+ }
+
+ void testSet1Ps() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_set1_ps(5.5);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 5.5);
+ assert(ar[1] == 5.5);
+ assert(ar[2] == 5.5);
+ assert(ar[3] == 5.5);
+ }
+
+ void testSetZeroPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_setzero_ps();
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testSetEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_set_epi32(5, 7, 126, 381);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 381);
+ assert(ar[1] == 126);
+ assert(ar[2] == 7);
+ assert(ar[3] == 5);
+ v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x12345678);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xaaaaaaaa);
+ assert(ar[3] == 0x55555555);
+ }
+
+ void testSet1Epi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_set1_epi32(-5);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == -5);
+ assert(ar[1] == -5);
+ assert(ar[2] == -5);
+ assert(ar[3] == -5);
+ }
+
+ void testSetZeroSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_setzero_si128();
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testBitCasts() {
+ int32_t __attribute__((__aligned__(16))) ar1[4];
+ float __attribute__((__aligned__(16))) ar2[4];
+ __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000);
+ __m128 v2 = _mm_castsi128_ps(v1);
+ _mm_store_ps(ar2, v2);
+ assert(ar2[0] == 4.0);
+ assert(ar2[1] == 3.0);
+ assert(ar2[2] == 2.0);
+ assert(ar2[3] == 1.0);
+ v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0);
+ v1 = _mm_castps_si128(v2);
+ _mm_store_si128((__m128i *)ar1, v1);
+ assert(ar1[0] == 0x41000000);
+ assert(ar1[1] == 0x40e00000);
+ assert(ar1[2] == 0x40c00000);
+ assert(ar1[3] == 0x40a00000);
+ float w = 0;
+ float z = -278.3;
+ float y = 5.2;
+ float x = -987654321;
+ v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x));
+ _mm_store_ps(ar2, _mm_castsi128_ps(v1));
+ assert(ar2[0] == x);
+ assert(ar2[1] == y);
+ assert(ar2[2] == z);
+ assert(ar2[3] == w);
+ /*
+ std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned long*>(&(ar2[0])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned long*>(&(ar2[1])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned long*>(&(ar2[2])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned long*>(&(ar2[3])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned long*>(&x));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned long*>(&y));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned long*>(&z));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned long*>(&w));
+ assert(bits1x == bits2x);
+ assert(bits1y == bits2y);
+ assert(bits1z == bits2z);
+ assert(bits1w == bits2w);
+ */
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa));
+ _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2));
+ assert(ar1[0] == 0xaaaaaaaa);
+ assert(ar1[1] == 0x5555cccc);
+ assert(ar1[2] == 0);
+ assert(ar1[3] == 0xffffffff);
+ }
+
+ void testConversions() {
+ int32_t __attribute__((__aligned__(16))) ar1[4];
+ float __attribute__((__aligned__(16))) ar2[4];
+ __m128i v1 = _mm_set_epi32(0, -3, -517, 256);
+ __m128 v2 = _mm_cvtepi32_ps(v1);
+ _mm_store_ps(ar2, v2);
+ assert(ar2[0] == 256.0);
+ assert(ar2[1] == -517.0);
+ assert(ar2[2] == -3.0);
+ assert(ar2[3] == 0);
+ v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0);
+ v1 = _mm_cvtps_epi32(v2);
+ _mm_store_si128((__m128i *)ar1, v1);
+ assert(ar1[0] == -8);
+ assert(ar1[1] == 7);
+ assert(ar1[2] == 6);
+ assert(ar1[3] == 5);
+ }
+
+ void testMoveMaskPs() {
+ __m128 v = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ int mask = _mm_movemask_ps(v);
+ assert(mask == 13);
+ }
+
+ void testAddPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_add_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 41.0);
+ assert(ar[1] == 32.0);
+ assert(ar[2] == 23.0);
+ assert(ar[3] == 14.0);
+ }
+
+ void testSubPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_sub_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == -39.0);
+ assert(ar[1] == -28.0);
+ assert(ar[2] == -17.0);
+ assert(ar[3] == -6.0);
+ }
+
+ void testMulPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_mul_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 40.0);
+ assert(ar[1] == 60.0);
+ assert(ar[2] == 60.0);
+ assert(ar[3] == 40.0);
+ }
+
+ void testDivPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0);
+ __m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5);
+ __m128 v = _mm_div_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 2.0);
+ assert(ar[1] == 8.0);
+ assert(ar[2] == 3.0);
+ assert(ar[3] == 2.0);
+ }
+
+ void testMinPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
+ __m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0);
+ __m128 v = _mm_min_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0.0);
+ assert(ar[1] == 30.0);
+ assert(ar[2] == 1.0);
+ assert(ar[3] == -20.0);
+ }
+
+ void testMaxPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
+ __m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0);
+ __m128 v = _mm_max_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 1.0);
+ assert(ar[1] == 55.0);
+ assert(ar[2] == 10.0);
+ assert(ar[3] == 2.5);
+ }
+
+ void testSqrtPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
+ __m128 v = _mm_sqrt_ps(v1);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 1.0);
+ assert(ar[1] == 2.0);
+ assert(ar[2] == 3.0);
+ assert(ar[3] == 4.0);
+ }
+
+ void testCmpLtPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmplt_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0xffffffff);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0xffffffff);
+ assert(_mm_movemask_ps(v) == 9);
+ }
+
+ void testCmpLePs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmple_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0xffffffff);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ assert(_mm_movemask_ps(v) == 13);
+ }
+
+ void testCmpEqPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpeq_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 4);
+ }
+
+ void testCmpGePs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpge_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 6);
+ }
+
+ void testCmpGtPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpgt_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 2);
+ }
+
+ void testAndPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(425, -501, -32, 68);
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ __m128 v = _mm_and_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 68);
+ assert(ar[1] == 0);
+ assert(ar[2] == -501);
+ assert(ar[3] == 425);
+ int32_t __attribute__((__aligned__(16))) ar2[4];
+ v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ v = _mm_and_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
+ assert(ar2[0] == 0);
+ assert(ar2[1] == 0);
+ assert(ar2[2] == 0);
+ assert(ar2[3] == 0);
+ }
+
+ void testAndNotPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(425, -501, -32, 68);
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ __m128 v = _mm_andnot_ps(v2, v1);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == -32);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ int32_t __attribute__((__aligned__(16))) ar2[4];
+ v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ v = _mm_andnot_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
+ assert(ar2[0] == 0x55555555);
+ assert(ar2[1] == 0x55555555);
+ assert(ar2[2] == 0x55555555);
+ assert(ar2[3] == 0x55555555);
+ }
+
+ void testOrPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ __m128 v = _mm_or_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testXorPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ __m128 v = _mm_xor_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xaaaaaaaa);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testAndSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_and_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testAndNotSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_andnot_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0x55555555);
+ assert(ar[2] == 0x55555555);
+ assert(ar[3] == 0x55555555);
+ }
+
+ void testOrSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_or_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testXorSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_xor_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xaaaaaaaa);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testAddEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
+ __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
+ __m128i v = _mm_add_epi32(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 41);
+ assert(ar[1] == 32);
+ assert(ar[2] == 23);
+ assert(ar[3] == 14);
+ }
+
+ void testSubEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
+ __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
+ __m128i v = _mm_sub_epi32(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == -39);
+ assert(ar[1] == -28);
+ assert(ar[2] == -17);
+ assert(ar[3] == -6);
+ }
+
+ int main(int argc, char ** argv) {
+ testSetPs();
+ testSet1Ps();
+ testSetZeroPs();
+ testSetEpi32();
+ testSet1Epi32();
+ testSetZeroSi128();
+ testBitCasts();
+ testConversions();
+ testMoveMaskPs();
+ testAddPs();
+ testSubPs();
+ testMulPs();
+ testDivPs();
+ testMaxPs();
+ testMinPs();
+ testSqrtPs();
+ testCmpLtPs();
+ testCmpLePs();
+ testCmpEqPs();
+ testCmpGePs();
+ testCmpGtPs();
+ testAndPs();
+ testAndNotPs();
+ testOrPs();
+ testXorPs();
+ testAndSi128();
+ testAndNotSi128();
+ testOrSi128();
+ testXorSi128();
+ testAddEpi32();
+ testSubEpi32();
+ printf("DONE");
+ return 0;
+ }
+ '''
+
+ self.do_run(src, 'DONE') # 'DONE' is printed by main() only after every test function above has returned
+
+
+
def test_gcc_unmangler(self):
Settings.NAMED_GLOBALS = 1 # test coverage for this