diff options
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | src/jsifier.js | 3 | ||||
-rw-r--r-- | src/library.js | 68 | ||||
-rw-r--r-- | src/parseTools.js | 29 | ||||
-rw-r--r-- | src/simd.js | 1031 | ||||
-rw-r--r-- | system/include/emscripten/emmintrin.h | 87 | ||||
-rw-r--r-- | system/include/emscripten/vector.h | 20 | ||||
-rw-r--r-- | system/include/emscripten/xmmintrin.h | 131 | ||||
-rw-r--r-- | tests/test_core.py | 496 |
9 files changed, 1406 insertions, 460 deletions
@@ -110,3 +110,4 @@ a license to everyone to use it as detailed in LICENSE.) * John Vilk <jvilk@cs.umass.edu> * Daniel Baulig <dbaulig@fb.com> (copyright owned by Facebook, Inc.) * Lu Wang <coolwanglu@gmail.com> +* Heidi Pan <heidi.pan@intel.com> (copyright owned by Intel) diff --git a/src/jsifier.js b/src/jsifier.js index cb753e57..fb6c5ba8 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -1373,8 +1373,9 @@ function JSify(data, functionsOnly, givenFunctions) { function insertelementHandler(item) { var base = getVectorBaseType(item.type); var ident = ensureVector(item.ident, base); + var laneOp = ((base == 'float') ? 'SIMD.float32x4.with' : 'SIMD.int32x4.with'); //return ident + '.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + finalizeLLVMParameter(item.value) + ')'; - return 'SIMD.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')'; + return laneOp + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')'; } function extractelementHandler(item) { var base = getVectorBaseType(item.type); diff --git a/src/library.js b/src/library.js index 128bb211..faca945c 100644 --- a/src/library.js +++ b/src/library.js @@ -8736,8 +8736,72 @@ LibraryManager.library = { // emscripten vector ops //============================ - emscripten_float32x4_signmask__inline: function(x) { - return x + '.signMask()'; + emscripten_float32x4_signmask__inline: function(a) { + return 'SIMD.float32x4.bitsToInt32x4(' + a + ').signMask'; + }, + + emscripten_float32x4_min__inline: function(a, b) { + return 'SIMD.float32x4.min(' + a + ', ' + b + ')'; + }, + + emscripten_float32x4_max__inline: function(a, b) { + return 'SIMD.float32x4.max(' + a + ', ' + b + ')'; + }, + + emscripten_float32x4_sqrt__inline: function(a) { + return 'SIMD.float32x4.sqrt(' + a + ')'; + }, + + emscripten_float32x4_lessThan__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThan(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_lessThanOrEqual__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThanOrEqual(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_equal__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.equal(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_greaterThanOrEqual__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThanOrEqual(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_greaterThan__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThan(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_and__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_andNot__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.int32x4.not(SIMD.float32x4.bitsToInt32x4(' + a + ')), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_or__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_xor__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.xor(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_int32x4_bitsToFloat32x4__inline: function(a) { + return 'SIMD.int32x4.bitsToFloat32x4(' + a + ')'; + }, + + emscripten_int32x4_toFloat32x4__inline: function(a) { + return 'SIMD.int32x4.toFloat32x4(' + a + ')'; + }, + + emscripten_float32x4_bitsToInt32x4__inline: function(a) { + return 'SIMD.float32x4.bitsToInt32x4(' + a + ')'; + }, + + emscripten_float32x4_toInt32x4__inline: function(a) { + return 'SIMD.float32x4.toInt32x4(' + a + ')'; }, //============================ diff --git a/src/parseTools.js b/src/parseTools.js index 134cb89a..ffd7c758 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -362,7 +362,7 @@ function getVectorNativeType(type) { function getSIMDName(type) { switch (type) { - case 'i32': return 'uint'; + case 'i32': return 'int'; case 'float': return 'float'; default: throw 'getSIMDName ' + type; } @@ -2372,29 +2372,28 @@ function processMathop(item) { // vector/SIMD operation Types.usesSIMD = true; switch (op) { - case 'fadd': return 'SIMD.add(' + idents[0] + ',' + idents[1] + ')'; - case 'fsub': return 'SIMD.sub(' + idents[0] + ',' + idents[1] + ')'; - case 'fmul': return 'SIMD.mul(' + idents[0] + ',' + idents[1] + ')'; - case 'fdiv': return 'SIMD.div(' + idents[0] + ',' + idents[1] + ')'; - case 'add' : return 'SIMD.addu32(' + idents[0] + ',' + idents[1] + ')'; - case 'sub' : return 'SIMD.subu32(' + idents[0] + ',' + idents[1] + ')'; - case 'mul' : return 'SIMD.mulu32(' + idents[0] + ',' + idents[1] + ')'; - case 'udiv': return 'SIMD.divu32(' + idents[0] + ',' + idents[1] + ')'; + case 'fadd': return 'SIMD.float32x4.add(' + idents[0] + ',' + idents[1] + ')'; + case 'fsub': return 'SIMD.float32x4.sub(' + idents[0] + ',' + idents[1] + ')'; + case 'fmul': return 'SIMD.float32x4.mul(' + idents[0] + ',' + idents[1] + ')'; + case 'fdiv': return 'SIMD.float32x4.div(' + idents[0] + ',' + idents[1] + ')'; + case 'add' : return 'SIMD.int32x4.add(' + idents[0] + ',' + idents[1] + ')'; + case 'sub' : return 'SIMD.int32x4.sub(' + idents[0] + ',' + idents[1] + ')'; + case 'mul' : return 'SIMD.int32x4.mul(' + idents[0] + ',' + idents[1] + ')'; case 'bitcast': { var inType = item.params[0].type; var outType = item.type; if (inType === '<4 x float>') { assert(outType === '<4 x i32>'); - return 'SIMD.float32x4BitsToUint32x4(' + idents[0] + ')'; + return 'SIMD.float32x4.bitsToInt32x4(' + idents[0] + ')'; } else { assert(inType === '<4 x i32>'); assert(outType === '<4 x float>'); - return 'SIMD.uint32x4BitsToFloat32x4(' + idents[0] + ')'; + return 'SIMD.int32x4.bitsToFloat32x4(' + idents[0] + ')'; } } - case 'and': return 'SIMD.and(' + idents[0] + ',' + idents[1] + ')'; - case 'or': return 'SIMD.or(' + idents[0] + ',' + idents[1] + ')'; - case 'xor': return 'SIMD.xor(' + idents[0] + ',' + idents[1] + ')'; + case 'and': return 'SIMD.int32x4.and(' + idents[0] + ',' + idents[1] + ')'; + case 'or': return 'SIMD.int32x4.or(' + idents[0] + ',' + idents[1] + ')'; + case 'xor': return 'SIMD.int32x4.xor(' + idents[0] + ',' + idents[1] + ')'; default: throw 'vector op todo: ' + dump(item); } } @@ -2698,7 +2697,7 @@ var simdLane = ['x', 'y', 'z', 'w']; function ensureVector(ident, base) { Types.usesSIMD = true; - return ident == 0 ? base + '32x4.zero()' : ident; + return ident == 0 ? base + '32x4.splat(0)' : ident; } function ensureValidFFIType(type) { diff --git a/src/simd.js b/src/simd.js index bbb12d0a..c7f5ff48 100644 --- a/src/simd.js +++ b/src/simd.js @@ -20,8 +20,10 @@ https://github.com/johnmccutchan/ecmascript_simd/blob/master/src/ecmascript_simd.js */ +"use strict"; + /** - * Construct a new instance of a float32x4 number. + * Construct a new instance of float32x4 number. * @param {double} value used for x lane. * @param {double} value used for y lane. * @param {double} value used for z lane. @@ -40,7 +42,7 @@ function float32x4(x, y, z, w) { } /** - * Construct a new instance of a float32x4 number with 0.0 in all lanes. + * Construct a new instance of float32x4 number with 0.0 in all lanes. * @constructor */ float32x4.zero = function() { @@ -48,7 +50,7 @@ float32x4.zero = function() { } /** - * Construct a new instance of a float32x4 number with the same value + * Construct a new instance of float32x4 number with the same value * in all lanes. * @param {double} value used for all lanes. * @constructor @@ -87,18 +89,18 @@ Object.defineProperty(float32x4.prototype, 'signMask', { }); /** - * Construct a new instance of a uint32x4 number. + * Construct a new instance of int32x4 number. * @param {integer} 32-bit unsigned value used for x lane. * @param {integer} 32-bit unsigned value used for y lane. * @param {integer} 32-bit unsigned value used for z lane. * @param {integer} 32-bit unsigned value used for w lane. * @constructor */ -function uint32x4(x, y, z, w) { - if (!(this instanceof uint32x4)) { - return new uint32x4(x, y, z, w); +function int32x4(x, y, z, w) { + if (!(this instanceof int32x4)) { + return new int32x4(x, y, z, w); } - this.storage_ = new Uint32Array(4); + this.storage_ = new Int32Array(4); this.storage_[0] = x; this.storage_[1] = y; this.storage_[2] = z; @@ -106,7 +108,7 @@ function uint32x4(x, y, z, w) { } /** - * Construct a new instance of a uint32x4 number with 0xFFFFFFFF or 0x0 in each + * Construct a new instance of int32x4 number with 0xFFFFFFFF or 0x0 in each * lane, depending on the truth value in x, y, z, and w. * @param {boolean} flag used for x lane. * @param {boolean} flag used for y lane. @@ -114,59 +116,59 @@ function uint32x4(x, y, z, w) { * @param {boolean} flag used for w lane. * @constructor */ -uint32x4.bool = function(x, y, z, w) { - return uint32x4(x ? 0xFFFFFFFF : 0x0, - y ? 0xFFFFFFFF : 0x0, - z ? 0xFFFFFFFF : 0x0, - w ? 0xFFFFFFFF : 0x0); +int32x4.bool = function(x, y, z, w) { + return int32x4(x ? -1 : 0x0, + y ? -1 : 0x0, + z ? -1 : 0x0, + w ? -1 : 0x0); } /** - * Construct a new instance of a uint32x4 number with the same value + * Construct a new instance of int32x4 number with the same value * in all lanes. * @param {integer} value used for all lanes. * @constructor */ -uint32x4.splat = function(s) { - return uint32x4(s, s, s, s); +int32x4.splat = function(s) { + return int32x4(s, s, s, s); } -Object.defineProperty(uint32x4.prototype, 'x', { +Object.defineProperty(int32x4.prototype, 'x', { get: function() { return this.storage_[0]; } }); -Object.defineProperty(uint32x4.prototype, 'y', { +Object.defineProperty(int32x4.prototype, 'y', { get: function() { return this.storage_[1]; } }); -Object.defineProperty(uint32x4.prototype, 'z', { +Object.defineProperty(int32x4.prototype, 'z', { get: function() { return this.storage_[2]; } }); -Object.defineProperty(uint32x4.prototype, 'w', +Object.defineProperty(int32x4.prototype, 'w', { get: function() { return this.storage_[3]; } }); -Object.defineProperty(uint32x4.prototype, 'flagX', { +Object.defineProperty(int32x4.prototype, 'flagX', { get: function() { return this.storage_[0] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagY', { +Object.defineProperty(int32x4.prototype, 'flagY', { get: function() { return this.storage_[1] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagZ', { +Object.defineProperty(int32x4.prototype, 'flagZ', { get: function() { return this.storage_[2] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagW', +Object.defineProperty(int32x4.prototype, 'flagW', { get: function() { return this.storage_[3] != 0x0; } }); /** * Extract the sign bit from each lane return them in the first 4 bits. */ -Object.defineProperty(uint32x4.prototype, 'signMask', { +Object.defineProperty(int32x4.prototype, 'signMask', { get: function() { var mx = (this.storage_[0] & 0x80000000) >>> 31; var my = (this.storage_[1] & 0x80000000) >>> 31; @@ -287,414 +289,580 @@ Float32x4Array.prototype.setAt = function(i, v) { this.storage_[i*4+3] = v.w; } + +function Int32x4Array(a, b, c) { + + function isNumber(o) { + return typeof o == "number" || (typeof o == "object" && o.constructor === Number); + } + + function isTypedArray(o) { + return (o instanceof Int8Array) || + (o instanceof Uint8Array) || + (o instanceof Uint8ClampedArray) || + (o instanceof Int16Array) || + (o instanceof Uint16Array) || + (o instanceof Int32Array) || + (o instanceof Uint32Array) || + (o instanceof Float32Array) || + (o instanceof Float64Array) || + (o instanceof Int32x4Array) || + (o instanceof Float32x4Array); + } + + function isArrayBuffer(o) { + return (o instanceof ArrayBuffer); + } + + if (isNumber(a)) { + this.storage_ = new Int32Array(a*4); + this.length_ = a; + this.byteOffset_ = 0; + return; + } else if (isTypedArray(a)) { + if (!(a instanceof Int32x4Array)) { + throw "Copying typed array of non-Int32x4Array is unimplemented."; + } + this.storage_ = new Int32Array(a.length * 4); + this.length_ = a.length; + this.byteOffset_ = 0; + // Copy floats. + for (var i = 0; i < a.length*4; i++) { + this.storage_[i] = a.storage_[i]; + } + } else if (isArrayBuffer(a)) { + if ((b != undefined) && (b % Int32x4Array.BYTES_PER_ELEMENT) != 0) { + throw "byteOffset must be a multiple of 16."; + } + if (c != undefined) { + c *= 4; + this.storage_ = new Int32Array(a, b, c); + } + else { + // Note: new Int32Array(a, b) is NOT equivalent to new Float32Array(a, b, undefined) + this.storage_ = new Int32Array(a, b); + } + this.length_ = this.storage_.length / 4; + this.byteOffset_ = b != undefined ? b : 0; + } else { + throw "Unknown type of first argument."; + } +} + +Object.defineProperty(Int32x4Array.prototype, 'length', + { get: function() { return this.length_; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'byteLength', + { get: function() { return this.length_ * Int32x4Array.BYTES_PER_ELEMENT; } +}); + +Object.defineProperty(Int32x4Array, 'BYTES_PER_ELEMENT', + { get: function() { return 16; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'BYTES_PER_ELEMENT', + { get: function() { return 16; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'byteOffset', + { get: function() { return this.byteOffset_; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'buffer', + { get: function() { return this.storage_.buffer; } +}); + +Int32x4Array.prototype.getAt = function(i) { + if (i < 0) { + throw "Index must be >= 0."; + } + if (i >= this.length) { + throw "Index out of bounds."; + } + var x = this.storage_[i*4+0]; + var y = this.storage_[i*4+1]; + var z = this.storage_[i*4+2]; + var w = this.storage_[i*4+3]; + return float32x4(x, y, z, w); +} + +Int32x4Array.prototype.setAt = function(i, v) { + if (i < 0) { + throw "Index must be >= 0."; + } + if (i >= this.length) { + throw "Index out of bounds."; + } + if (!(v instanceof int32x4)) { + throw "Value is not a int32x4."; + } + this.storage_[i*4+0] = v.x; + this.storage_[i*4+1] = v.y; + this.storage_[i*4+2] = v.z; + this.storage_[i*4+3] = v.w; +} + var SIMD = (function () { return { - /** - * @return {float32x4} New instance of float32x4 with absolute values of - * t. - */ - abs: function(t) { - return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z), - Math.abs(t.w)); - }, - /** - * @return {float32x4} New instance of float32x4 with negated values of - * t. - */ - neg: function(t) { - return new float32x4(-t.x, -t.y, -t.z, -t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a + b. - */ - add: function(a, b) { - return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a - b. - */ - sub: function(a, b) { - return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a * b. - */ - mul: function(a, b) { - return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a / b. - */ - div: function(a, b) { - return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with t's values clamped - * between lowerLimit and upperLimit. - */ - clamp: function(t, lowerLimit, upperLimit) { - var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x; - var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y; - var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z; - var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w; - cx = cx > upperLimit.x ? upperLimit.x : cx; - cy = cy > upperLimit.y ? upperLimit.y : cy; - cz = cz > upperLimit.z ? upperLimit.z : cz; - cw = cw > upperLimit.w ? upperLimit.w : cw; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with the minimum value of - * t and other. - */ - min: function(t, other) { - var cx = t.x > other.x ? other.x : t.x; - var cy = t.y > other.y ? other.y : t.y; - var cz = t.z > other.z ? other.z : t.z; - var cw = t.w > other.w ? other.w : t.w; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with the maximum value of - * t and other. - */ - max: function(t, other) { - var cx = t.x < other.x ? other.x : t.x; - var cy = t.y < other.y ? other.y : t.y; - var cz = t.z < other.z ? other.z : t.z; - var cw = t.w < other.w ? other.w : t.w; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with reciprocal value of - * t. - */ - reciprocal: function(t) { - return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with square root of the - * reciprocal value of t. - */ - reciprocalSqrt: function(t) { - return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y), - Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w)); - }, - /** - * @return {float32x4} New instance of float32x4 with values of t - * scaled by s. - */ - scale: function(t, s) { - return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with square root of - * values of t. - */ - sqrt: function(t) { - return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y), - Math.sqrt(t.z), Math.sqrt(t.w)); - }, - /** - * @param {float32x4} t An instance of float32x4 to be shuffled. - * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. - * @return {float32x4} New instance of float32x4 with lanes shuffled. - */ - shuffle: function(t, mask) { - var _x = (mask) & 0x3; - var _y = (mask >> 2) & 0x3; - var _z = (mask >> 4) & 0x3; - var _w = (mask >> 6) & 0x3; - return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], - t.storage_[_w]); + float32x4: { + /** + * @return {float32x4} New instance of float32x4 with absolute values of + * t. + */ + abs: function(t) { + return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z), + Math.abs(t.w)); + }, + /** + * @return {float32x4} New instance of float32x4 with negated values of + * t. + */ + neg: function(t) { + return new float32x4(-t.x, -t.y, -t.z, -t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a + b. + */ + add: function(a, b) { + return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a - b. + */ + sub: function(a, b) { + return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a * b. + */ + mul: function(a, b) { + return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a / b. + */ + div: function(a, b) { + return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with t's values clamped + * between lowerLimit and upperLimit. + */ + clamp: function(t, lowerLimit, upperLimit) { + var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x; + var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y; + var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z; + var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w; + cx = cx > upperLimit.x ? upperLimit.x : cx; + cy = cy > upperLimit.y ? upperLimit.y : cy; + cz = cz > upperLimit.z ? upperLimit.z : cz; + cw = cw > upperLimit.w ? upperLimit.w : cw; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with the minimum value of + * t and other. + */ + min: function(t, other) { + var cx = t.x > other.x ? other.x : t.x; + var cy = t.y > other.y ? other.y : t.y; + var cz = t.z > other.z ? other.z : t.z; + var cw = t.w > other.w ? other.w : t.w; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with the maximum value of + * t and other. + */ + max: function(t, other) { + var cx = t.x < other.x ? other.x : t.x; + var cy = t.y < other.y ? other.y : t.y; + var cz = t.z < other.z ? other.z : t.z; + var cw = t.w < other.w ? other.w : t.w; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with reciprocal value of + * t. + */ + reciprocal: function(t) { + return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with square root of the + * reciprocal value of t. + */ + reciprocalSqrt: function(t) { + return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y), + Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w)); + }, + /** + * @return {float32x4} New instance of float32x4 with values of t + * scaled by s. + */ + scale: function(t, s) { + return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with square root of + * values of t. + */ + sqrt: function(t) { + return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y), + Math.sqrt(t.z), Math.sqrt(t.w)); + }, + /** + * @param {float32x4} t An instance of float32x4 to be shuffled. + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {float32x4} New instance of float32x4 with lanes shuffled. + */ + shuffle: function(t, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], + t.storage_[_w]); + }, + /** + * @param {float32x4} t1 An instance of float32x4 to be shuffled. XY lanes in result + * @param {float32x4} t2 An instance of float32x4 to be shuffled. ZW lanes in result + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {float32x4} New instance of float32x4 with lanes shuffled. + */ + shuffleMix: function(t1, t2, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new float32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z], + t2.storage_[_w]); + }, + /** + * @param {double} value used for x lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * x replaced with {x}. + */ + withX: function(t, x) { + return new float32x4(x, t.y, t.z, t.w); + }, + /** + * @param {double} value used for y lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * y replaced with {y}. + */ + withY: function(t, y) { + return new float32x4(t.x, y, t.z, t.w); + }, + /** + * @param {double} value used for z lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * z replaced with {z}. + */ + withZ: function(t, z) { + return new float32x4(t.x, t.y, z, t.w); + }, + /** + * @param {double} value used for w lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * w replaced with {w}. + */ + withW: function(t, w) { + return new float32x4(t.x, t.y, t.z, w); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t < other. + */ + lessThan: function(t, other) { + var cx = t.x < other.x; + var cy = t.y < other.y; + var cz = t.z < other.z; + var cw = t.w < other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t <= other. + */ + lessThanOrEqual: function(t, other) { + var cx = t.x <= other.x; + var cy = t.y <= other.y; + var cz = t.z <= other.z; + var cw = t.w <= other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t == other. + */ + equal: function(t, other) { + var cx = t.x == other.x; + var cy = t.y == other.y; + var cz = t.z == other.z; + var cw = t.w == other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t != other. + */ + notEqual: function(t, other) { + var cx = t.x != other.x; + var cy = t.y != other.y; + var cz = t.z != other.z; + var cw = t.w != other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t >= other. + */ + greaterThanOrEqual: function(t, other) { + var cx = t.x >= other.x; + var cy = t.y >= other.y; + var cz = t.z >= other.z; + var cw = t.w >= other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t > other. + */ + greaterThan: function(t, other) { + var cx = t.x > other.x; + var cy = t.y > other.y; + var cz = t.z > other.z; + var cw = t.w > other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @return {int32x4} a bit-wise copy of t as a int32x4. + */ + bitsToInt32x4: function(t) { + var alias = new Int32Array(t.storage_.buffer); + return new int32x4(alias[0], alias[1], alias[2], alias[3]); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @return {int32x4} with a integer to float conversion of t. + */ + toInt32x4: function(t) { + var a = new int32x4(t.storage_[0], t.storage_[1], t.storage_[2], + t.storage_[3]); + return a; + } }, - /** - * @param {double} value used for x lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * x replaced with {x}. - */ - withX: function(t, x) { - return new float32x4(x, t.y, t.z, t.w); - }, - /** - * @param {double} value used for y lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * y replaced with {y}. - */ - withY: function(t, y) { - return new float32x4(t.x, y, t.z, t.w); - }, - /** - * @param {double} value used for z lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * z replaced with {z}. - */ - withZ: function(t, z) { - return new float32x4(t.x, t.y, z, t.w); - }, - /** - * @param {double} value used for w lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * w replaced with {w}. - */ - withW: function(t, w) { - return new float32x4(t.x, t.y, t.z, w); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t < other. - */ - lessThan: function(t, other) { - var cx = t.x < other.x; - var cy = t.y < other.y; - var cz = t.z < other.z; - var cw = t.w < other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t <= other. - */ - lessThanOrEqual: function(t, other) { - var cx = t.x <= other.x; - var cy = t.y <= other.y; - var cz = t.z <= other.z; - var cw = t.w <= other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t == other. - */ - equal: function(t, other) { - var cx = t.x == other.x; - var cy = t.y == other.y; - var cz = t.z == other.z; - var cw = t.w == other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t != other. - */ - notEqual: function(t, other) { - var cx = t.x != other.x; - var cy = t.y != other.y; - var cz = t.z != other.z; - var cw = t.w != other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t >= other. - */ - greaterThanOrEqual: function(t, other) { - var cx = t.x >= other.x; - var cy = t.y >= other.y; - var cz = t.z >= other.z; - var cw = t.w >= other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t > other. - */ - greaterThan: function(t, other) { - var cx = t.x > other.x; - var cy = t.y > other.y; - var cz = t.z > other.z; - var cw = t.w > other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a & b. - */ - and: function(a, b) { - return new uint32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a | b. - */ - or: function(a, b) { - return new uint32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a ^ b. - */ - xor: function(a, b) { - return new uint32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of ~a - */ - negu32: function(t) { - return new uint32x4(~t.x, ~t.y, ~t.z, ~t.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a + b. - */ - addu32: function(a, b) { - return new uint32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a - b. - */ - subu32: function(a, b) { - return new uint32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a * b. - */ - mulu32: function(a, b) { - return new uint32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y), - Math.imul(a.z, b.z), Math.imul(a.w, b.w)); - }, - /** - * @param {float32x4} - */ - select: function(t, trueValue, falseValue) { - var tv = SIMD.float32x4BitsToUint32x4(trueValue); - var fv = SIMD.float32x4BitsToUint32x4(falseValue); - var tr = SIMD.and(t, tv); - var fr = SIMD.and(SIMD.negu32(t), fv); - return SIMD.uint32x4BitsToFloat32x4(SIMD.or(tr, fr)); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for x lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * x lane replaced with {x}. - */ - withXu32: function(t, x) { - return new uint32x4(x, t.y, t.z, t.w); - }, - /** - * param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for y lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * y lane replaced with {y}. - */ - withYu32: function(t, y) { - return new uint32x4(t.x, y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for z lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * z lane replaced with {z}. - */ - withZu32: function(t, z) { - return new uint32x4(t.x, t.y, z, t.w); - }, - /** - * @param {integer} 32-bit value used for w lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * w lane replaced with {w}. - */ - withWu32: function(t, w) { - return new uint32x4(t.x, t.y, t.z, w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} x flag used for x lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * x lane replaced with {x}. - */ - withFlagX: function(t, flagX) { - var x = flagX ? 0xFFFFFFFF : 0x0; - return new uint32x4(x, t.y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} y flag used for y lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * y lane replaced with {y}. - */ - withFlagY: function(t, flagY) { - var y = flagY ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} z flag used for z lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * z lane replaced with {z}. - */ - withFlagZ: function(t, flagZ) { - var z = flagZ ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, t.y, z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} w flag used for w lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * w lane replaced with {w}. - */ - withFlagW: function(t, flagW) { - var w = flagW ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, t.y, t.z, w); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @return {uint32x4} a bit-wise copy of t as a uint32x4. - */ - float32x4BitsToUint32x4: function(t) { - var alias = new Uint32Array(t.storage_.buffer); - return new uint32x4(alias[0], alias[1], alias[2], alias[3]); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {float32x4} a bit-wise copy of t as a float32x4. - */ - uint32x4BitsToFloat32x4: function(t) { - var alias = new Float32Array(t.storage_.buffer); - return new float32x4(alias[0], alias[1], alias[2], alias[3]); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {float32x4} with a float to integer conversion copy of t. - */ - uint32x4ToFloat32x4: function(t) { - var a = float32x4.zero(); - a.storage_[0] = t.storage_[0]; - a.storage_[1] = t.storage_[1]; - a.storage_[2] = t.storage_[2]; - a.storage_[3] = t.storage_[3]; - return a; - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @return {uint32x4} with a integer to float conversion of t. - */ - float32x4ToUint32x4: function(t) { - var a = new uint32x4(t.storage_[0], t.storage_[1], t.storage_[2], - t.storage_[3]); - return a; + int32x4: { + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a & b. + */ + and: function(a, b) { + return new int32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a | b. + */ + or: function(a, b) { + return new int32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a ^ b. + */ + xor: function(a, b) { + return new int32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of ~t + */ + not: function(t) { + return new int32x4(~t.x, ~t.y, ~t.z, ~t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of -t + */ + neg: function(t) { + return new int32x4(-t.x, -t.y, -t.z, -t.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a + b. + */ + add: function(a, b) { + return new int32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a - b. + */ + sub: function(a, b) { + return new int32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a * b. + */ + mul: function(a, b) { + return new int32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y), + Math.imul(a.z, b.z), Math.imul(a.w, b.w)); + }, + /** + * @param {int32x4} t An instance of float32x4 to be shuffled. + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {int32x4} New instance of float32x4 with lanes shuffled. + */ + shuffle: function(t, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new int32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], + t.storage_[_w]); + }, + /** + * @param {int32x4} t1 An instance of float32x4 to be shuffled. XY lanes in result + * @param {int32x4} t2 An instance of float32x4 to be shuffled. ZW lanes in result + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {int32x4} New instance of float32x4 with lanes shuffled. + */ + shuffleMix: function(t1, t2, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new int32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z], + t2.storage_[_w]); + }, + /** + * @param {float32x4} + */ + select: function(t, trueValue, falseValue) { + var tv = SIMD.float32x4.bitsToInt32x4(trueValue); + var fv = SIMD.float32x4.bitsToInt32x4(falseValue); + var tr = SIMD.int32x4.and(t, tv); + var fr = SIMD.int32x4.and(SIMD.int32x4.not(t), fv); + return SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(tr, fr)); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for x lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * x lane replaced with {x}. + */ + withX: function(t, x) { + return new int32x4(x, t.y, t.z, t.w); + }, + /** + * param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for y lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * y lane replaced with {y}. + */ + withY: function(t, y) { + return new int32x4(t.x, y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for z lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * z lane replaced with {z}. + */ + withZ: function(t, z) { + return new int32x4(t.x, t.y, z, t.w); + }, + /** + * @param {integer} 32-bit value used for w lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * w lane replaced with {w}. + */ + withW: function(t, w) { + return new int32x4(t.x, t.y, t.z, w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} x flag used for x lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * x lane replaced with {x}. + */ + withFlagX: function(t, flagX) { + var x = flagX ? 0xFFFFFFFF : 0x0; + return new int32x4(x, t.y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} y flag used for y lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * y lane replaced with {y}. + */ + withFlagY: function(t, flagY) { + var y = flagY ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} z flag used for z lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * z lane replaced with {z}. + */ + withFlagZ: function(t, flagZ) { + var z = flagZ ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, t.y, z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} w flag used for w lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * w lane replaced with {w}. + */ + withFlagW: function(t, flagW) { + var w = flagW ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, t.y, t.z, w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {float32x4} a bit-wise copy of t as a float32x4. + */ + bitsToFloat32x4: function(t) { + var alias = new Float32Array(t.storage_.buffer); + return new float32x4(alias[0], alias[1], alias[2], alias[3]); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {float32x4} with a float to integer conversion copy of t. + */ + toFloat32x4: function(t) { + var a = float32x4.zero(); + a.storage_[0] = t.storage_[0]; + a.storage_[1] = t.storage_[1]; + a.storage_[2] = t.storage_[2]; + a.storage_[3] = t.storage_[3]; + return a; + } } } })(); @@ -955,4 +1123,3 @@ Object.defineProperty(SIMD, 'WWWX', { get: function() { return 0x3F; } }); Object.defineProperty(SIMD, 'WWWY', { get: function() { return 0x7F; } }); Object.defineProperty(SIMD, 'WWWZ', { get: function() { return 0xBF; } }); Object.defineProperty(SIMD, 'WWWW', { get: function() { return 0xFF; } }); - diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h new file mode 100644 index 00000000..31265db8 --- /dev/null +++ b/system/include/emscripten/emmintrin.h @@ -0,0 +1,87 @@ +#include <xmmintrin.h> + +typedef int32x4 __m128i; + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set_epi32(int z, int y, int x, int w) +{ + return (__m128i){ w, x, y, z }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set1_epi32(int w) +{ + return (__m128i){ w, w, w, w }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_setzero_si128() +{ + return (__m128i){ 0, 0, 0, 0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_si128(__m128i *p, __m128i a) +{ + *p = a; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_and_si128(__m128i a, __m128i b) +{ + return a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_andnot_si128(__m128i a, __m128i b) +{ + return ~a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_or_si128(__m128i a, __m128i b) +{ + return a | b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_xor_si128(__m128i a, __m128i b) +{ + return a ^ b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_add_epi32(__m128i a, __m128i b) +{ + return a + b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_sub_epi32(__m128i a, __m128i b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_castsi128_ps(__m128i a) +{ + return emscripten_int32x4_bitsToFloat32x4(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cvtepi32_ps(__m128i a) +{ + return emscripten_int32x4_toFloat32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_castps_si128(__m128 a) +{ + return emscripten_float32x4_bitsToInt32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_cvtps_epi32(__m128 a) +{ + return emscripten_float32x4_toInt32x4(a); +}
\ No newline at end of file diff --git a/system/include/emscripten/vector.h b/system/include/emscripten/vector.h index 938f2369..cf26a5d6 100644 --- a/system/include/emscripten/vector.h +++ b/system/include/emscripten/vector.h @@ -2,7 +2,7 @@ // Support for the JS SIMD API proposal, https://github.com/johnmccutchan/ecmascript_simd typedef float float32x4 __attribute__((__vector_size__(16))); -typedef unsigned int uint32x4 __attribute__((__vector_size__(16))); +typedef unsigned int int32x4 __attribute__((__vector_size__(16))); #ifdef __cplusplus extern "C" { @@ -10,6 +10,24 @@ extern "C" { unsigned int emscripten_float32x4_signmask(float32x4 x); +float32x4 emscripten_float32x4_min(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_max(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_sqrt(float32x4 a); +float32x4 emscripten_float32x4_lessThan(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_lessThanOrEqual(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_equal(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_greaterThanOrEqual(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_greaterThan(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_and(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_andNot(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_or(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_xor(float32x4 a, float32x4 b); + +float32x4 emscripten_int32x4_bitsToFloat32x4(int32x4 a); +float32x4 emscripten_int32x4_toFloat32x4(int32x4 a); +int32x4 emscripten_float32x4_bitsToInt32x4(float32x4 a); +int32x4 emscripten_float32x4_toInt32x4(float32x4 a); + #ifdef __cplusplus } #endif diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h new file mode 100644 index 00000000..1b9108fa --- /dev/null +++ b/system/include/emscripten/xmmintrin.h @@ -0,0 +1,131 @@ +#include <vector.h> + +typedef float32x4 __m128; + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set_ps(float z, float y, float x, float w) +{ + return (__m128){ w, x, y, z }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set1_ps(float w) +{ + return (__m128){ w, w, w, w }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_setzero_ps(void) +{ + return (__m128){ 0.0, 0.0, 0.0, 0.0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_ps(float *p, __m128 a) +{ + *(__m128 *)p = a; +} + +static __inline__ int __attribute__((__always_inline__)) +_mm_movemask_ps(__m128 a) +{ + return emscripten_float32x4_signmask(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_add_ps(__m128 a, __m128 b) +{ + return a + b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sub_ps(__m128 a, __m128 b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_mul_ps(__m128 a, __m128 b) +{ + return a * b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_div_ps(__m128 a, __m128 b) +{ + return a / b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_min_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_min(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_max_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_max(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sqrt_ps(__m128 a) +{ + return emscripten_float32x4_sqrt(a); +} + +/* TODO: shuffles */ + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmplt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmple_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpeq_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_equal(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpge_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpgt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_and_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_and(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_andnot_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_andNot(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_or_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_or(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_xor_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_xor(a, b); +} diff --git a/tests/test_core.py b/tests/test_core.py index cbde794f..ec00c0a5 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8811,20 +8811,20 @@ int main(int argc, char **argv) { printf("zeros %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3]); } { - uint32x4 *a = (uint32x4*)&data[0]; - uint32x4 *b = (uint32x4*)&data[4]; - uint32x4 c, d, e, f; + int32x4 *a = (int32x4*)&data[0]; + int32x4 *b = (int32x4*)&data[4]; + int32x4 c, d, e, f; c = *a; d = *b; - printf("4uints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]); + printf("4ints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]); e = c+d; f = c-d; - printf("5uints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); + printf("5ints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); e = c&d; f = c|d; e = ~c&d; f = c^d; - printf("5uintops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); + printf("5intops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); } { float32x4 c, d, e, f; @@ -8842,9 +8842,9 @@ int main(int argc, char **argv) { 2floats! 48, 68, 92, 120 42, 56, 72, 90 3floats! 48, 68, 92, 120 2016, 3808, 6624, 10800 zeros 0, 0, 0, 0 -4uints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736 -5uints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056 -5uintops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096 +4ints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736 +5ints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056 +5intops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096 6floats! -9, 0, 4, 9 -2, -12, 14, 10 ''') @@ -8895,6 +8895,484 @@ zeros 0, 0, 0, 0 16.000000 ''') + def test_simd3(self): + if Settings.USE_TYPED_ARRAYS != 2: return self.skip('needs ta2') + if Settings.ASM_JS: Settings.ASM_JS = 2 # does not validate + src = r''' + #include <iostream> + #include <emmintrin.h> + #include <assert.h> + #include <stdint.h> + #include <bitset> + + using namespace std; + + void testSetPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + _mm_store_ps(ar, v); + assert(ar[0] == 4.0); + assert(ar[1] == 3.0); + assert(ar[2] == 2.0); + assert(ar[3] == 1.0); + } + + void testSet1Ps() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_set1_ps(5.5); + _mm_store_ps(ar, v); + assert(ar[0] == 5.5); + assert(ar[1] == 5.5); + assert(ar[2] == 5.5); + assert(ar[3] == 5.5); + } + + void testSetZeroPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_setzero_ps(); + _mm_store_ps(ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testSetEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_set_epi32(5, 7, 126, 381); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 381); + assert(ar[1] == 126); + assert(ar[2] == 7); + assert(ar[3] == 5); + v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x12345678); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xaaaaaaaa); + assert(ar[3] == 0x55555555); + } + + void testSet1Epi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_set1_epi32(-5); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == -5); + assert(ar[1] == -5); + assert(ar[2] == -5); + assert(ar[3] == -5); + } + + void testSetZeroSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_setzero_si128(); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testBitCasts() { + int32_t __attribute__((__aligned__(16))) ar1[4]; + float __attribute__((__aligned__(16))) ar2[4]; + __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000); + __m128 v2 = _mm_castsi128_ps(v1); + _mm_store_ps(ar2, v2); + assert(ar2[0] == 4.0); + assert(ar2[1] == 3.0); + assert(ar2[2] == 2.0); + assert(ar2[3] == 1.0); + v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0); + v1 = _mm_castps_si128(v2); + _mm_store_si128((__m128i *)ar1, v1); + assert(ar1[0] == 0x41000000); + assert(ar1[1] == 0x40e00000); + assert(ar1[2] == 0x40c00000); + assert(ar1[3] == 0x40a00000); + float w = 0; + float z = -278.3; + float y = 5.2; + float x = -987654321; + v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x)); + _mm_store_ps(ar2, _mm_castsi128_ps(v1)); + assert(ar2[0] == x); + assert(ar2[1] == y); + assert(ar2[2] == z); + assert(ar2[3] == w); + /* + std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned long*>(&(ar2[0]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned long*>(&(ar2[1]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned long*>(&(ar2[2]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned long*>(&(ar2[3]))); + std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned long*>(&x)); + std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned long*>(&y)); + std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned long*>(&z)); + std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned long*>(&w)); + assert(bits1x == bits2x); + assert(bits1y == bits2y); + assert(bits1z == bits2z); + assert(bits1w == bits2w); + */ + v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa)); + _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2)); + assert(ar1[0] == 0xaaaaaaaa); + assert(ar1[1] == 0x5555cccc); + assert(ar1[2] == 0); + assert(ar1[3] == 0xffffffff); + } + + void testConversions() { + int32_t __attribute__((__aligned__(16))) ar1[4]; + float __attribute__((__aligned__(16))) ar2[4]; + __m128i v1 = _mm_set_epi32(0, -3, -517, 256); + __m128 v2 = _mm_cvtepi32_ps(v1); + _mm_store_ps(ar2, v2); + assert(ar2[0] == 256.0); + assert(ar2[1] == -517.0); + assert(ar2[2] == -3.0); + assert(ar2[3] == 0); + v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0); + v1 = _mm_cvtps_epi32(v2); + _mm_store_si128((__m128i *)ar1, v1); + assert(ar1[0] == -8); + assert(ar1[1] == 7); + assert(ar1[2] == 6); + assert(ar1[3] == 5); + } + + void testMoveMaskPs() { + __m128 v = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + int mask = _mm_movemask_ps(v); + assert(mask == 13); + } + + void testAddPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_add_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 41.0); + assert(ar[1] == 32.0); + assert(ar[2] == 23.0); + assert(ar[3] == 14.0); + } + + void testSubPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_sub_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == -39.0); + assert(ar[1] == -28.0); + assert(ar[2] == -17.0); + assert(ar[3] == -6.0); + } + + void testMulPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_mul_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 40.0); + assert(ar[1] == 60.0); + assert(ar[2] == 60.0); + assert(ar[3] == 40.0); + } + + void testDivPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0); + __m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5); + __m128 v = _mm_div_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 2.0); + assert(ar[1] == 8.0); + assert(ar[2] == 3.0); + assert(ar[3] == 2.0); + } + + void testMinPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5); + __m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0); + __m128 v = _mm_min_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 0.0); + assert(ar[1] == 30.0); + assert(ar[2] == 1.0); + assert(ar[3] == -20.0); + } + + void testMaxPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5); + __m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0); + __m128 v = _mm_max_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 1.0); + assert(ar[1] == 55.0); + assert(ar[2] == 10.0); + assert(ar[3] == 2.5); + } + + void testSqrtPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0); + __m128 v = _mm_sqrt_ps(v1); + _mm_store_ps(ar, v); + assert(ar[0] == 1.0); + assert(ar[1] == 2.0); + assert(ar[2] == 3.0); + assert(ar[3] == 4.0); + } + + void testCmpLtPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmplt_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0xffffffff); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0xffffffff); + assert(_mm_movemask_ps(v) == 9); + } + + void testCmpLePs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmple_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0xffffffff); + assert(ar[1] == 0); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + assert(_mm_movemask_ps(v) == 13); + } + + void testCmpEqPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpeq_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 4); + } + + void testCmpGePs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpge_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 6); + } + + void testCmpGtPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpgt_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 2); + } + + void testAndPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(425, -501, -32, 68); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + __m128 v = _mm_and_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 68); + assert(ar[1] == 0); + assert(ar[2] == -501); + assert(ar[3] == 425); + int32_t __attribute__((__aligned__(16))) ar2[4]; + v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa)); + v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + v = _mm_and_ps(v1, v2); + _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v)); + assert(ar2[0] == 0); + assert(ar2[1] == 0); + assert(ar2[2] == 0); + assert(ar2[3] == 0); + } + + void testAndNotPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(425, -501, -32, 68); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + __m128 v = _mm_andnot_ps(v2, v1); + _mm_store_ps(ar, v); + assert(ar[0] == 0); + assert(ar[1] == -32); + assert(ar[2] == 0); + assert(ar[3] == 0); + int32_t __attribute__((__aligned__(16))) ar2[4]; + v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa)); + v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + v = _mm_andnot_ps(v1, v2); + _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v)); + assert(ar2[0] == 0x55555555); + assert(ar2[1] == 0x55555555); + assert(ar2[2] == 0x55555555); + assert(ar2[3] == 0x55555555); + } + + void testOrPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0)); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + __m128 v = _mm_or_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testXorPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0)); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + __m128 v = _mm_xor_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xaaaaaaaa); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testAndSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_and_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testAndNotSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_andnot_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0x55555555); + assert(ar[2] == 0x55555555); + assert(ar[3] == 0x55555555); + } + + void testOrSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_or_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testXorSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_xor_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xaaaaaaaa); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testAddEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(4, 3, 2, 1); + __m128i v2 = _mm_set_epi32(10, 20, 30, 40); + __m128i v = _mm_add_epi32(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 41); + assert(ar[1] == 32); + assert(ar[2] == 23); + assert(ar[3] == 14); + } + + void testSubEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(4, 3, 2, 1); + __m128i v2 = _mm_set_epi32(10, 20, 30, 40); + __m128i v = _mm_sub_epi32(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == -39); + assert(ar[1] == -28); + assert(ar[2] == -17); + assert(ar[3] == -6); + } + + int main(int argc, char ** argv) { + testSetPs(); + testSet1Ps(); + testSetZeroPs(); + testSetEpi32(); + testSet1Epi32(); + testSetZeroSi128(); + testBitCasts(); + testConversions(); + testMoveMaskPs(); + testAddPs(); + testSubPs(); + testMulPs(); + testDivPs(); + testMaxPs(); + testMinPs(); + testSqrtPs(); + testCmpLtPs(); + testCmpLePs(); + testCmpEqPs(); + testCmpGePs(); + testCmpGtPs(); + testAndPs(); + testAndNotPs(); + testOrPs(); + testXorPs(); + testAndSi128(); + testAndNotSi128(); + testOrSi128(); + testXorSi128(); + testAddEpi32(); + testSubEpi32(); + printf("DONE"); + return 0; + } + ''' + + self.do_run(src, 'DONE') + + def test_gcc_unmangler(self): Settings.NAMED_GLOBALS = 1 # test coverage for this |