diff options
author | Heidi Pan <heidi.pan@intel.com> | 2013-11-04 19:02:20 -0800 |
---|---|---|
committer | Heidi Pan <heidi.pan@intel.com> | 2013-11-26 14:59:02 -0800 |
commit | adeaad098f3e61901ce01bde5c2a12f0649123a5 (patch) | |
tree | b46d7f96043cf4d8bfdd0bb2b2a31cd2077e293a /system | |
parent | 05de00315488a0a17984a472e6e6bbc4000a1e76 (diff) |
getting some initial set of x86 intrinsics mapped to JS SIMD ops; update existing SIMD code to use updated JS SIMD API (SIMD.type.op instead of SIMD.op & SIMD.opu32; int32x4 instead of uint32x4); bug fix: right type of withX() for int32x4 initialization; bug fix: splat(0) instead of zero() for int32x4 initialization
Diffstat (limited to 'system')
-rw-r--r-- | system/include/emscripten/emmintrin.h | 87 | ||||
-rw-r--r-- | system/include/emscripten/xmmintrin.h | 131 |
2 files changed, 218 insertions, 0 deletions
diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h new file mode 100644 index 00000000..31265db8 --- /dev/null +++ b/system/include/emscripten/emmintrin.h @@ -0,0 +1,87 @@ +#include <xmmintrin.h> + +typedef int32x4 __m128i; + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set_epi32(int z, int y, int x, int w) +{ + return (__m128i){ w, x, y, z }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set1_epi32(int w) +{ + return (__m128i){ w, w, w, w }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_setzero_si128() +{ + return (__m128i){ 0, 0, 0, 0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_si128(__m128i *p, __m128i a) +{ + *p = a; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_and_si128(__m128i a, __m128i b) +{ + return a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_andnot_si128(__m128i a, __m128i b) +{ + return ~a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_or_si128(__m128i a, __m128i b) +{ + return a | b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_xor_si128(__m128i a, __m128i b) +{ + return a ^ b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_add_epi32(__m128i a, __m128i b) +{ + return a + b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_sub_epi32(__m128i a, __m128i b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_castsi128_ps(__m128i a) +{ + return emscripten_int32x4_bitsToFloat32x4(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cvtepi32_ps(__m128i a) +{ + return emscripten_int32x4_toFloat32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_castps_si128(__m128 a) +{ + return emscripten_float32x4_bitsToInt32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_cvtps_epi32(__m128 a) +{ + return emscripten_float32x4_toInt32x4(a); +}
\ No newline at end of file diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h new file mode 100644 index 00000000..1b9108fa --- /dev/null +++ b/system/include/emscripten/xmmintrin.h @@ -0,0 +1,131 @@ +#include <vector.h> + +typedef float32x4 __m128; + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set_ps(float z, float y, float x, float w) +{ + return (__m128){ w, x, y, z }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set1_ps(float w) +{ + return (__m128){ w, w, w, w }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_setzero_ps(void) +{ + return (__m128){ 0.0, 0.0, 0.0, 0.0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_ps(float *p, __m128 a) +{ + *(__m128 *)p = a; +} + +static __inline__ int __attribute__((__always_inline__)) +_mm_movemask_ps(__m128 a) +{ + return emscripten_float32x4_signmask(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_add_ps(__m128 a, __m128 b) +{ + return a + b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sub_ps(__m128 a, __m128 b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_mul_ps(__m128 a, __m128 b) +{ + return a * b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_div_ps(__m128 a, __m128 b) +{ + return a / b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_min_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_min(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_max_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_max(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sqrt_ps(__m128 a) +{ + return emscripten_float32x4_sqrt(a); +} + +/* TODO: shuffles */ + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmplt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmple_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpeq_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_equal(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpge_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpgt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_and_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_and(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_andnot_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_andNot(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_or_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_or(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_xor_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_xor(a, b); +} |