about | summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Vasilis Kalintiris <ehostunreach@gmail.com> 2013-12-07 17:23:44 +0200
committer: Vasilis Kalintiris <ehostunreach@gmail.com> 2013-12-07 19:36:02 +0200
commit: 67356a7e2bfb3a039edab075a84d921569e8361b (patch)
tree: 7ef278485022309eb5fd30f90c8281cbb4d252e3 /tests
parent: 6f051b2e20bafefe9b5c9c0a6811fb2ad54f768f (diff)
Use do_run_from_file() for test_simd3
Diffstat (limited to 'tests')
-rw-r--r-- tests/core/test_simd3.in 471
-rw-r--r-- tests/core/test_simd3.out 1
-rw-r--r-- tests/test_core.py 475
3 files changed, 475 insertions, 472 deletions
diff --git a/tests/core/test_simd3.in b/tests/core/test_simd3.in
new file mode 100644
index 00000000..462370e7
--- /dev/null
+++ b/tests/core/test_simd3.in
@@ -0,0 +1,471 @@
+
+ #include <iostream>
+ #include <emmintrin.h>
+ #include <assert.h>
+ #include <stdint.h>
+ #include <bitset>
+
+ using namespace std;
+
+ void testSetPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 4.0);
+ assert(ar[1] == 3.0);
+ assert(ar[2] == 2.0);
+ assert(ar[3] == 1.0);
+ }
+
+ void testSet1Ps() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_set1_ps(5.5);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 5.5);
+ assert(ar[1] == 5.5);
+ assert(ar[2] == 5.5);
+ assert(ar[3] == 5.5);
+ }
+
+ void testSetZeroPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v = _mm_setzero_ps();
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testSetEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_set_epi32(5, 7, 126, 381);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 381);
+ assert(ar[1] == 126);
+ assert(ar[2] == 7);
+ assert(ar[3] == 5);
+ v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x12345678);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xaaaaaaaa);
+ assert(ar[3] == 0x55555555);
+ }
+
+ void testSet1Epi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_set1_epi32(-5);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == -5);
+ assert(ar[1] == -5);
+ assert(ar[2] == -5);
+ assert(ar[3] == -5);
+ }
+
+ void testSetZeroSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v = _mm_setzero_si128();
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testBitCasts() {
+ int32_t __attribute__((__aligned__(16))) ar1[4];
+ float __attribute__((__aligned__(16))) ar2[4];
+ __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000);
+ __m128 v2 = _mm_castsi128_ps(v1);
+ _mm_store_ps(ar2, v2);
+ assert(ar2[0] == 4.0);
+ assert(ar2[1] == 3.0);
+ assert(ar2[2] == 2.0);
+ assert(ar2[3] == 1.0);
+ v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0);
+ v1 = _mm_castps_si128(v2);
+ _mm_store_si128((__m128i *)ar1, v1);
+ assert(ar1[0] == 0x41000000);
+ assert(ar1[1] == 0x40e00000);
+ assert(ar1[2] == 0x40c00000);
+ assert(ar1[3] == 0x40a00000);
+ float w = 0;
+ float z = -278.3;
+ float y = 5.2;
+ float x = -987654321;
+ v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x));
+ _mm_store_ps(ar2, _mm_castsi128_ps(v1));
+ assert(ar2[0] == x);
+ assert(ar2[1] == y);
+ assert(ar2[2] == z);
+ assert(ar2[3] == w);
+ /*
+ std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned long*>(&(ar2[0])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned long*>(&(ar2[1])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned long*>(&(ar2[2])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned long*>(&(ar2[3])));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned long*>(&x));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned long*>(&y));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned long*>(&z));
+ std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned long*>(&w));
+ assert(bits1x == bits2x);
+ assert(bits1y == bits2y);
+ assert(bits1z == bits2z);
+ assert(bits1w == bits2w);
+ */
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa));
+ _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2));
+ assert(ar1[0] == 0xaaaaaaaa);
+ assert(ar1[1] == 0x5555cccc);
+ assert(ar1[2] == 0);
+ assert(ar1[3] == 0xffffffff);
+ }
+
+ void testConversions() {
+ int32_t __attribute__((__aligned__(16))) ar1[4];
+ float __attribute__((__aligned__(16))) ar2[4];
+ __m128i v1 = _mm_set_epi32(0, -3, -517, 256);
+ __m128 v2 = _mm_cvtepi32_ps(v1);
+ _mm_store_ps(ar2, v2);
+ assert(ar2[0] == 256.0);
+ assert(ar2[1] == -517.0);
+ assert(ar2[2] == -3.0);
+ assert(ar2[3] == 0);
+ v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0);
+ v1 = _mm_cvtps_epi32(v2);
+ _mm_store_si128((__m128i *)ar1, v1);
+ assert(ar1[0] == -8);
+ assert(ar1[1] == 7);
+ assert(ar1[2] == 6);
+ assert(ar1[3] == 5);
+ }
+
+ void testMoveMaskPs() {
+ __m128 v = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ int mask = _mm_movemask_ps(v);
+ assert(mask == 13);
+ }
+
+ void testAddPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_add_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 41.0);
+ assert(ar[1] == 32.0);
+ assert(ar[2] == 23.0);
+ assert(ar[3] == 14.0);
+ }
+
+ void testSubPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_sub_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == -39.0);
+ assert(ar[1] == -28.0);
+ assert(ar[2] == -17.0);
+ assert(ar[3] == -6.0);
+ }
+
+ void testMulPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
+ __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
+ __m128 v = _mm_mul_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 40.0);
+ assert(ar[1] == 60.0);
+ assert(ar[2] == 60.0);
+ assert(ar[3] == 40.0);
+ }
+
+ void testDivPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0);
+ __m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5);
+ __m128 v = _mm_div_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 2.0);
+ assert(ar[1] == 8.0);
+ assert(ar[2] == 3.0);
+ assert(ar[3] == 2.0);
+ }
+
+ void testMinPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
+ __m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0);
+ __m128 v = _mm_min_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0.0);
+ assert(ar[1] == 30.0);
+ assert(ar[2] == 1.0);
+ assert(ar[3] == -20.0);
+ }
+
+ void testMaxPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
+ __m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0);
+ __m128 v = _mm_max_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 1.0);
+ assert(ar[1] == 55.0);
+ assert(ar[2] == 10.0);
+ assert(ar[3] == 2.5);
+ }
+
+ void testSqrtPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
+ __m128 v = _mm_sqrt_ps(v1);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 1.0);
+ assert(ar[1] == 2.0);
+ assert(ar[2] == 3.0);
+ assert(ar[3] == 4.0);
+ }
+
+ void testCmpLtPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmplt_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0xffffffff);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0xffffffff);
+ assert(_mm_movemask_ps(v) == 9);
+ }
+
+ void testCmpLePs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmple_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0xffffffff);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ assert(_mm_movemask_ps(v) == 13);
+ }
+
+ void testCmpEqPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpeq_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 4);
+ }
+
+ void testCmpGePs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpge_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 6);
+ }
+
+ void testCmpGtPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
+ __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
+ __m128 v = _mm_cmpgt_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ assert(_mm_movemask_ps(v) == 2);
+ }
+
+ void testAndPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(425, -501, -32, 68);
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ __m128 v = _mm_and_ps(v1, v2);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 68);
+ assert(ar[1] == 0);
+ assert(ar[2] == -501);
+ assert(ar[3] == 425);
+ int32_t __attribute__((__aligned__(16))) ar2[4];
+ v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ v = _mm_and_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
+ assert(ar2[0] == 0);
+ assert(ar2[1] == 0);
+ assert(ar2[2] == 0);
+ assert(ar2[3] == 0);
+ }
+
+ void testAndNotPs() {
+ float __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_set_ps(425, -501, -32, 68);
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
+ __m128 v = _mm_andnot_ps(v2, v1);
+ _mm_store_ps(ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == -32);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ int32_t __attribute__((__aligned__(16))) ar2[4];
+ v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
+ v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ v = _mm_andnot_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
+ assert(ar2[0] == 0x55555555);
+ assert(ar2[1] == 0x55555555);
+ assert(ar2[2] == 0x55555555);
+ assert(ar2[3] == 0x55555555);
+ }
+
+ void testOrPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ __m128 v = _mm_or_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testXorPs() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
+ __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
+ __m128 v = _mm_xor_ps(v1, v2);
+ _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xaaaaaaaa);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testAndSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_and_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0);
+ assert(ar[1] == 0);
+ assert(ar[2] == 0);
+ assert(ar[3] == 0);
+ }
+
+ void testAndNotSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_andnot_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0x55555555);
+ assert(ar[2] == 0x55555555);
+ assert(ar[3] == 0x55555555);
+ }
+
+ void testOrSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_or_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xffffffff);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testXorSi128() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
+ __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
+ __m128i v = _mm_xor_si128(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 0x55555555);
+ assert(ar[1] == 0xaaaaaaaa);
+ assert(ar[2] == 0xffffffff);
+ assert(ar[3] == 0xffffffff);
+ }
+
+ void testAddEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
+ __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
+ __m128i v = _mm_add_epi32(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == 41);
+ assert(ar[1] == 32);
+ assert(ar[2] == 23);
+ assert(ar[3] == 14);
+ }
+
+ void testSubEpi32() {
+ int32_t __attribute__((__aligned__(16))) ar[4];
+ __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
+ __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
+ __m128i v = _mm_sub_epi32(v1, v2);
+ _mm_store_si128((__m128i *)ar, v);
+ assert(ar[0] == -39);
+ assert(ar[1] == -28);
+ assert(ar[2] == -17);
+ assert(ar[3] == -6);
+ }
+
+ int main(int argc, char ** argv) {
+ testSetPs();
+ testSet1Ps();
+ testSetZeroPs();
+ testSetEpi32();
+ testSet1Epi32();
+ testSetZeroSi128();
+ testBitCasts();
+ testConversions();
+ testMoveMaskPs();
+ testAddPs();
+ testSubPs();
+ testMulPs();
+ testDivPs();
+ testMaxPs();
+ testMinPs();
+ testSqrtPs();
+ testCmpLtPs();
+ testCmpLePs();
+ testCmpEqPs();
+ testCmpGePs();
+ testCmpGtPs();
+ testAndPs();
+ testAndNotPs();
+ testOrPs();
+ testXorPs();
+ testAndSi128();
+ testAndNotSi128();
+ testOrSi128();
+ testXorSi128();
+ testAddEpi32();
+ testSubEpi32();
+ printf("DONE");
+ return 0;
+ }
+ 
\ No newline at end of file
diff --git a/tests/core/test_simd3.out b/tests/core/test_simd3.out
new file mode 100644
index 00000000..09626497
--- /dev/null
+++ b/tests/core/test_simd3.out
@@ -0,0 +1 @@
+DONE
\ No newline at end of file
diff --git a/tests/test_core.py b/tests/test_core.py
index 9b8cce5a..c220541d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -4565,480 +4565,11 @@ return malloc(size);
def test_simd3(self):
if Settings.USE_TYPED_ARRAYS != 2: return self.skip('needs ta2')
if Settings.ASM_JS: Settings.ASM_JS = 2 # does not validate
- src = r'''
- #include <iostream>
- #include <emmintrin.h>
- #include <assert.h>
- #include <stdint.h>
- #include <bitset>
-
- using namespace std;
-
- void testSetPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
- _mm_store_ps(ar, v);
- assert(ar[0] == 4.0);
- assert(ar[1] == 3.0);
- assert(ar[2] == 2.0);
- assert(ar[3] == 1.0);
- }
-
- void testSet1Ps() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v = _mm_set1_ps(5.5);
- _mm_store_ps(ar, v);
- assert(ar[0] == 5.5);
- assert(ar[1] == 5.5);
- assert(ar[2] == 5.5);
- assert(ar[3] == 5.5);
- }
-
- void testSetZeroPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v = _mm_setzero_ps();
- _mm_store_ps(ar, v);
- assert(ar[0] == 0);
- assert(ar[1] == 0);
- assert(ar[2] == 0);
- assert(ar[3] == 0);
- }
-
- void testSetEpi32() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v = _mm_set_epi32(5, 7, 126, 381);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 381);
- assert(ar[1] == 126);
- assert(ar[2] == 7);
- assert(ar[3] == 5);
- v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0x12345678);
- assert(ar[1] == 0xffffffff);
- assert(ar[2] == 0xaaaaaaaa);
- assert(ar[3] == 0x55555555);
- }
-
- void testSet1Epi32() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v = _mm_set1_epi32(-5);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == -5);
- assert(ar[1] == -5);
- assert(ar[2] == -5);
- assert(ar[3] == -5);
- }
-
- void testSetZeroSi128() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v = _mm_setzero_si128();
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0);
- assert(ar[1] == 0);
- assert(ar[2] == 0);
- assert(ar[3] == 0);
- }
-
- void testBitCasts() {
- int32_t __attribute__((__aligned__(16))) ar1[4];
- float __attribute__((__aligned__(16))) ar2[4];
- __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000);
- __m128 v2 = _mm_castsi128_ps(v1);
- _mm_store_ps(ar2, v2);
- assert(ar2[0] == 4.0);
- assert(ar2[1] == 3.0);
- assert(ar2[2] == 2.0);
- assert(ar2[3] == 1.0);
- v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0);
- v1 = _mm_castps_si128(v2);
- _mm_store_si128((__m128i *)ar1, v1);
- assert(ar1[0] == 0x41000000);
- assert(ar1[1] == 0x40e00000);
- assert(ar1[2] == 0x40c00000);
- assert(ar1[3] == 0x40a00000);
- float w = 0;
- float z = -278.3;
- float y = 5.2;
- float x = -987654321;
- v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x));
- _mm_store_ps(ar2, _mm_castsi128_ps(v1));
- assert(ar2[0] == x);
- assert(ar2[1] == y);
- assert(ar2[2] == z);
- assert(ar2[3] == w);
- /*
- std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned long*>(&(ar2[0])));
- std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned long*>(&(ar2[1])));
- std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned long*>(&(ar2[2])));
- std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned long*>(&(ar2[3])));
- std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned long*>(&x));
- std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned long*>(&y));
- std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned long*>(&z));
- std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned long*>(&w));
- assert(bits1x == bits2x);
- assert(bits1y == bits2y);
- assert(bits1z == bits2z);
- assert(bits1w == bits2w);
- */
- v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa));
- _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2));
- assert(ar1[0] == 0xaaaaaaaa);
- assert(ar1[1] == 0x5555cccc);
- assert(ar1[2] == 0);
- assert(ar1[3] == 0xffffffff);
- }
-
- void testConversions() {
- int32_t __attribute__((__aligned__(16))) ar1[4];
- float __attribute__((__aligned__(16))) ar2[4];
- __m128i v1 = _mm_set_epi32(0, -3, -517, 256);
- __m128 v2 = _mm_cvtepi32_ps(v1);
- _mm_store_ps(ar2, v2);
- assert(ar2[0] == 256.0);
- assert(ar2[1] == -517.0);
- assert(ar2[2] == -3.0);
- assert(ar2[3] == 0);
- v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0);
- v1 = _mm_cvtps_epi32(v2);
- _mm_store_si128((__m128i *)ar1, v1);
- assert(ar1[0] == -8);
- assert(ar1[1] == 7);
- assert(ar1[2] == 6);
- assert(ar1[3] == 5);
- }
-
- void testMoveMaskPs() {
- __m128 v = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
- int mask = _mm_movemask_ps(v);
- assert(mask == 13);
- }
-
- void testAddPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
- __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
- __m128 v = _mm_add_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 41.0);
- assert(ar[1] == 32.0);
- assert(ar[2] == 23.0);
- assert(ar[3] == 14.0);
- }
-
- void testSubPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
- __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
- __m128 v = _mm_sub_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == -39.0);
- assert(ar[1] == -28.0);
- assert(ar[2] == -17.0);
- assert(ar[3] == -6.0);
- }
-
- void testMulPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
- __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
- __m128 v = _mm_mul_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 40.0);
- assert(ar[1] == 60.0);
- assert(ar[2] == 60.0);
- assert(ar[3] == 40.0);
- }
-
- void testDivPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0);
- __m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5);
- __m128 v = _mm_div_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 2.0);
- assert(ar[1] == 8.0);
- assert(ar[2] == 3.0);
- assert(ar[3] == 2.0);
- }
-
- void testMinPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
- __m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0);
- __m128 v = _mm_min_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 0.0);
- assert(ar[1] == 30.0);
- assert(ar[2] == 1.0);
- assert(ar[3] == -20.0);
- }
-
- void testMaxPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
- __m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0);
- __m128 v = _mm_max_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 1.0);
- assert(ar[1] == 55.0);
- assert(ar[2] == 10.0);
- assert(ar[3] == 2.5);
- }
-
- void testSqrtPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
- __m128 v = _mm_sqrt_ps(v1);
- _mm_store_ps(ar, v);
- assert(ar[0] == 1.0);
- assert(ar[1] == 2.0);
- assert(ar[2] == 3.0);
- assert(ar[3] == 4.0);
- }
-
- void testCmpLtPs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
- __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
- __m128 v = _mm_cmplt_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0xffffffff);
- assert(ar[1] == 0);
- assert(ar[2] == 0);
- assert(ar[3] == 0xffffffff);
- assert(_mm_movemask_ps(v) == 9);
- }
-
- void testCmpLePs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
- __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
- __m128 v = _mm_cmple_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0xffffffff);
- assert(ar[1] == 0);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0xffffffff);
- assert(_mm_movemask_ps(v) == 13);
- }
-
- void testCmpEqPs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
- __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
- __m128 v = _mm_cmpeq_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0);
- assert(ar[1] == 0);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0);
- assert(_mm_movemask_ps(v) == 4);
- }
-
- void testCmpGePs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
- __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
- __m128 v = _mm_cmpge_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0);
- assert(ar[1] == 0xffffffff);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0);
- assert(_mm_movemask_ps(v) == 6);
- }
-
- void testCmpGtPs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
- __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
- __m128 v = _mm_cmpgt_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0);
- assert(ar[1] == 0xffffffff);
- assert(ar[2] == 0);
- assert(ar[3] == 0);
- assert(_mm_movemask_ps(v) == 2);
- }
-
- void testAndPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(425, -501, -32, 68);
- __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
- __m128 v = _mm_and_ps(v1, v2);
- _mm_store_ps(ar, v);
- assert(ar[0] == 68);
- assert(ar[1] == 0);
- assert(ar[2] == -501);
- assert(ar[3] == 425);
- int32_t __attribute__((__aligned__(16))) ar2[4];
- v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
- v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
- v = _mm_and_ps(v1, v2);
- _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
- assert(ar2[0] == 0);
- assert(ar2[1] == 0);
- assert(ar2[2] == 0);
- assert(ar2[3] == 0);
- }
-
- void testAndNotPs() {
- float __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_set_ps(425, -501, -32, 68);
- __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
- __m128 v = _mm_andnot_ps(v2, v1);
- _mm_store_ps(ar, v);
- assert(ar[0] == 0);
- assert(ar[1] == -32);
- assert(ar[2] == 0);
- assert(ar[3] == 0);
- int32_t __attribute__((__aligned__(16))) ar2[4];
- v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
- v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
- v = _mm_andnot_ps(v1, v2);
- _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
- assert(ar2[0] == 0x55555555);
- assert(ar2[1] == 0x55555555);
- assert(ar2[2] == 0x55555555);
- assert(ar2[3] == 0x55555555);
- }
-
- void testOrPs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
- __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
- __m128 v = _mm_or_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0x55555555);
- assert(ar[1] == 0xffffffff);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0xffffffff);
- }
-
- void testXorPs() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
- __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
- __m128 v = _mm_xor_ps(v1, v2);
- _mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
- assert(ar[0] == 0x55555555);
- assert(ar[1] == 0xaaaaaaaa);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0xffffffff);
- }
-
- void testAndSi128() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
- __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
- __m128i v = _mm_and_si128(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0);
- assert(ar[1] == 0);
- assert(ar[2] == 0);
- assert(ar[3] == 0);
- }
-
- void testAndNotSi128() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
- __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
- __m128i v = _mm_andnot_si128(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0x55555555);
- assert(ar[1] == 0x55555555);
- assert(ar[2] == 0x55555555);
- assert(ar[3] == 0x55555555);
- }
-
- void testOrSi128() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
- __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
- __m128i v = _mm_or_si128(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0x55555555);
- assert(ar[1] == 0xffffffff);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0xffffffff);
- }
-
- void testXorSi128() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
- __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
- __m128i v = _mm_xor_si128(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 0x55555555);
- assert(ar[1] == 0xaaaaaaaa);
- assert(ar[2] == 0xffffffff);
- assert(ar[3] == 0xffffffff);
- }
-
- void testAddEpi32() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
- __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
- __m128i v = _mm_add_epi32(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == 41);
- assert(ar[1] == 32);
- assert(ar[2] == 23);
- assert(ar[3] == 14);
- }
-
- void testSubEpi32() {
- int32_t __attribute__((__aligned__(16))) ar[4];
- __m128i v1 = _mm_set_epi32(4, 3, 2, 1);
- __m128i v2 = _mm_set_epi32(10, 20, 30, 40);
- __m128i v = _mm_sub_epi32(v1, v2);
- _mm_store_si128((__m128i *)ar, v);
- assert(ar[0] == -39);
- assert(ar[1] == -28);
- assert(ar[2] == -17);
- assert(ar[3] == -6);
- }
-
- int main(int argc, char ** argv) {
- testSetPs();
- testSet1Ps();
- testSetZeroPs();
- testSetEpi32();
- testSet1Epi32();
- testSetZeroSi128();
- testBitCasts();
- testConversions();
- testMoveMaskPs();
- testAddPs();
- testSubPs();
- testMulPs();
- testDivPs();
- testMaxPs();
- testMinPs();
- testSqrtPs();
- testCmpLtPs();
- testCmpLePs();
- testCmpEqPs();
- testCmpGePs();
- testCmpGtPs();
- testAndPs();
- testAndNotPs();
- testOrPs();
- testXorPs();
- testAndSi128();
- testAndNotSi128();
- testOrSi128();
- testXorSi128();
- testAddEpi32();
- testSubEpi32();
- printf("DONE");
- return 0;
- }
- '''
- self.do_run(src, 'DONE')
+ test_path = path_from_root('tests', 'core', 'test_simd3')
+ src, output = (test_path + s for s in ('.in', '.out'))
+ self.do_run_from_file(src, output)
def test_gcc_unmangler(self):
Settings.NAMED_GLOBALS = 1 # test coverage for this