about summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/Headers/avx2intrin.h 6
-rw-r--r-- lib/Headers/avxintrin.h 26
2 files changed, 28 insertions, 4 deletions
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index cc3ffe69b3..359c0aad3c 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -837,7 +837,11 @@ _mm256_permutevar8x32_ps(__m256 a, __m256 b)
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); })
+ __builtin_shufflevector(__V1, __V2, \
+ ((M) & 0x3) * 2, \
+ ((M) & 0x3) * 2 + 1, \
+ (((M) & 0x30) >> 4) * 2, \
+ (((M) & 0x30) >> 4) * 2 + 1); })
#define _mm256_extracti128_si256(A, O) __extension__ ({ \
__m256i __A = (A); \
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index 0758a653bf..d8ecd1d413 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -289,17 +289,37 @@ _mm256_permutevar_ps(__m256 a, __m256i c)
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
__m256d __V1 = (V1); \
__m256d __V2 = (V2); \
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); })
+ (__m256d)__builtin_shufflevector((__v4df)__V1, (__v4df)__V2, \
+ ((M) & 0x3) * 2, \
+ ((M) & 0x3) * 2 + 1, \
+ (((M) & 0x30) >> 4) * 2, \
+ (((M) & 0x30) >> 4) * 2 + 1); })
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
__m256 __V1 = (V1); \
__m256 __V2 = (V2); \
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); })
+ (__m256)__builtin_shufflevector((__v8sf)__V1, (__v8sf)__V2, \
+ ((M) & 0x3) * 4, \
+ ((M) & 0x3) * 4 + 1, \
+ ((M) & 0x3) * 4 + 2, \
+ ((M) & 0x3) * 4 + 3, \
+ (((M) & 0x30) >> 4) * 4, \
+ (((M) & 0x30) >> 4) * 4 + 1, \
+ (((M) & 0x30) >> 4) * 4 + 2, \
+ (((M) & 0x30) >> 4) * 4 + 3); })
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
__m256i __V1 = (V1); \
__m256i __V2 = (V2); \
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); })
+ (__m256i)__builtin_shufflevector((__v8si)__V1, (__v8si)__V2, \
+ ((M) & 0x3) * 4, \
+ ((M) & 0x3) * 4 + 1, \
+ ((M) & 0x3) * 4 + 2, \
+ ((M) & 0x3) * 4 + 3, \
+ (((M) & 0x30) >> 4) * 4, \
+ (((M) & 0x30) >> 4) * 4 + 1, \
+ (((M) & 0x30) >> 4) * 4 + 2, \
+ (((M) & 0x30) >> 4) * 4 + 3); })
/* Vector Blend */
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \