aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-12-20 09:55:26 +0000
committerCraig Topper <craig.topper@gmail.com>2011-12-20 09:55:26 +0000
commit4c07c5dfebd270b2f0660e86f056eeafdb26a4fb (patch)
tree3c21d6c3d721e66804b3ae1ea2818735b4b740f2
parent3ff53b3587862439a1a84a7b0bc9a7dbee0cf4eb (diff)
Add AVX2 intrinsics for pavg, pblend, and pcmp instructions. Also remove unneeded builtins for SSE pcmp. Change SSE pcmpeqq and pcmpgtq to not use builtins and just use vector == and >.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@146969 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/Basic/BuiltinsX86.def13
-rw-r--r--lib/Headers/avx2intrin.h72
-rw-r--r--lib/Headers/smmintrin.h4
-rw-r--r--test/CodeGen/avx2-builtins.c60
-rw-r--r--test/CodeGen/builtins-x86.c6
5 files changed, 138 insertions, 17 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 3dc7271469..e72d86bb7f 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -210,12 +210,6 @@ BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "")
BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "")
BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "")
BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpeqb128, "V16cV16cV16c", "")
-BUILTIN(__builtin_ia32_pcmpeqw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpeqd128, "V4iV4iV4i", "")
-BUILTIN(__builtin_ia32_pcmpgtb128, "V16cV16cV16c", "")
-BUILTIN(__builtin_ia32_pcmpgtw128, "V8sV8sV8s", "")
-BUILTIN(__builtin_ia32_pcmpgtd128, "V4iV4iV4i", "")
BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "")
BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "")
BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "")
@@ -353,7 +347,6 @@ BUILTIN(__builtin_ia32_movntdqa, "V2LLiV2LLi*", "")
BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "")
-BUILTIN(__builtin_ia32_pcmpeqq, "V2LLiV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16ci", "")
// SSE 4.2
@@ -374,8 +367,6 @@ BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","")
//BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","")
//BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","")
-BUILTIN(__builtin_ia32_pcmpgtq, "V2LLiV2LLiV2LLi", "")
-
BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "")
BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "")
BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "")
@@ -499,5 +490,9 @@ BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "")
BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "")
BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "")
BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIc", "")
+BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "")
+BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "")
+BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "")
#undef BUILTIN
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index df0450d7d1..813c602527 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -136,6 +136,78 @@ _mm256_andnot_si256(__m256i a, __m256i b)
}
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_avg_epu8(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_avg_epu16(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
+{
+ return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
+ (__v32qi)__M);
+}
+
+#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \
+ __m256i __V1 = (V1); \
+ __m256i __V2 = (V2); \
+ (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, M); })
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi8(__m256i a, __m256i b)
+{
+ return (__m256i)((__v32qi)a == (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi16(__m256i a, __m256i b)
+{
+ return (__m256i)((__v16hi)a == (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi32(__m256i a, __m256i b)
+{
+ return (__m256i)((__v8si)a == (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi64(__m256i a, __m256i b)
+{
+ return (__m256i)((__v4di)a == (__v4di)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi8(__m256i a, __m256i b)
+{
+ return (__m256i)((__v32qi)a > (__v32qi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi16(__m256i a, __m256i b)
+{
+ return (__m256i)((__v16hi)a > (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi32(__m256i a, __m256i b)
+{
+ return (__m256i)((__v8si)a > (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi64(__m256i a, __m256i b)
+{
+ return (__m256i)((__v4di)a > (__v4di)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_or_si256(__m256i a, __m256i b)
{
return a | b;
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index a2bbd5da87..2e376ddb46 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -245,7 +245,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
{
- return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2);
+ return (__m128i)((__v2di)__V1 == (__v2di)__V2);
}
/* SSE4 Packed Integer Sign-Extension. */
@@ -398,7 +398,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
{
- return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2);
+ return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
/* SSE4.2 Accumulate CRC32. */
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index 3aa374e448..aa13b3c268 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -155,3 +155,63 @@ __m256 test_mm256_xor_si256(__m256 a, __m256 b) {
// CHECK: xor <4 x i64>
return _mm256_xor_si256(a, b);
}
+
+__m256 test_mm256_avg_epu8(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pavg.b
+ return _mm256_avg_epu8(a, b);
+}
+
+__m256 test_mm256_avg_epu16(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pavg.w
+ return _mm256_avg_epu16(a, b);
+}
+
+__m256 test_mm256_blendv_epi8(__m256 a, __m256 b, __m256 m) {
+ // CHECK: @llvm.x86.avx2.pblendvb
+ return _mm256_blendv_epi8(a, b, m);
+}
+
+__m256 test_mm256_blend_epi16(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pblendw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, i32 2)
+ return _mm256_blend_epi16(a, b, 2);
+}
+
+__m256 test_mm256_cmpeq_epi8(__m256 a, __m256 b) {
+ // CHECK: icmp eq <32 x i8>
+ return _mm256_cmpeq_epi8(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi16(__m256 a, __m256 b) {
+ // CHECK: icmp eq <16 x i16>
+ return _mm256_cmpeq_epi16(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi32(__m256 a, __m256 b) {
+ // CHECK: icmp eq <8 x i32>
+ return _mm256_cmpeq_epi32(a, b);
+}
+
+__m256 test_mm256_cmpeq_epi64(__m256 a, __m256 b) {
+ // CHECK: icmp eq <4 x i64>
+ return _mm256_cmpeq_epi64(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi8(__m256 a, __m256 b) {
+ // CHECK: icmp sgt <32 x i8>
+ return _mm256_cmpgt_epi8(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi16(__m256 a, __m256 b) {
+ // CHECK: icmp sgt <16 x i16>
+ return _mm256_cmpgt_epi16(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi32(__m256 a, __m256 b) {
+ // CHECK: icmp sgt <8 x i32>
+ return _mm256_cmpgt_epi32(a, b);
+}
+
+__m256 test_mm256_cmpgt_epi64(__m256 a, __m256 b) {
+ // CHECK: icmp sgt <4 x i64>
+ return _mm256_cmpgt_epi64(a, b);
+}
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index 7f028e5932..772ab105db 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -199,12 +199,6 @@ void f0() {
tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
tmp_V16c = __builtin_ia32_pavgb128(tmp_V16c, tmp_V16c);
tmp_V8s = __builtin_ia32_pavgw128(tmp_V8s, tmp_V8s);
- tmp_V16c = __builtin_ia32_pcmpeqb128(tmp_V16c, tmp_V16c);
- tmp_V8s = __builtin_ia32_pcmpeqw128(tmp_V8s, tmp_V8s);
- tmp_V4i = __builtin_ia32_pcmpeqd128(tmp_V4i, tmp_V4i);
- tmp_V16c = __builtin_ia32_pcmpgtb128(tmp_V16c, tmp_V16c);
- tmp_V8s = __builtin_ia32_pcmpgtw128(tmp_V8s, tmp_V8s);
- tmp_V4i = __builtin_ia32_pcmpgtd128(tmp_V4i, tmp_V4i);
tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);