aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Christopher <echristo@apple.com>2010-03-15 23:22:58 +0000
committerEric Christopher <echristo@apple.com>2010-03-15 23:22:58 +0000
commit7bd0dfd322d7a2d6a5461dcbdb2a83fd729dd8e2 (patch)
treebc13d1f5a24da2b60c4b2c4f5d54264a0507fc5a
parentaea67dbd653a2dd6dd5cc2159279e81e855b2482 (diff)
Add remaining sse4.1 intrinsics and builtins.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98587 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/Basic/BuiltinsX86.def3
-rw-r--r--lib/Headers/smmintrin.h91
2 files changed, 93 insertions, 1 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 55a8e1537f..9a79f90d1e 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -290,6 +290,7 @@ BUILTIN(__builtin_ia32_movntdqa, "V2LLiV2LLi*", "")
BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "")
BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "")
-
+BUILTIN(__builtin_ia32_pcmpeqq, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16ci", "")
#undef BUILTIN
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index 7eab8182eb..972b2e0d30 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -246,6 +246,97 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((V), (V))
+/* SSE4 64-bit Packed Integer Comparisons. */
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
+{
+ return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2);
+}
+
+/* SSE4 Packed Integer Sign-Extension. */
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi8_epi16(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi8_epi32(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi8_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi16_epi32(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi16_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepi32_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);
+}
+
+/* SSE4 Packed Integer Zero-Extension. */
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu8_epi16(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu8_epi32(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu8_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu16_epi32(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu16_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
+}
+
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_cvtepu32_epi64(__m128i __V)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
+}
+
+/* SSE4 Pack with Unsigned Saturation. */
+static inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_packus_epi32(__m128i __V1, __m128i __V2)
+{
+ return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
+}
+
+/* SSE4 Multiple Packed Sums of Absolute Difference. */
+#define _mm_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw128((X), (Y), (M))
+
#endif /* __SSE4_1__ */
#endif /* _SMMINTRIN_H */