diff options
author | Eli Friedman <eli.friedman@gmail.com> | 2009-06-07 07:12:56 +0000 |
---|---|---|
committer | Eli Friedman <eli.friedman@gmail.com> | 2009-06-07 07:12:56 +0000 |
commit | e0ae8bdadc4fef56db7f0e3336d0379a89eb462f (patch) | |
tree | 5e56e67957bbfe21e2a152bfca54b01affab20ed | |
parent | 06026000b1e88a799ed010c88131b549cbbe4f56 (diff) |
Now that LLVM CodeGen can handle the generic variations a bit better,
get rid of a few more clang vector builtins.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@73015 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/AST/X86Builtins.def | 4 | ||||
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 22 | ||||
-rw-r--r-- | lib/Headers/xmmintrin.h | 13 | ||||
-rw-r--r-- | test/CodeGen/builtins-x86.c | 4 |
4 files changed, 5 insertions, 38 deletions
diff --git a/include/clang/AST/X86Builtins.def b/include/clang/AST/X86Builtins.def index 27ebbbc13a..95d0003463 100644 --- a/include/clang/AST/X86Builtins.def +++ b/include/clang/AST/X86Builtins.def @@ -175,8 +175,6 @@ BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "") BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "") BUILTIN(__builtin_ia32_loadups, "V4ffC*", "") BUILTIN(__builtin_ia32_storeups, "vf*V4f", "") -BUILTIN(__builtin_ia32_loadhps, "V4fV4fV2i*", "") -BUILTIN(__builtin_ia32_loadlps, "V4fV4fV2i*", "") BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "") BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "") BUILTIN(__builtin_ia32_movmskps, "iV4f", "") @@ -194,8 +192,6 @@ BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "") BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "") BUILTIN(__builtin_ia32_loadupd, "V2ddC*", "") BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "") -BUILTIN(__builtin_ia32_loadhpd, "V2dV2ddC*", "") -BUILTIN(__builtin_ia32_loadlpd, "V2dV2ddC*", "") BUILTIN(__builtin_ia32_movmskpd, "iV2d", "") BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "") BUILTIN(__builtin_ia32_movnti, "vi*i", "") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 8a565985f5..3c7c5e5398 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -764,28 +764,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd); return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd"); } - case X86::BI__builtin_ia32_loadlps: - case X86::BI__builtin_ia32_loadhps: { - // FIXME: This should probably be represented as - // shuffle (dst, (v4f32 (insert undef, (load i64), 0)), shuf mask hi/lo) - const llvm::Type *EltTy = llvm::Type::DoubleTy; - const llvm::Type *VecTy = llvm::VectorType::get(EltTy, 2); - const llvm::Type *OrigTy = Ops[0]->getType(); - unsigned Index = BuiltinID == X86::BI__builtin_ia32_loadlps ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, Index); - Ops[1] = Builder.CreateBitCast(Ops[1], llvm::PointerType::getUnqual(EltTy)); - Ops[1] = Builder.CreateLoad(Ops[1], "tmp"); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Ops[0] = Builder.CreateInsertElement(Ops[0], Ops[1], Idx, "loadps"); - return Builder.CreateBitCast(Ops[0], OrigTy, "loadps"); - } - case X86::BI__builtin_ia32_loadlpd: - case X86::BI__builtin_ia32_loadhpd: { - Ops[1] = Builder.CreateLoad(Ops[1], "tmp"); - unsigned Index = BuiltinID == X86::BI__builtin_ia32_loadlpd ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(llvm::Type::Int32Ty, Index); - return Builder.CreateInsertElement(Ops[0], Ops[1], Idx, "loadpd"); - } case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { const llvm::Type *EltTy = llvm::Type::Int64Ty; diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 7291f88979..2903049170 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -464,20 +464,19 @@ _mm_cvtss_f32(__m128 a) static inline __m128 __attribute__((__always_inline__, __nodebug__)) _mm_loadh_pi(__m128 a, __m64 const *p) { - return __builtin_ia32_loadhps(a, (__v2si *)p); + __m128 b; + b[0] = *(float*)p; + b[1] = *((float*)p+1); + return __builtin_shufflevector(a, b, 0, 1, 4, 5); } static inline __m128 __attribute__((__always_inline__, __nodebug__)) _mm_loadl_pi(__m128 a, __m64 const *p) { -#if 0 - // FIXME: This should work, but gives really crappy code at the moment __m128 b; b[0] = *(float*)p; b[1] = *((float*)p+1); - return __builtin_shufflevector(a, b, 0, 1, 4, 5); -#endif - return __builtin_ia32_loadlps(a, (__v2si *)p); + return __builtin_shufflevector(a, b, 4, 5, 2, 3); } static inline __m128 __attribute__((__always_inline__, __nodebug__)) @@ -899,8 +898,6 @@ do { \ (row3) = _mm_movelh_ps(tmp3, tmp1); \ } while (0) -#include <emmintrin.h> - #endif /* __SSE__ */ #endif /* __XMMINTRIN_H */ diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c index c889a2feab..8d4bcbfab1 100644 --- a/test/CodeGen/builtins-x86.c +++ b/test/CodeGen/builtins-x86.c @@ -250,8 +250,6 @@ void f0() { (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp); tmp_V4f = __builtin_ia32_loadups(tmp_fCp); (void) __builtin_ia32_storeups(tmp_fp, tmp_V4f); - tmp_V4f = __builtin_ia32_loadhps(tmp_V4f, tmp_V2ip); - tmp_V4f = __builtin_ia32_loadlps(tmp_V4f, tmp_V2ip); (void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f); (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f); tmp_i = __builtin_ia32_movmskps(tmp_V4f); @@ -270,8 +268,6 @@ void f0() { (void) __builtin_ia32_maskmovdqu(tmp_V16c, tmp_V16c, tmp_cp); tmp_V2d = __builtin_ia32_loadupd(tmp_dCp); (void) __builtin_ia32_storeupd(tmp_dp, tmp_V2d); - tmp_V2d = __builtin_ia32_loadhpd(tmp_V2d, tmp_dCp); - tmp_V2d = __builtin_ia32_loadlpd(tmp_V2d, tmp_dCp); tmp_i = __builtin_ia32_movmskpd(tmp_V2d); tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c); (void) __builtin_ia32_movnti(tmp_ip, tmp_i); |