Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
as a double in the x86-64 ABI. This allows us to generate much better
code for certain things, e.g.:

    _Complex float f32(_Complex float A, _Complex float B) {
      return A+B;
    }

Used to compile into (look at the integer silliness!):

    _f32:                                   ## @f32
    ## BB#0:                                ## %entry
            movd    %xmm1, %rax
            movd    %eax, %xmm1
            movd    %xmm0, %rcx
            movd    %ecx, %xmm0
            addss   %xmm1, %xmm0
            movd    %xmm0, %edx
            shrq    $32, %rax
            movd    %eax, %xmm0
            shrq    $32, %rcx
            movd    %ecx, %xmm1
            addss   %xmm0, %xmm1
            movd    %xmm1, %eax
            shlq    $32, %rax
            addq    %rdx, %rax
            movd    %rax, %xmm0
            ret

Now we get:

    _f32:                                   ## @f32
            movdqa  %xmm0, %xmm2
            addss   %xmm1, %xmm2
            pshufd  $16, %xmm2, %xmm2
            pshufd  $1, %xmm1, %xmm1
            pshufd  $1, %xmm0, %xmm0
            addss   %xmm1, %xmm0
            pshufd  $16, %xmm0, %xmm1
            movdqa  %xmm2, %xmm0
            unpcklps        %xmm1, %xmm0
            ret

and compile stuff like:

    extern float _Complex ccoshf( float _Complex ) ;
    float _Complex ccosf ( float _Complex z ) {
      float _Complex iz;
      (__real__ iz) = -(__imag__ z);
      (__imag__ iz) = (__real__ z);
      return ccoshf(iz);
    }

into:

    _ccosf:                                 ## @ccosf
    ## BB#0:                                ## %entry
            pshufd  $1, %xmm0, %xmm1
            xorps   LCPI4_0(%rip), %xmm1
            unpcklps        %xmm0, %xmm1
            movaps  %xmm1, %xmm0
            jmp     _ccoshf                 ## TAILCALL

instead of:

    _ccosf:                                 ## @ccosf
    ## BB#0:                                ## %entry
            movd    %xmm0, %rax
            movq    %rax, %rcx
            shlq    $32, %rcx
            shrq    $32, %rax
            xorl    $-2147483648, %eax      ## imm = 0xFFFFFFFF80000000
            addq    %rcx, %rax
            movd    %rax, %xmm0
            jmp     _ccoshf                 ## TAILCALL

There is still "stuff to be done" here for the struct case, but this
resolves rdar://6379669 - [x86-64 ABI] Pass and return _Complex float /
double efficiently

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112111 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2010-08-25 23:39:14 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-08-25 23:39:14 +0000
commit: 22fd4baf2eba2103e2b41e463f1a5f6486c398fb (patch)
tree: fa299a9c8abfbbf716cd16bc1c029c4568834e01 /lib/CodeGen/TargetInfo.cpp
parent: d2932986a16244b7f9a3f9a7a6b0daf543c91540 (diff)
1 files changed, 2 insertions, 6 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index a08e7f4b80..c5b858dbfa 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1294,12 +1294,8 @@ GetSSETypeAtOffset(const llvm::Type *IRType, unsigned IROffset,
   // offset+0 and offset+4.  Walk the LLVM IR type to find out if this is the
   // case.
   if (ContainsFloatAtOffset(IRType, IROffset, getTargetData()) &&
-      ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) {
-    // FIXME: <2 x float> doesn't pass as one XMM register yet.  Don't enable
-    // this code until it does.
-    //return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2);
-
-  }
+      ContainsFloatAtOffset(IRType, IROffset+4, getTargetData()))
+    return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2);
 
   return llvm::Type::getDoubleTy(getVMContext());
 }
author	Chris Lattner <sabre@nondot.org>	2010-08-25 23:39:14 +0000
committer	Chris Lattner <sabre@nondot.org>	2010-08-25 23:39:14 +0000
commit	22fd4baf2eba2103e2b41e463f1a5f6486c398fb (patch)
tree	fa299a9c8abfbbf716cd16bc1c029c4568834e01 /lib/CodeGen/TargetInfo.cpp
parent	d2932986a16244b7f9a3f9a7a6b0daf543c91540 (diff)