diff options
author | Chris Lattner <sabre@nondot.org> | 2010-08-25 23:39:14 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-08-25 23:39:14 +0000 |
commit | 22fd4baf2eba2103e2b41e463f1a5f6486c398fb (patch) | |
tree | fa299a9c8abfbbf716cd16bc1c029c4568834e01 /lib/CodeGen/TargetInfo.cpp | |
parent | d2932986a16244b7f9a3f9a7a6b0daf543c91540 (diff) |
Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
as a double in the x86-64 ABI. This allows us to generate much better
code for certain things, e.g.:
_Complex float f32(_Complex float A, _Complex float B) {
return A+B;
}
Used to compile into (look at the integer silliness!):
_f32: ## @f32
## BB#0: ## %entry
movd %xmm1, %rax
movd %eax, %xmm1
movd %xmm0, %rcx
movd %ecx, %xmm0
addss %xmm1, %xmm0
movd %xmm0, %edx
shrq $32, %rax
movd %eax, %xmm0
shrq $32, %rcx
movd %ecx, %xmm1
addss %xmm0, %xmm1
movd %xmm1, %eax
shlq $32, %rax
addq %rdx, %rax
movd %rax, %xmm0
ret
Now we get:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $16, %xmm2, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm0
addss %xmm1, %xmm0
pshufd $16, %xmm0, %xmm1
movdqa %xmm2, %xmm0
unpcklps %xmm1, %xmm0
ret
and compile stuff like:
extern float _Complex ccoshf( float _Complex ) ;
float _Complex ccosf ( float _Complex z ) {
float _Complex iz;
(__real__ iz) = -(__imag__ z);
(__imag__ iz) = (__real__ z);
return ccoshf(iz);
}
into:
_ccosf: ## @ccosf
## BB#0: ## %entry
pshufd $1, %xmm0, %xmm1
xorps LCPI4_0(%rip), %xmm1
unpcklps %xmm0, %xmm1
movaps %xmm1, %xmm0
jmp _ccoshf ## TAILCALL
instead of:
_ccosf: ## @ccosf
## BB#0: ## %entry
movd %xmm0, %rax
movq %rax, %rcx
shlq $32, %rcx
shrq $32, %rax
xorl $-2147483648, %eax ## imm = 0xFFFFFFFF80000000
addq %rcx, %rax
movd %rax, %xmm0
jmp _ccoshf ## TAILCALL
There is still "stuff to be done" here for the struct case,
but this resolves rdar://6379669 - [x86-64 ABI] Pass and return
_Complex float / double efficiently
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112111 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 8 |
1 files changed, 2 insertions, 6 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index a08e7f4b80..c5b858dbfa 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -1294,12 +1294,8 @@ GetSSETypeAtOffset(const llvm::Type *IRType, unsigned IROffset, // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. if (ContainsFloatAtOffset(IRType, IROffset, getTargetData()) && - ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) { - // FIXME: <2 x float> doesn't pass as one XMM register yet. Don't enable - // this code until it does. - //return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); - - } + ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) + return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } |