aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dan Gohman <gohman@apple.com> 2010-07-12 20:46:04 +0000
committer Dan Gohman <gohman@apple.com> 2010-07-12 20:46:04 +0000
commit cfbf0ed8b03bbff5b4045c97dca3f93a4e6b834d (patch)
tree d91d1048611de592ae82afb564786b97a5eb83a9
parent 4d5fe97c479ed3a2736755a3b821f5ff99c67cdc (diff)
Apply the SSE dependence idiom for SSE unary operations to
SD instructions too, in addition to SS instructions. And add a comment about it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108191 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 10
-rw-r--r-- test/CodeGen/X86/break-sse-dep.ll 41
2 files changed, 49 insertions, 2 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8e5f37c867..0d5d1b449e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1937,6 +1937,10 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
@@ -1992,9 +1996,11 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode FR64:$src))]>;
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F64Int VR128:$src))]>;
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 027d2f1daf..094cbc7bde 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -19,3 +19,44 @@ entry:
%1 = fptrunc double %0 to float
ret float %1
}
+
+define float @squirtf(float* %x) nounwind {
+entry:
+; CHECK: squirtf:
+; CHECK: movss (%rdi), %xmm0
+; CHECK: sqrtss %xmm0, %xmm0
+ %z = load float* %x
+ %t = call float @llvm.sqrt.f32(float %z)
+ ret float %t
+}
+
+define double @squirt(double* %x) nounwind {
+entry:
+; CHECK: squirt:
+; CHECK: movsd (%rdi), %xmm0
+; CHECK: sqrtsd %xmm0, %xmm0
+ %z = load double* %x
+ %t = call double @llvm.sqrt.f64(double %z)
+ ret double %t
+}
+
+define float @squirtf_size(float* %x) nounwind optsize {
+entry:
+; CHECK: squirtf_size:
+; CHECK: sqrtss (%rdi), %xmm0
+ %z = load float* %x
+ %t = call float @llvm.sqrt.f32(float %z)
+ ret float %t
+}
+
+define double @squirt_size(double* %x) nounwind optsize {
+entry:
+; CHECK: squirt_size:
+; CHECK: sqrtsd (%rdi), %xmm0
+ %z = load double* %x
+ %t = call double @llvm.sqrt.f64(double %z)
+ ret double %t
+}
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)