diff options
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/break-sse-dep.ll | 41 |
2 files changed, 49 insertions, 2 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8e5f37c867..0d5d1b449e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1937,6 +1937,10 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, @@ -1992,9 +1996,11 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode FR64:$src))]>; - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), + // See the comments in sse1_fp_unop_s for why this is OptForSize. + def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode (load addr:$src)))]>; + [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD, + Requires<[HasSSE2, OptForSize]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F64Int VR128:$src))]>; diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 027d2f1daf..094cbc7bde 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -19,3 +19,44 @@ entry: %1 = fptrunc double %0 to float ret float %1 } + +define float @squirtf(float* %x) nounwind { +entry: +; CHECK: squirtf: +; CHECK: movss (%rdi), %xmm0 +; CHECK: sqrtss %xmm0, %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt(double* %x) nounwind { +entry: +; CHECK: squirt: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: sqrtsd %xmm0, %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +define float @squirtf_size(float* %x) nounwind optsize { +entry: +; CHECK: squirtf_size: +; CHECK: sqrtss (%rdi), %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt_size(double* %x) nounwind optsize { +entry: +; CHECK: squirt_size: +; CHECK: sqrtsd (%rdi), %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) |