X86TTI: Add accurate costs for sitofp/uitofp operations, based on the actual instruction counts.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178459 91177308-0d34-0410-b5e6-96231b3b80d8
author: Benjamin Kramer <benny.kra@googlemail.com> 2013-04-01 10:23:49 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2013-04-01 10:23:49 +0000
commit: 13497b3aa7589fc4f9e924f850a7e5151e9ddd2f (patch)
tree: b1bab1d7d8b1b55e5e1e0bb59ef37875447bab28
parent: ef7964c1b78f57e277e74bda4f38e1143d1363fe (diff)
3 files changed, 106 insertions, 9 deletions
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 3e3b86edbb..2336035bea 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -271,10 +271,33 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
     { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 1 },
     { ISD::TRUNCATE,    MVT::v8i16, MVT::v8i32, 1 },
-    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
-    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
-    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i8,  1 },
-    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8,  1 },
+
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i1,  8 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i8,  8 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i1,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i8,  3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i32, 1 },
+
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i1,  6 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i8,  5 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 9 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1,  7 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8,  2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 6 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i1,  7 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i8,  2 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i32, 6 },
+
     { ISD::FP_TO_SINT,  MVT::v8i8,  MVT::v8f32, 1 },
     { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1,  6 },
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index c8d0f6fd8e..b69b3bf630 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -77,3 +77,78 @@ define i32 @masks4(<4 x i1> %in) {
   ret i32 undef
 }
 
+define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %A1 = sitofp <4 x i1> %a to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %A2 = sitofp <4 x i1> %a to <4 x double>
+
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %B1 = sitofp <4 x i8> %b to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %B2 = sitofp <4 x i8> %b to <4 x double>
+
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %C1 = sitofp <4 x i16> %c to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %C2 = sitofp <4 x i16> %c to <4 x double>
+
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D1 = sitofp <4 x i32> %d to <4 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D2 = sitofp <4 x i32> %d to <4 x double>
+  ret void
+}
+
+define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %A1 = sitofp <8 x i1> %a to <8 x float>
+
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %B1 = sitofp <8 x i8> %b to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} sitofp
+  %C1 = sitofp <8 x i16> %c to <8 x float>
+
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %D1 = sitofp <8 x i32> %d to <8 x float>
+  ret void
+}
+
+define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+  ; CHECK: cost of 7 {{.*}} uitofp
+  %A1 = uitofp <4 x i1> %a to <4 x float>
+  ; CHECK: cost of 7 {{.*}} uitofp
+  %A2 = uitofp <4 x i1> %a to <4 x double>
+
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %B1 = uitofp <4 x i8> %b to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %B2 = uitofp <4 x i8> %b to <4 x double>
+
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %C1 = uitofp <4 x i16> %c to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %C2 = uitofp <4 x i16> %c to <4 x double>
+
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %D1 = uitofp <4 x i32> %d to <4 x float>
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %D2 = uitofp <4 x i32> %d to <4 x double>
+  ret void
+}
+
+define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+  ; CHECK: cost of 6 {{.*}} uitofp
+  %A1 = uitofp <8 x i1> %a to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} uitofp
+  %B1 = uitofp <8 x i8> %b to <8 x float>
+
+  ; CHECK: cost of 5 {{.*}} uitofp
+  %C1 = uitofp <8 x i16> %c to <8 x float>
+
+  ; CHECK: cost of 9 {{.*}} uitofp
+  %D1 = uitofp <8 x i32> %d to <8 x float>
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 23d9233544..760d28deaf 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
-  %2 = add nsw i64 %indvars.iv, 3
-  %3 = trunc i64 %2 to i32
-  %4 = sitofp i32 %3 to float
-  %5 = getelementptr inbounds float* %B, i64 %indvars.iv
-  store float %4, float* %5, align 4
+  %add = add nsw i64 %indvars.iv, 3
+  %tofp = sitofp i64 %add to float
+  %gep = getelementptr inbounds float* %B, i64 %indvars.iv
+  store float %tofp, float* %gep, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
author	Benjamin Kramer <benny.kra@googlemail.com>	2013-04-01 10:23:49 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2013-04-01 10:23:49 +0000
commit	13497b3aa7589fc4f9e924f850a7e5151e9ddd2f (patch)
tree	b1bab1d7d8b1b55e5e1e0bb59ef37875447bab28
parent	ef7964c1b78f57e277e74bda4f38e1143d1363fe (diff)