diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-11-06 19:33:53 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-11-06 19:33:53 +0000 |
commit | b042868c01925dae3a1032890f591f1da78b19d3 (patch) | |
tree | 38cb8d56122a6e2eb42f2fccafe210bcec458225 | |
parent | a082892fb734f4738f5e687778997706fa0dd65a (diff) |
Cost Model: add tables for some avx type-conversion hacks.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167480 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 60 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cast.ll | 32 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/cost-model.ll | 2 |
4 files changed, 94 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6b5466d70b..5853b5109c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17526,6 +17526,23 @@ int FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { return -1; } +struct X86TypeConversionCostTblEntry { + int ISD; + MVT Dst; + MVT Src; + unsigned Cost; +}; + +int FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, + int ISD, MVT Dst, MVT Src) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) + return i; + + // Could not find an entry. + return -1; +} + unsigned X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { @@ -17535,8 +17552,7 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, int ISD = InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); static const X86CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized @@ -17647,5 +17663,45 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); } +unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, + Type *Dst, + Type *Src) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); + + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + }; + if (ST.hasAVX()) { + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; + } + + return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 40dfaa0d43..c6354b0801 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -960,6 +960,9 @@ namespace llvm { unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; + + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; }; } diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index f8b1114a7c..298d5c66ad 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -32,3 +32,35 @@ define i32 @add(i32 %arg) { ret i32 undef } +define i32 @zext_sext(<8 x i1> %in) { + ;CHECK: cost of 6 {{.*}} zext + %Z = zext <8 x i1> %in to <8 x i32> + ;CHECK: cost of 9 {{.*}} sext + %S = sext <8 x i1> %in to <8 x i32> + + ;CHECK: cost of 1 {{.*}} sext + %A = sext <8 x i16> undef to <8 x i32> + ;CHECK: cost of 1 {{.*}} zext + %B = zext <8 x i16> undef to <8 x i32> + ;CHECK: cost of 1 {{.*}} sext + %C = sext <4 x i32> undef to <4 x i64> + + ;CHECK: cost of 1 {{.*}} zext + %D = zext <4 x i32> undef to <4 x i64> + ;CHECK: cost of 1 {{.*}} trunc + + %E = trunc <4 x i64> undef to <4 x i32> + ;CHECK: cost of 1 {{.*}} trunc + %F = trunc <8 x i32> undef to <8 x i16> + + ret i32 undef +} + +define i32 @masks(<8 x i1> %in) { + ;CHECK: cost of 6 {{.*}} zext + %Z = zext <8 x i1> %in to <8 x i32> + ;CHECK: cost of 9 {{.*}} sext + %S = sext <8 x i1> %in to <8 x i32> + ret i32 undef +} + diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll index 40e660855b..628f9912c8 100644 --- a/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0" @a = common global [2048 x i32] zeroinitializer, align 16 ;CHECK: cost_model_1 -;CHECK-NOT: <4 x i32> +;CHECK: <4 x i32> ;CHECK: ret void define void @cost_model_1() nounwind uwtable noinline ssp { entry: |