diff options
Diffstat (limited to 'lib/CodeGen/BasicTargetTransformInfo.cpp')
-rw-r--r-- | lib/CodeGen/BasicTargetTransformInfo.cpp | 102 |
1 files changed, 83 insertions, 19 deletions
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index e8b5b4fe8d..4a99184f5e 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -85,7 +85,9 @@ public: virtual unsigned getNumberOfRegisters(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, @@ -193,27 +195,34 @@ unsigned BasicTTI::getMaximumUnrollFactor() const { return 1; } -unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { +unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const { // Check if any of the operands are vector operands. int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + // Assume that floating point arithmetic operations cost twice as much as + // integer operations. + unsigned OpCost = (IsFloat ? 2 : 1); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + return LT.first * 2 * OpCost; + return LT.first * 1 * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { // If the operation is custom lowered then assume // thare the code is twice as expensive. - return LT.first * 2; + return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. @@ -226,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { } // We don't know anything about this scalar instruction. - return 1; + return OpCost; } unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, @@ -379,22 +388,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys) const { - // assume that we need to scalarize this intrinsic. - unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + unsigned ISD = 0; + switch (IID) { + default: { + // Assume that we need to scalarize this intrinsic. + unsigned ScalarizationCost = 0; + unsigned ScalarCalls = 1; + if (RetTy->isVectorTy()) { + ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); } + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { + if (Tys[i]->isVectorTy()) { + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); + } + } + + return ScalarCalls + ScalarizationCost; + } + // Look for intrinsics that can be lowered directly or turned into a scalar + // intrinsic call. + case Intrinsic::sqrt: ISD = ISD::FSQRT; break; + case Intrinsic::sin: ISD = ISD::FSIN; break; + case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::exp: ISD = ISD::FEXP; break; + case Intrinsic::exp2: ISD = ISD::FEXP2; break; + case Intrinsic::log: ISD = ISD::FLOG; break; + case Intrinsic::log10: ISD = ISD::FLOG10; break; + case Intrinsic::log2: ISD = ISD::FLOG2; break; + case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::floor: ISD = ISD::FFLOOR; break; + case Intrinsic::ceil: ISD = ISD::FCEIL; break; + case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::pow: ISD = ISD::FPOW; break; + case Intrinsic::fma: ISD = ISD::FMA; break; + case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + } + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that thre is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; + } + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // thare the code is twice as expensive. + return LT.first * 2; } - return ScalarCalls + ScalarizationCost; + + // Else, assume that we need to scalarize this intrinsic. For math builtins + // this will emit a costly libcall, adding call overhead and spills. Make it + // very expensive. + if (RetTy->isVectorTy()) { + unsigned Num = RetTy->getVectorNumElements(); + unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), + Tys); + return 10 * Cost * Num; + } + + // This is going to be turned into a library call, make it expensive. + return 10; } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { |