about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nadav Rotem <nrotem@apple.com> 2012-11-05 19:32:46 +0000
committer Nadav Rotem <nrotem@apple.com> 2012-11-05 19:32:46 +0000
commit e623702c22e7cee4e02332b245a417a88ae6ffff (patch)
tree b1fcd57b8be41d13cb6a4d4d0ef12500e4c639fe
parent 6837232a609fe7b13473606a1c8a18eabfe2df2a (diff)
Implement the cost of abnormal x86 instruction lowering as a table.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167395 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/Target/TargetTransformImpl.h 5
-rw-r--r-- lib/Target/TargetTransformImpl.cpp 18
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 86
-rw-r--r-- test/Analysis/CostModel/X86/arith.ll 2
-rw-r--r-- test/Transforms/LoopVectorize/X86/conversion-cost.ll 2
5 files changed, 53 insertions, 60 deletions
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
index 625be7208a..d5ab3728af 100644
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -55,13 +55,16 @@ protected:
const TargetLowering *TLI;
/// Estimate the cost of type-legalization and the legalized type.
- std::pair<unsigned, EVT>
+ std::pair<unsigned, MVT>
getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+ // Get the ISD node that corresponds to the Instruction class opcode.
+ int InstructionOpcodeToISD(unsigned Opcode) const;
+
public:
explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index 4cd07cd6c3..a9f02edaae 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -60,7 +60,7 @@ bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
// Calls used by the vectorizers.
//
//===----------------------------------------------------------------------===//
-static int InstructionOpcodeToISD(unsigned Opcode) {
+int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
@@ -130,7 +130,7 @@ static int InstructionOpcodeToISD(unsigned Opcode) {
llvm_unreachable("Unknown instruction type encountered!");
}
-std::pair<unsigned, EVT>
+std::pair<unsigned, MVT>
VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
EVT Ty) const {
unsigned Cost = 1;
@@ -141,7 +141,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
if (LK.first == TargetLowering::TypeLegal)
- return std::make_pair(Cost, Ty);
+ return std::make_pair(Cost, Ty.getSimpleVT());
if (LK.first == TargetLowering::TypeSplitVector)
Cost *= 2;
@@ -174,7 +174,7 @@ unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
int ISD = InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, EVT> LT =
+ std::pair<unsigned, MVT> LT =
getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
if (!TLI->isOperationExpand(ISD, LT.second)) {
@@ -205,10 +205,10 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, EVT> SrcLT =
+ std::pair<unsigned, MVT> SrcLT =
getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
- std::pair<unsigned, EVT> DstLT =
+ std::pair<unsigned, MVT> DstLT =
getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst));
// Handle scalar conversions.
@@ -283,7 +283,7 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
ISD = ISD::VSELECT;
}
- std::pair<unsigned, EVT> LT =
+ std::pair<unsigned, MVT> LT =
getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy));
if (!TLI->isOperationExpand(ISD, LT.second)) {
@@ -326,7 +326,7 @@ unsigned
VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- std::pair<unsigned, EVT> LT =
+ std::pair<unsigned, MVT> LT =
getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
// Assume that all loads of legal types cost 1.
@@ -335,7 +335,7 @@ VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned
VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, EVT> LT =
+ std::pair<unsigned, MVT> LT =
getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
return LT.first;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0d38ba236e..575d30df2e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17505,63 +17505,51 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return Res;
}
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+struct X86CostTblEntry {
+ int ISD;
+ MVT Type;
+ unsigned Cost;
+};
+
unsigned
X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
Type *Ty) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT =
+ getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty));
+
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
const X86Subtarget &ST =
TLI->getTargetMachine().getSubtarget<X86Subtarget>();
- // Fix some of the inaccuracies of the target independent estimation.
- if (Ty->isVectorTy() && ST.hasSSE41()) {
- unsigned NumElem = Ty->getVectorNumElements();
- unsigned SizeInBits = Ty->getScalarType()->getScalarSizeInBits();
-
- bool Is2 = (NumElem == 2);
- bool Is4 = (NumElem == 4);
- bool Is8 = (NumElem == 8);
- bool Is32bits = (SizeInBits == 32);
- bool Is64bits = (SizeInBits == 64);
- bool HasAvx = ST.hasAVX();
- bool HasAvx2 = ST.hasAVX2();
-
- switch (Opcode) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul: {
- // Only AVX2 has support for 8-wide integer operations.
- if (Is32bits && (Is4 || (Is8 && HasAvx2))) return 1;
- if (Is64bits && (Is2 || (Is4 && HasAvx2))) return 1;
-
- // We don't have to completly scalarize unsupported ops. We can
- // issue two half-sized operations (with some overhead).
- // We don't need to extract the lower part of the YMM to the XMM.
- // Extract the upper, two ops, insert the upper = 4.
- if (Is32bits && Is8 && HasAvx) return 4;
- if (Is64bits && Is4 && HasAvx) return 4;
- break;
- }
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul: {
- // AVX has support for 8-wide float operations.
- if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
- if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
- break;
- }
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // AVX has support for 8-wide integer bitwise operations.
- if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1;
- if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1;
- break;
- }
+ static const X86CostTblEntry AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::MUL, MVT::v4i64, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST.hasAVX())
+ for (unsigned int i = 0, e = array_lengthof(AVX1CostTable); i < e; ++i) {
+ if (AVX1CostTable[i].ISD == ISD && AVX1CostTable[i].Type == LT.second)
+ return LT.first * AVX1CostTable[i].Cost;
}
- }
+ // Fallback to the default implementation.
return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
}
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 58b4a7c426..37cca8d540 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -12,6 +12,8 @@ define i32 @add(i32 %arg) {
%C = add <2 x i64> undef, undef
;CHECK: cost of 4 {{.*}} add
%D = add <4 x i64> undef, undef
+ ;CHECK: cost of 8 {{.*}} add
+ %E = add <8 x i64> undef, undef
;CHECK: cost of 1 {{.*}} ret
ret i32 undef
}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 8582613617..19bcdc5d90 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
}
;CHECK: @conversion_cost2
-;CHECK: store <8 x float>
+;CHECK-NOT: <8 x float>
;CHECK: ret
define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 9