aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/Analysis/CostModel.cpp 5
-rw-r--r-- lib/Analysis/TargetTransformInfo.cpp 7
-rw-r--r-- lib/CodeGen/BasicTargetTransformInfo.cpp 5
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp 13
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 22
5 files changed, 42 insertions, 10 deletions
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 1784512bce..8435e39707 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -85,6 +85,11 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
return -1;
switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:{
+ Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
+ return TTI->getAddressComputationCost(ValTy);
+ }
+
case Instruction::Ret:
case Instruction::PHI:
case Instruction::Br: {
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9fc21fdb92..72421a00c7 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -196,6 +196,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return PrevTTI->getNumberOfParts(Tp);
}
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
+ return PrevTTI->getAddressComputationCost(Tp);
+}
namespace {
@@ -535,6 +538,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
unsigned getNumberOfParts(Type *Tp) const {
return 0;
}
+
+ unsigned getAddressComputationCost(Type *Tp) const {
+ return 0;
+ }
};
} // end anonymous namespace
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index ea5e93747d..e8b5b4fe8d 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -101,6 +101,7 @@ public:
virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
ArrayRef<Type*> Tys) const;
virtual unsigned getNumberOfParts(Type *Tp) const;
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
/// @}
};
@@ -400,3 +401,7 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
return LT.first;
}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+ return 0;
+}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1f91e0ee36..f6fa319970 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -120,6 +120,8 @@ public:
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+
+ unsigned getAddressComputationCost(Type *Val) const;
/// @}
};
@@ -304,12 +306,13 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) const {
- // Penalize inserting into an D-subregister.
+ // Penalize inserting into a D-subregister. We end up with a three times
+ // lower estimated throughput on Swift.
if (ST->isSwift() &&
Opcode == Instruction::InsertElement &&
ValTy->isVectorTy() &&
ValTy->getScalarSizeInBits() <= 32)
- return 2;
+ return 3;
return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
}
@@ -326,3 +329,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
+
+unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 91d565976a..f12b0bf0f3 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3056,9 +3056,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
- // We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
- // generate vector geps.
+ // We mark this instruction as zero-cost because the cost of GEPs in
+ // vectorized code depends on whether the corresponding memory instruction
+ // is scalarized or not. Therefore, we handle GEPs with the memory
+ // instruction cost.
return 0;
case Instruction::Br: {
return TTI.getCFInstrCost(I->getOpcode());
@@ -3113,9 +3114,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
unsigned AS = SI ? SI->getPointerAddressSpace() :
LI->getPointerAddressSpace();
Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
-
+ // We add the cost of address computation here instead of with the gep
+ // instruction because only here do we know whether the operation is
+ // scalarized.
if (VF == 1)
- return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ return TTI.getAddressComputationCost(VectorTy) +
+ TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
// Scalarized loads/stores.
int Stride = Legal->isConsecutivePtr(Ptr);
@@ -3135,15 +3139,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
VectorTy, i);
}
- // The cost of the scalar stores.
+ // The cost of the scalar loads/stores.
+ Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
return Cost;
}
// Wide load/stores.
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- Alignment, AS);
+ unsigned Cost = TTI.getAddressComputationCost(VectorTy);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
VectorTy, 0);