diff options
author | Eli Bendersky <eliben@chromium.org> | 2013-03-11 15:16:37 -0700 |
---|---|---|
committer | Eli Bendersky <eliben@chromium.org> | 2013-03-11 15:16:37 -0700 |
commit | 23c00401dad33ca247d2818e71540079bed63c5b (patch) | |
tree | df9f25d60f9538fbde84b78cf3c4e4a00eb6c3db /lib/Transforms/Vectorize/BBVectorize.cpp | |
parent | 79da56afe68a0c5b2c2227681014dd13705d78cc (diff) | |
parent | 279b9184c2ff4fea93b198a3519b8cb3a1d8d195 (diff) |
Merge commit '279b9184c2ff4fea93b198a3519b8cb3a1d8d195'
Conflicts:
include/llvm/CodeGen/LexicalScopes.h
include/llvm/MC/MCAsmInfo.h
lib/Linker/LinkArchives.cpp
lib/Linker/LinkItems.cpp
lib/MC/MCAsmInfo.cpp
lib/MC/MCDwarf.cpp
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
lib/Target/Mips/MipsAsmPrinter.cpp
lib/Target/Mips/MipsDelaySlotFiller.cpp
lib/Target/Mips/MipsISelDAGToDAG.cpp
lib/Target/Mips/MipsSubtarget.cpp
lib/Target/Mips/MipsSubtarget.h
lib/Target/Mips/MipsTargetObjectFile.cpp
lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
lib/Target/X86/X86FastISel.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86TargetMachine.cpp
lib/Transforms/CMakeLists.txt
lib/Transforms/LLVMBuild.txt
lib/Transforms/Makefile
test/MC/ARM/arm_instructions.s
test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
Diffstat (limited to 'lib/Transforms/Vectorize/BBVectorize.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/BBVectorize.cpp | 934 |
1 files changed, 527 insertions, 407 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index d72a4a1a62..76365417aa 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -48,7 +48,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> -#include <map> using namespace llvm; static cl::opt<bool> @@ -89,6 +88,10 @@ MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group")); static cl::opt<unsigned> +MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, + cl::desc("The maximum number of candidate instruction pairs per group")); + +static cl::opt<unsigned> MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" " a full cycle check")); @@ -207,11 +210,6 @@ namespace { typedef std::pair<ValuePair, size_t> ValuePairWithDepth; typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair typedef std::pair<VPPair, unsigned> VPPairWithType; - typedef std::pair<std::multimap<Value *, Value *>::iterator, - std::multimap<Value *, Value *>::iterator> VPIteratorPair; - typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, - std::multimap<ValuePair, ValuePair>::iterator> - VPPIteratorPair; AliasAnalysis *AA; DominatorTree *DT; @@ -225,7 +223,7 @@ namespace { bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, DenseSet<ValuePair> &FixedOrderPairs, DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len); @@ -239,33 +237,36 @@ namespace { PairConnectionSplat }; - void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes); + void computeConnectedPairs( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes); void buildDepMap(BasicBlock &BB, - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &PairableInstUsers); - - void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *>& ChosenPairs); + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &PairableInstUsers); + + void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *>& ChosenPairs); void fuseChosenPairs(BasicBlock &BB, - std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *>& ChosenPairs, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps); + std::vector<Value *> &PairableInsts, + DenseMap<Value *, Value *>& ChosenPairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); @@ -277,56 +278,63 @@ namespace { bool trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers = true, - std::multimap<Value *, Value *> *LoadMoveSet = 0); + DenseSet<ValuePair> *LoadMoveSetPairs = 0); - void computePairsConnectedTo( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - ValuePair P); + void computePairsConnectedTo( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + ValuePair P); bool pairsConflict(ValuePair P, ValuePair Q, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0); + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > + *PairableInstUserMap = 0, + DenseSet<VPPair> *PairableInstUserPairSet = 0); bool pairWillFormCycle(ValuePair P, - std::multimap<ValuePair, ValuePair> &PairableInstUsers, - DenseSet<ValuePair> &CurrentPairs); - - void pruneTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, - DenseSet<ValuePair> &PrunedTree, ValuePair J, - bool UseCycleCheck); - - void buildInitialTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, ValuePair J); - - void findBestTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, - int &BestEffSize, VPIteratorPair ChoiceRange, - bool UseCycleCheck); + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers, + DenseSet<ValuePair> &CurrentPairs); + + void pruneDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, + DenseSet<ValuePair> &PrunedDAG, ValuePair J, + bool UseCycleCheck); + + void buildInitialDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, ValuePair J); + + void findBestDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth, + int &BestEffSize, Value *II, std::vector<Value *>&JJ, + bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o); @@ -358,20 +366,22 @@ namespace { void collectPairLoadMoveSet(BasicBlock &BB, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I); void collectLoadMoveSet(BasicBlock &BB, std::vector<Value *> &PairableInsts, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet); + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs); bool canMoveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I, Instruction *J); void moveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *&InsertionPt, Instruction *I, Instruction *J); @@ -463,18 +473,18 @@ namespace { static inline void getInstructionTypes(Instruction *I, Type *&T1, Type *&T2) { - if (isa<StoreInst>(I)) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // For stores, it is the value type, not the pointer type that matters // because the value is what will come from a vector register. - Value *IVal = cast<StoreInst>(I)->getValueOperand(); + Value *IVal = SI->getValueOperand(); T1 = IVal->getType(); } else { T1 = I->getType(); } - if (I->isCast()) - T2 = cast<CastInst>(I)->getSrcTy(); + if (CastInst *CI = dyn_cast<CastInst>(I)) + T2 = CI->getSrcTy(); else T2 = T1; @@ -500,7 +510,7 @@ namespace { // InsertElement and ExtractElement have a depth factor of zero. This is // for two reasons: First, they cannot be usefully fused. Second, because // the pass generates a lot of these, they can confuse the simple metric - // used to compare the trees in the next iteration. Thus, giving them a + // used to compare the dags in the next iteration. Thus, giving them a // weight of zero allows the pass to essentially ignore them in // subsequent iterations when looking for vectorization opportunities // while still tracking dependency chains that flow through those @@ -661,19 +671,6 @@ namespace { } } - // Returns true if J is the second element in some pair referenced by - // some multimap pair iterator pair. - template <typename V> - bool isSecondInIteratorPair(V J, std::pair< - typename std::multimap<V, V>::iterator, - typename std::multimap<V, V>::iterator> PairRange) { - for (typename std::multimap<V, V>::iterator K = PairRange.first; - K != PairRange.second; ++K) - if (K->second == J) return true; - - return false; - } - bool isPureIEChain(InsertElementInst *IE) { InsertElementInst *IENext = IE; do { @@ -698,11 +695,12 @@ namespace { DenseMap<Value *, Value *> AllChosenPairs; DenseSet<ValuePair> AllFixedOrderPairs; DenseMap<VPPair, unsigned> AllPairConnectionTypes; - std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps; + DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs, + AllConnectedPairDeps; do { std::vector<Value *> PairableInsts; - std::multimap<Value *, Value *> CandidatePairs; + DenseMap<Value *, std::vector<Value *> > CandidatePairs; DenseSet<ValuePair> FixedOrderPairs; DenseMap<ValuePair, int> CandidatePairCostSavings; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, @@ -711,6 +709,14 @@ namespace { PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; + // Build the candidate pair set for faster lookups. + DenseSet<ValuePair> CandidatePairsSet; + for (DenseMap<Value *, std::vector<Value *> >::iterator I = + CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I) + for (std::vector<Value *>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + CandidatePairsSet.insert(ValuePair(I->first, *J)); + // Now we have a map of all of the pairable instructions and we need to // select the best possible pairing. A good pairing is one such that the // users of the pair are also paired. This defines a (directed) forest @@ -720,30 +726,33 @@ namespace { // Note that it only matters that both members of the second pair use some // element of the first pair (to allow for splatting). - std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps; + DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs, + ConnectedPairDeps; DenseMap<VPPair, unsigned> PairConnectionTypes; - computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs, - PairConnectionTypes); + computeConnectedPairs(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, PairConnectionTypes); if (ConnectedPairs.empty()) continue; - for (std::multimap<ValuePair, ValuePair>::iterator + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - ConnectedPairDeps.insert(VPPair(I->second, I->first)); - } + I != IE; ++I) + for (std::vector<ValuePair>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + ConnectedPairDeps[*J].push_back(I->first); // Build the pairable-instruction dependency map DenseSet<ValuePair> PairableInstUsers; buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); // There is now a graph of the connected pairs. For each variable, pick - // the pairing with the largest tree meeting the depth requirement on at - // least one branch. Then select all pairings that are part of that tree + // the pairing with the largest dag meeting the depth requirement on at + // least one branch. Then select all pairings that are part of that dag // and remove them from the list of available pairings and pairable // variables. DenseMap<Value *, Value *> ChosenPairs; - choosePairs(CandidatePairs, CandidatePairCostSavings, + choosePairs(CandidatePairs, CandidatePairsSet, + CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, PairableInstUsers, ChosenPairs); @@ -777,14 +786,15 @@ namespace { } } - for (std::multimap<ValuePair, ValuePair>::iterator + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - if (AllPairConnectionTypes.count(*I)) { - AllConnectedPairs.insert(*I); - AllConnectedPairDeps.insert(VPPair(I->second, I->first)); - } - } + I != IE; ++I) + for (std::vector<ValuePair>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + if (AllPairConnectionTypes.count(VPPair(I->first, *J))) { + AllConnectedPairs[I->first].push_back(*J); + AllConnectedPairDeps[*J].push_back(I->first); + } } while (ShouldContinue); if (AllChosenPairs.empty()) return false; @@ -910,7 +920,7 @@ namespace { // This function returns true if the two provided instructions are compatible // (meaning that they can be fused into a vector instruction). This assumes // that I has already been determined to be vectorizable and that J is not - // in the use tree of I. + // in the use dag of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool IsSimpleLoadStore, bool NonPow2Len, int &CostSavings, int &FixedOrder) { @@ -972,6 +982,11 @@ namespace { unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, BottomAlignment, IAddressSpace); + + ICost += TTI->getAddressComputationCost(aTypeI); + JCost += TTI->getAddressComputationCost(aTypeJ); + VCost += TTI->getAddressComputationCost(VType); + if (VCost > ICost + JCost) return false; @@ -994,6 +1009,12 @@ namespace { unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); Type *VT1 = getVecTypeForPair(IT1, JT1), *VT2 = getVecTypeForPair(IT2, JT2); + + // Note that this procedure is incorrect for insert and extract element + // instructions (because combining these often results in a shuffle), + // but this cost is ignored (because insert and extract element + // instructions are assigned a zero depth factor and are not really + // fused in general). unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); if (VCost > ICost + JCost) @@ -1090,7 +1111,7 @@ namespace { // to contain any memory locations to which J writes. The function returns // true if J uses I. By default, alias analysis is used to determine // whether J reads from memory that overlaps with a location in WriteSet. - // If LoadMoveSet is not null, then it is a previously-computed multimap + // If LoadMoveSet is not null, then it is a previously-computed map // where the key is the memory-based user instruction and the value is // the instruction to be compared with I. So, if LoadMoveSet is provided, // then the alias analysis is not used. This is necessary because this @@ -1100,7 +1121,7 @@ namespace { bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers, - std::multimap<Value *, Value *> *LoadMoveSet) { + DenseSet<ValuePair> *LoadMoveSetPairs) { bool UsesI = false; // This instruction may already be marked as a user due, for example, to @@ -1118,9 +1139,8 @@ namespace { } } if (!UsesI && J->mayReadFromMemory()) { - if (LoadMoveSet) { - VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); - UsesI = isSecondInIteratorPair<Value*>(I, JPairRange); + if (LoadMoveSetPairs) { + UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); } else { for (AliasSetTracker::iterator W = WriteSet.begin(), WE = WriteSet.end(); W != WE; ++W) { @@ -1144,10 +1164,11 @@ namespace { // basic block and collects all candidate pairs for vectorization. bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, DenseSet<ValuePair> &FixedOrderPairs, DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len) { + size_t TotalPairs = 0; BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -1193,7 +1214,8 @@ namespace { PairableInsts.push_back(I); } - CandidatePairs.insert(ValuePair(I, J)); + CandidatePairs[I].push_back(J); + ++TotalPairs; if (TTI) CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CostSavings)); @@ -1217,7 +1239,8 @@ namespace { // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected // during this invocation. - if (PairableInsts.size() >= Config.MaxInsts) { + if (PairableInsts.size() >= Config.MaxInsts || + TotalPairs >= Config.MaxPairs) { ShouldContinue = true; break; } @@ -1237,11 +1260,12 @@ namespace { // it looks for pairs such that both members have an input which is an // output of PI or PJ. void BBVectorize::computePairsConnectedTo( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - ValuePair P) { + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + ValuePair P) { StoreInst *SI, *SJ; // For each possible pairing for this variable, look at the uses of @@ -1259,8 +1283,6 @@ namespace { continue; } - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - // For each use of the first variable, look for uses of the second // variable... for (Value::use_iterator J = P.second->use_begin(), @@ -1269,19 +1291,17 @@ namespace { P.second == SJ->getPointerOperand()) continue; - VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); - // Look for <I, J>: - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); } // Look for <J, I>: - if (isSecondInIteratorPair<Value*>(*I, JPairRange)) { + if (CandidatePairsSet.count(ValuePair(*J, *I))) { VPPair VP(P, ValuePair(*J, *I)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); } } @@ -1294,9 +1314,9 @@ namespace { P.first == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1313,16 +1333,14 @@ namespace { P.second == SI->getPointerOperand()) continue; - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { if ((SJ = dyn_cast<StoreInst>(*J)) && P.second == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1333,55 +1351,73 @@ namespace { // connected if some output of the first pair forms an input to both members // of the second pair. void BBVectorize::computeConnectedPairs( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes) { - + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes) { for (std::vector<Value *>::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { - VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); + DenseMap<Value *, std::vector<Value *> >::iterator PP = + CandidatePairs.find(*PI); + if (PP == CandidatePairs.end()) + continue; - for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; - P != choiceRange.second; ++P) - computePairsConnectedTo(CandidatePairs, PairableInsts, - ConnectedPairs, PairConnectionTypes, *P); + for (std::vector<Value *>::iterator P = PP->second.begin(), + E = PP->second.end(); P != E; ++P) + computePairsConnectedTo(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, + PairConnectionTypes, ValuePair(*PI, *P)); } - DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() + DEBUG(size_t TotalPairs = 0; + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = + ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I) + TotalPairs += I->second.size(); + dbgs() << "BBV: found " << TotalPairs << " pair connections.\n"); } // This function builds a set of use tuples such that <A, B> is in the set - // if B is in the use tree of A. If B is in the use tree of A, then B + // if B is in the use dag of A. If B is in the use dag of A, then B // depends on the output of A. void BBVectorize::buildDepMap( BasicBlock &BB, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, std::vector<Value *> &PairableInsts, DenseSet<ValuePair> &PairableInstUsers) { DenseSet<Value *> IsInPair; - for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), - E = CandidatePairs.end(); C != E; ++C) { + for (DenseMap<Value *, std::vector<Value *> >::iterator C = + CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) { IsInPair.insert(C->first); - IsInPair.insert(C->second); + IsInPair.insert(C->second.begin(), C->second.end()); } - // Iterate through the basic block, recording all Users of each + // Iterate through the basic block, recording all users of each // pairable instruction. - BasicBlock::iterator E = BB.end(); + BasicBlock::iterator E = BB.end(), EL = + BasicBlock::iterator(cast<Instruction>(PairableInsts.back())); for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { if (IsInPair.find(I) == IsInPair.end()) continue; DenseSet<Value *> Users; AliasSetTracker WriteSet(*AA); - for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) + for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) { (void) trackUsesOfI(Users, WriteSet, I, J); + if (J == EL) + break; + } + for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); - U != E; ++U) + U != E; ++U) { + if (IsInPair.find(*U) == IsInPair.end()) continue; PairableInstUsers.insert(ValuePair(I, *U)); + } + + if (I == EL) + break; } } @@ -1389,8 +1425,9 @@ namespace { // input of pair Q is an output of pair P. If this is the case, then these // two pairs cannot be simultaneously fused. bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> *PairableInstUserMap) { + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap, + DenseSet<VPPair> *PairableInstUserPairSet) { // Two pairs are in conflict if they are mutual Users of eachother. bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || PairableInstUsers.count(ValuePair(P.first, Q.second)) || @@ -1403,17 +1440,14 @@ namespace { if (PairableInstUserMap) { // FIXME: The expensive part of the cycle check is not so much the cycle // check itself but this edge insertion procedure. This needs some - // profiling and probably a different data structure (same is true of - // most uses of std::multimap). + // profiling and probably a different data structure. if (PUsesQ) { - VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); - if (!isSecondInIteratorPair(P, QPairRange)) - PairableInstUserMap->insert(VPPair(Q, P)); + if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) + (*PairableInstUserMap)[Q].push_back(P); } if (QUsesP) { - VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); - if (!isSecondInIteratorPair(Q, PPairRange)) - PairableInstUserMap->insert(VPPair(P, Q)); + if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) + (*PairableInstUserMap)[P].push_back(Q); } } @@ -1423,8 +1457,8 @@ namespace { // This function walks the use graph of current pairs to see if, starting // from P, the walk returns to P. bool BBVectorize::pairWillFormCycle(ValuePair P, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseSet<ValuePair> &CurrentPairs) { + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<ValuePair> &CurrentPairs) { DEBUG(if (DebugCycleCheck) dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " << *P.second << "\n"); @@ -1441,36 +1475,41 @@ namespace { DEBUG(if (DebugCycleCheck) dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " << *QTop.second << "\n"); - VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); - for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; - C != QPairRange.second; ++C) { - if (C->second == P) { + DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = + PairableInstUserMap.find(QTop); + if (QQ == PairableInstUserMap.end()) + continue; + + for (std::vector<ValuePair>::iterator C = QQ->second.begin(), + CE = QQ->second.end(); C != CE; ++C) { + if (*C == P) { DEBUG(dbgs() << "BBV: rejected to prevent non-trivial cycle formation: " - << *C->first.first << " <-> " << *C->first.second << "\n"); + << QTop.first << " <-> " << C->second << "\n"); return true; } - if (CurrentPairs.count(C->second) && !Visited.count(C->second)) - Q.push_back(C->second); + if (CurrentPairs.count(*C) && !Visited.count(*C)) + Q.push_back(*C); } } while (!Q.empty()); return false; } - // This function builds the initial tree of connected pairs with the + // This function builds the initial dag of connected pairs with the // pair J at the root. - void BBVectorize::buildInitialTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, ValuePair J) { - // Each of these pairs is viewed as the root node of a Tree. The Tree + void BBVectorize::buildInitialDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, ValuePair J) { + // Each of these pairs is viewed as the root node of a DAG. The DAG // is then walked (depth-first). As this happens, we keep track of - // the pairs that compose the Tree and the maximum depth of the Tree. + // the pairs that compose the DAG and the maximum depth of the DAG. |