diff options
Diffstat (limited to 'lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 509 |
1 files changed, 373 insertions, 136 deletions
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 142f157c3c..876ff2c337 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -54,8 +54,13 @@ static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); +static cl::opt<bool> +SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -667,13 +672,32 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } } + if (HasWeight) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(Weights)); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -830,18 +854,24 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, bool PredHasWeights = HasBranchWeights(PTI); bool SuccHasWeights = HasBranchWeights(TI); - if (PredHasWeights) + if (PredHasWeights) { GetBranchWeights(PTI, Weights); - else if (SuccHasWeights) + // branch-weight metadata is inconsistant here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) // If there are no predecessor weights but there are successor weights, // populate Weights with 1, which will later be scaled to the sum of // successor's weights Weights.assign(1 + PredCases.size(), 1); SmallVector<uint64_t, 8> SuccWeights; - if (SuccHasWeights) + if (SuccHasWeights) { GetBranchWeights(TI, SuccWeights); - else if (PredHasWeights) + // branch-weight metadata is inconsistant here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) SuccWeights.assign(1 + BBCases.size(), 1); if (PredDefault == BB) { @@ -898,18 +928,21 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Weights[0] *= SuccWeights[0]; } } else { - // FIXME: preserve branch weight metadata, similarly to the 'then' - // above. For now, drop it. - PredHasWeights = false; - SuccHasWeights = false; - // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt*, uint64_t> WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == BB) { PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; @@ -920,6 +953,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); PTIHandled.erase(BBCases[i].Value);// This constant is taken care of @@ -930,6 +965,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } @@ -1124,6 +1161,171 @@ HoistTerminator: return true; } +/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BB1 = BI1->getParent(); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // Check that BBEnd has two predecessors and the other predecessor ends with + // an unconditional branch. + SmallVector<BasicBlock*, 16> Preds(pred_begin(BBEnd), pred_end(BBEnd)); + if (Preds.size() != 2) + return false; + BasicBlock *BB2 = (Preds[0] == BB1) ? Preds[1] : Preds[0]; + BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator()); + if (!BI2 || !BI2->isUnconditional()) + return false; + + // Gather the PHI nodes in BBEnd. + std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; + Instruction *FirstNonPhiInBBEnd = 0; + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + } else { + FirstNonPhiInBBEnd = &*I; + break; + } + } + if (!FirstNonPhiInBBEnd) + return false; + + + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. We scan backward for obviously identical + // instructions in an identical order. + BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), + RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return false; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return false; + // Skip the unconditional branches. + ++RI1; + ++RI2; + + bool Changed = false; + while (RI1 != RE1 && RI2 != RE2) { + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return Changed; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return Changed; + + Instruction *I1 = &*RI1, *I2 = &*RI2; + // I1 and I2 should have a single use in the same PHI node, and they + // perform the same operation. + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa<PHINode>(I1) || isa<PHINode>(I2) || + isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || + isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) || + isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || + I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || + I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || + !I1->hasOneUse() || !I2->hasOneUse() || + MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || + MapValueFromBB1ToBB2[I1].first != I2) + return Changed; + + // Check whether we should swap the operands of ICmpInst. + ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); + bool SwapOpnds = false; + if (ICmp1 && ICmp2 && + ICmp1->getOperand(0) != ICmp2->getOperand(0) && + ICmp1->getOperand(1) != ICmp2->getOperand(1) && + (ICmp1->getOperand(0) == ICmp2->getOperand(1) || + ICmp1->getOperand(1) == ICmp2->getOperand(0))) { + ICmp2->swapOperands(); + SwapOpnds = true; + } + if (!I1->isSameOperationAs(I2)) { + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + + // The operands should be either the same or they need to be generated + // with a PHI node after sinking. We only handle the case where there is + // a single pair of different operands. + Value *DifferentOp1 = 0, *DifferentOp2 = 0; + unsigned Op1Idx = 0; + for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { + if (I1->getOperand(I) == I2->getOperand(I)) + continue; + // Early exit if we have more-than one pair of different operands or + // the different operand is already in MapValueFromBB1ToBB2. + // Early exit if we need a PHI node to replace a constant. + if (DifferentOp1 || + MapValueFromBB1ToBB2.find(I1->getOperand(I)) != + MapValueFromBB1ToBB2.end() || + isa<Constant>(I1->getOperand(I)) || + isa<Constant>(I2->getOperand(I))) { + // If we can't sink the instructions, undo the swapping. + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + DifferentOp1 = I1->getOperand(I); + Op1Idx = I; + DifferentOp2 = I2->getOperand(I); + } + + // We insert the pair of different operands to MapValueFromBB1ToBB2 and + // remove (I1, I2) from MapValueFromBB1ToBB2. + if (DifferentOp1) { + PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", + BBEnd->begin()); + MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + // I1 should use NewPN instead of DifferentOp1. + I1->setOperand(Op1Idx, NewPN); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } + PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; + MapValueFromBB1ToBB2.erase(I1); + + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); + DEBUG(dbgs() << " " << *I2 << "\n";); + // We need to update RE1 and RE2 if we are going to sink the first + // instruction in the basic block down. + bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + // Sink the instruction. + BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + if (!OldPN->use_empty()) + OldPN->replaceAllUsesWith(I1); + OldPN->eraseFromParent(); + + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + if (UpdateRE1) + RE1 = BB1->getInstList().rend(); + if (UpdateRE2) + RE2 = BB2->getInstList().rend(); + FirstNonPhiInBBEnd = I1; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + /// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 /// and an BB2 and the only successor of BB1 is BB2, hoist simple code /// (for now, restricted to a single instruction that's side effect free) from @@ -1626,7 +1828,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, /// parameters and return true, or returns false if no or invalid metadata was /// found. static bool ExtractBranchMetadata(BranchInst *BI, - APInt &ProbTrue, APInt &ProbFalse) { + uint64_t &ProbTrue, uint64_t &ProbFalse) { assert(BI->isConditional() && "Looking for probabilities on unconditional branch?"); MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); @@ -1634,35 +1836,11 @@ static bool ExtractBranchMetadata(BranchInst *BI, ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); if (!CITrue || !CIFalse) return false; - ProbTrue = CITrue->getValue(); - ProbFalse = CIFalse->getValue(); - assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 && - "Branch probability metadata must be 32-bit integers"); + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); return true; } -/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In -/// the event of overflow, logically-shifts all four inputs right until the -/// multiply fits. -static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, - unsigned &BitsLost) { - BitsLost = 0; - bool Overflow = false; - APInt Result = A.umul_ov(B, Overflow); - if (Overflow) { - APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A); - do { - B = B.lshr(1); - ++BitsLost; - } while (B.ugt(MaxB)); - A = A.lshr(BitsLost); - C = C.lshr(BitsLost); - D = D.lshr(BitsLost); - Result = A * B; - } - return Result; -} - /// checkCSEInPredecessor - Return true if the given instruction is available /// in its predecessor block. If yes, the instruction will be removed. /// @@ -1788,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { continue; // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc; + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; bool InvertPredCond = false; if (BI->isConditional()) { @@ -1887,14 +2065,53 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { New, "or.cond")); PBI->setCondition(NewCond); + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + SmallVector<uint64_t, 8> NewWeights; + if (PBI->getSuccessor(0) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + } AddPredecessorToBlock(TrueDest, PredBlock, BB); PBI->setSuccessor(0, TrueDest); } if (PBI->getSuccessor(1) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, NULL); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { @@ -1949,90 +2166,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // TODO: If BB is reachable from all paths through PredBlock, then we // could replace PBI's branch probabilities with BI's. - // Merge probability data into PredBlock's branch. - APInt A, B, C, D; - if (PBI->isConditional() && BI->isConditional() && - ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { - // Given IR which does: - // bbA: - // br i1 %x, label %bbB, label %bbC - // bbB: - // br i1 %y, label %bbD, label %bbC - // Let's call the probability that we take the edge from %bbA to %bbB - // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to - // %bbC probability 'd'. - // - // We transform the IR into: - // bbA: - // br i1 %z, label %bbD, label %bbC - // where the probability of going to %bbD is (a*c) and going to bbC is - // (b+a*d). - // - // Probabilities aren't stored as ratios directly. Using branch weights, - // we get: - // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D. - - // In the event of overflow, we want to drop the LSB of the input - // probabilities. - unsigned BitsLost; - - // Ignore overflow result on ProbTrue. - APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost); - - APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - } - - APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - } - - APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - Tmp2 = Tmp2.lshr(BitsLost*2); - } - - bool Overflow1 = false, Overflow2 = false; - APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1); - APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2); - - if (Overflow1 || Overflow2) { - ProbTrue = ProbTrue.lshr(1); - Tmp1 = Tmp1.lshr(1); - Tmp2 = Tmp2.lshr(1); - Tmp3 = Tmp3.lshr(1); - Tmp4 = Tmp2 + Tmp3; - ProbFalse = Tmp4 + Tmp1; - } - - // The sum of branch weights must fit in 32-bits. - if (ProbTrue.isNegative() && ProbFalse.isNegative()) { - ProbTrue = ProbTrue.lshr(1); - ProbFalse = ProbFalse.lshr(1); - } - - if (ProbTrue != ProbFalse) { - // Normalize the result. - APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); - ProbTrue = ProbTrue.udiv(GCD); - ProbFalse = ProbFalse.udiv(GCD); - - MDBuilder MDB(BI->getContext()); - MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(), - ProbFalse.getZExtValue()); - PBI->setMetadata(LLVMContext::MD_prof, N); - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - // Copy any debug value intrinsics into the end of PredBlock. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) @@ -2187,6 +2320,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + if (PredHasWeights && SuccHasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + SmallVector<uint64_t, 2> NewWeights; + NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon); + NewWeights.push_back(PredOther * SuccOther); + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); @@ -2223,7 +2383,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB){ + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight){ // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -2252,10 +2414,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else + else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. - Builder.CreateCondBr(Cond, TrueBB, FalseBB); + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()). + createBranchWeights(TrueWeight, FalseWeight)); + } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this // terminator must be unreachable. @@ -2292,8 +2459,23 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). + getSuccessorIndex()]; + FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). + getSuccessorIndex()]; + } + } + // Perform the actual simplification. - return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, + TrueWeight, FalseWeight); } // SimplifyIndirectBrOnSelect - Replaces @@ -2313,7 +2495,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + 0, 0); } /// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp @@ -2412,6 +2595,21 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0]+1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(MDWeights)); + } + } SI->addCase(Cst, NewBB); // NewBB branches to the phi block, add the uncond branch and the phi entry. @@ -2825,9 +3023,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - Builder.CreateCondBr( + BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + // Update weight for the newly-created conditional branch. + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Combine all weights for the cases to be the true weight of NewBI. + // We assume that the sum of all weights for a Terminator can fit into 32 + // bits. + uint32_t NewTrueWeight = 0; + for (unsigned I = 1, E = Weights.size(); I != E; ++I) + NewTrueWeight += (uint32_t)Weights[I]; + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(NewTrueWeight, + (uint32_t)Weights[0])); + } + } + // Prune obsolete incoming values off the successor's PHI nodes. for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); isa<PHINode>(BBI); ++BBI) { @@ -2858,15 +3075,33 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { } } + SmallVector<uint64_t, 8> Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + // Remove dead cases from the switch. for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); assert(Case != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + // Prune unused values from PHI nodes. Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } + if (HasWeight) { + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(MDWeights)); + } return !DeadCases.empty(); } @@ -3193,17 +3428,16 @@ static bool SwitchToLookupTable(SwitchInst *SI, "switch.gep"); Value *Result = Builder.CreateLoad(GEP, "switch.load"); - // If the result is only going to be used to return from the function, - // we want to do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) { - if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { - Builder.CreateRet(Result); - ReturnedEarly = true; - } + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && + *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; } - if (!ReturnedEarly) - PHI->addIncoming(Result, LookupBB); + PHI->addIncoming(Result, LookupBB); } if (!ReturnedEarly) @@ -3306,6 +3540,9 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && |