aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp2
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp2
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp312
-rw-r--r--lib/Transforms/Utils/Local.cpp38
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp6
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp509
7 files changed, 727 insertions, 143 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 1e6586bf0d..0f9dfddf57 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -55,7 +55,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
OS << ']';
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 30d60be277..ac18b7d5a0 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -221,7 +221,7 @@ static bool reuseOrInsertFastDiv(Function &F,
// be profitably bypassed and carried out with a shorter, faster divide.
bool llvm::bypassSlowDivision(Function &F,
Function::iterator &I,
- const DenseMap<Type *, Type *> &BypassTypeMap) {
+ const DenseMap<Type*, Type*> &BypassTypeMap) {
DivCacheTy DivCache;
bool MadeChange = false;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index aace75b745..c3f72b13af 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMTransformUtils
DemoteRegToStack.cpp
InlineFunction.cpp
InstructionNamer.cpp
+ IntegerDivision.cpp
LCSSA.cpp
Local.cpp
LoopSimplify.cpp
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
new file mode 100644
index 0000000000..9d630349ab
--- /dev/null
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -0,0 +1,312 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32bit scalar integer division for
+// targets that don't have native support. It's largely derived from
+// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
+// amount of control flow
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "integer-division"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing at the sdiv
+/// instruction. This will generate a udiv in the process, and Builder's insert
+/// point will be pointing at the udiv (if present, i.e. not folded), ready to
+/// be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Implementation taken from compiler-rt's __divsi3
+
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %tmp = ashr i32 %dividend, 31
+ // ; %tmp1 = ashr i32 %divisor, 31
+ // ; %tmp2 = xor i32 %tmp, %dividend
+ // ; %u_dvnd = sub nsw i32 %tmp2, %tmp
+ // ; %tmp3 = xor i32 %tmp1, %divisor
+ // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1
+ // ; %q_sgn = xor i32 %tmp1, %tmp
+ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
+ // ; %tmp4 = xor i32 %q_mag, %q_sgn
+ // ; %q = sub i32 %tmp4, %q_sgn
+ Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
+ Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+ Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
+ Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+ Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp);
+ Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+ Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn);
+ Value *Q = Builder.CreateSub(Tmp4, Q_Sgn);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+ Builder.SetInsertPoint(UDiv);
+
+ return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
+/// quotient, rounded towards 0. Builder's insert point should be pointing at
+/// the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // The basic algorithm can be found in the compiler-rt project's
+ // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
+ // that's been hand-tuned to lessen the amount of control flow involved.
+
+ // Some helper values
+ IntegerType *I32Ty = Builder.getInt32Ty();
+
+ ConstantInt *Zero = Builder.getInt32(0);
+ ConstantInt *One = Builder.getInt32(1);
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+ ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
+ ConstantInt *True = Builder.getTrue();
+
+ BasicBlock *IBB = Builder.GetInsertBlock();
+ Function *F = IBB->getParent();
+ Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ I32Ty);
+
+ // Our CFG is going to look like:
+ // +---------------------+
+ // | special-cases |
+ // | ... |
+ // +---------------------+
+ // | |
+ // | +----------+
+ // | | bb1 |
+ // | | ... |
+ // | +----------+
+ // | | |
+ // | | +------------+
+ // | | | preheader |
+ // | | | ... |
+ // | | +------------+
+ // | | |
+ // | | | +---+
+ // | | | | |
+ // | | +------------+ |
+ // | | | do-while | |
+ // | | | ... | |
+ // | | +------------+ |
+ // | | | | |
+ // | +-----------+ +---+
+ // | | loop-exit |
+ // | | ... |
+ // | +-----------+
+ // | |
+ // +-------+
+ // | ... |
+ // | end |
+ // +-------+
+ BasicBlock *SpecialCases = Builder.GetInsertBlock();
+ SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+ BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+ "udiv-end");
+ BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(),
+ "udiv-loop-exit", F, End);
+ BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(),
+ "udiv-do-while", F, End);
+ BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+ "udiv-preheader", F, End);
+ BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(),
+ "udiv-bb1", F, End);
+
+ // We'll be overwriting the terminator to insert our extra blocks
+ SpecialCases->getTerminator()->eraseFromParent();
+
+ // First off, check for special cases: dividend or divisor is zero, divisor
+ // is greater than dividend, and divisor is 1.
+ // ; special-cases:
+ // ; %ret0_1 = icmp eq i32 %divisor, 0
+ // ; %ret0_2 = icmp eq i32 %dividend, 0
+ // ; %ret0_3 = or i1 %ret0_1, %ret0_2
+ // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+ // ; %sr = sub nsw i32 %tmp0, %tmp1
+ // ; %ret0_4 = icmp ugt i32 %sr, 31
+ // ; %ret0 = or i1 %ret0_3, %ret0_4
+ // ; %retDividend = icmp eq i32 %sr, 31
+ // ; %retVal = select i1 %ret0, i32 0, i32 %dividend
+ // ; %earlyRet = or i1 %ret0, %retDividend
+ // ; br i1 %earlyRet, label %end, label %bb1
+ Builder.SetInsertPoint(SpecialCases);
+ Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
+ Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
+ Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
+ Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *SR = Builder.CreateSub(Tmp0, Tmp1);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
+ Builder.CreateCondBr(EarlyRet, End, BB1);
+
+ // ; bb1: ; preds = %special-cases
+ // ; %sr_1 = add i32 %sr, 1
+ // ; %tmp2 = sub i32 31, %sr
+ // ; %q = shl i32 %dividend, %tmp2
+ // ; %skipLoop = icmp eq i32 %sr_1, 0
+ // ; br i1 %skipLoop, label %loop-exit, label %preheader
+ Builder.SetInsertPoint(BB1);
+ Value *SR_1 = Builder.CreateAdd(SR, One);
+ Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+ Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+ // ; preheader: ; preds = %bb1
+ // ; %tmp3 = lshr i32 %dividend, %sr_1
+ // ; %tmp4 = add i32 %divisor, -1
+ // ; br label %do-while
+ Builder.SetInsertPoint(Preheader);
+ Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+ Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+ Builder.CreateBr(DoWhile);
+
+ // ; do-while: ; preds = %do-while, %preheader
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ // ; %tmp5 = shl i32 %r_1, 1
+ // ; %tmp6 = lshr i32 %q_2, 31
+ // ; %tmp7 = or i32 %tmp5, %tmp6
+ // ; %tmp8 = shl i32 %q_2, 1
+ // ; %q_1 = or i32 %carry_1, %tmp8
+ // ; %tmp9 = sub i32 %tmp4, %tmp7
+ // ; %tmp10 = ashr i32 %tmp9, 31
+ // ; %carry = and i32 %tmp10, 1
+ // ; %tmp11 = and i32 %tmp10, %divisor
+ // ; %r = sub i32 %tmp7, %tmp11
+ // ; %sr_2 = add i32 %sr_3, -1
+ // ; %tmp12 = icmp eq i32 %sr_2, 0
+ // ; br i1 %tmp12, label %loop-exit, label %do-while
+ Builder.SetInsertPoint(DoWhile);
+ PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp5 = Builder.CreateShl(R_1, One);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
+ Value *Tmp8 = Builder.CreateShl(Q_2, One);
+ Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
+ Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Carry = Builder.CreateAnd(Tmp10, One);
+ Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+ Value *R = Builder.CreateSub(Tmp7, Tmp11);
+ Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
+ Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+ Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+ // ; loop-exit: ; preds = %do-while, %bb1
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ // ; %tmp13 = shl i32 %q_3, 1
+ // ; %q_4 = or i32 %carry_2, %tmp13
+ // ; br label %end
+ Builder.SetInsertPoint(LoopExit);
+ PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ Value *Tmp13 = Builder.CreateShl(Q_3, One);
+ Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
+ Builder.CreateBr(End);
+
+ // ; end: ; preds = %loop-exit, %special-cases
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ // ; ret i32 %q_5
+ Builder.SetInsertPoint(End, End->begin());
+ PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+
+ // Populate the Phis, since all values have now been created. Our Phis were:
+ // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+ Carry_1->addIncoming(Zero, Preheader);
+ Carry_1->addIncoming(Carry, DoWhile);
+ // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+ SR_3->addIncoming(SR_1, Preheader);
+ SR_3->addIncoming(SR_2, DoWhile);
+ // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+ R_1->addIncoming(Tmp3, Preheader);
+ R_1->addIncoming(R, DoWhile);
+ // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+ Q_2->addIncoming(Q, Preheader);
+ Q_2->addIncoming(Q_1, DoWhile);
+ // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+ Carry_2->addIncoming(Zero, BB1);
+ Carry_2->addIncoming(Carry, DoWhile);
+ // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+ Q_3->addIncoming(Q, BB1);
+ Q_3->addIncoming(Q_1, DoWhile);
+ // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+ Q_5->addIncoming(Q_4, LoopExit);
+ Q_5->addIncoming(RetVal, SpecialCases);
+
+ return Q_5;
+}
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similarly to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands are known. Currently only
+/// implements 32bit scalar division, but future work is removing this
+/// limitation.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ IRBuilder<> Builder(Div);
+
+ if (Div->getType()->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ // First prepare the sign if it's a signed division
+ if (Div->getOpcode() == Instruction::SDiv) {
+ // Lower the code to unsigned division, and reset Div to point to the udiv.
+ Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1), Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::UDiv)
+ return true;
+
+ Div = BO;
+ }
+
+ // Insert the unsigned division code
+ Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+ Div->getOperand(1),
+ Builder);
+ Div->replaceAllUsesWith(Quotient);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return true;
+}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0601433565..2d89516c43 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
+#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
@@ -122,6 +123,27 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
if (i.getCaseSuccessor() == DefaultDest) {
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ // MD should have 2 + NumCases operands.
+ if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+ // Merge weight of this case to the default weight.
+ unsigned idx = i.getCaseIndex();
+ Weights[0] += Weights[idx+1];
+ // Remove weight for this case.
+ std::swap(Weights[idx+1], Weights.back());
+ Weights.pop_back();
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(Weights));
+ }
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
@@ -178,8 +200,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
"cond");
// Insert the new branch.
- Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
+ }
// Delete the old switch.
SI->eraseFromParent();
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 60f031e16f..233bc12d3c 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -13,15 +13,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
#include "llvm/TypeFinder.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 142f157c3c..876ff2c337 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -54,8 +54,13 @@ static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -667,13 +672,32 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
+ if (HasWeight)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -830,18 +854,24 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
bool PredHasWeights = HasBranchWeights(PTI);
bool SuccHasWeights = HasBranchWeights(TI);
- if (PredHasWeights)
+ if (PredHasWeights) {
GetBranchWeights(PTI, Weights);
- else if (SuccHasWeights)
+ // branch-weight metadata is inconsistant here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
// If there are no predecessor weights but there are successor weights,
// populate Weights with 1, which will later be scaled to the sum of
// successor's weights
Weights.assign(1 + PredCases.size(), 1);
SmallVector<uint64_t, 8> SuccWeights;
- if (SuccHasWeights)
+ if (SuccHasWeights) {
GetBranchWeights(TI, SuccWeights);
- else if (PredHasWeights)
+ // branch-weight metadata is inconsistant here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
SuccWeights.assign(1 + BBCases.size(), 1);
if (PredDefault == BB) {
@@ -898,18 +928,21 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Weights[0] *= SuccWeights[0];
}
} else {
- // FIXME: preserve branch weight metadata, similarly to the 'then'
- // above. For now, drop it.
- PredHasWeights = false;
- SuccHasWeights = false;
-
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == BB) {
PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
std::swap(PredCases[i], PredCases.back());
PredCases.pop_back();
--i; --e;
@@ -920,6 +953,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (PTIHandled.count(BBCases[i].Value)) {
// If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
@@ -930,6 +965,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
@@ -1124,6 +1161,171 @@ HoistTerminator:
return true;
}
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ SmallVector<BasicBlock*, 16> Preds(pred_begin(BBEnd), pred_end(BBEnd));
+ if (Preds.size() != 2)
+ return false;
+ BasicBlock *BB2 = (Preds[0] == BB1) ? Preds[1] : Preds[0];
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
@@ -1626,7 +1828,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
/// parameters and return true, or returns false if no or invalid metadata was
/// found.
static bool ExtractBranchMetadata(BranchInst *BI,
- APInt &ProbTrue, APInt &ProbFalse) {
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
assert(BI->isConditional() &&
"Looking for probabilities on unconditional branch?");
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
@@ -1634,35 +1836,11 @@ static bool ExtractBranchMetadata(BranchInst *BI,
ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
if (!CITrue || !CIFalse) return false;
- ProbTrue = CITrue->getValue();
- ProbFalse = CIFalse->getValue();
- assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
- "Branch probability metadata must be 32-bit integers");
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
return true;
}
-/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
-/// the event of overflow, logically-shifts all four inputs right until the
-/// multiply fits.
-static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
- unsigned &BitsLost) {
- BitsLost = 0;
- bool Overflow = false;
- APInt Result = A.umul_ov(B, Overflow);
- if (Overflow) {
- APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
- do {
- B = B.lshr(1);
- ++BitsLost;
- } while (B.ugt(MaxB));
- A = A.lshr(BitsLost);
- C = C.lshr(BitsLost);
- D = D.lshr(BitsLost);
- Result = A * B;
- }
- return Result;
-}
-
/// checkCSEInPredecessor - Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
///
@@ -1788,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
continue;
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc;
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
bool InvertPredCond = false;
if (BI->isConditional()) {
@@ -1887,14 +2065,53 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
New, "or.cond"));
PBI->setCondition(NewCond);
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(TrueDest, PredBlock, BB);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(FalseDest, PredBlock, BB);
PBI->setSuccessor(1, FalseDest);
}
+ if (NewWeights.size() == 2) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVM