Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 8
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 11
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 803
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 11
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 4
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 221
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 89
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 31
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 9
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 49
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 76
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 80
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 20
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 68
-rw-r--r--  lib/CodeGen/MachineFunctionPrinterPass.cpp | 3
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 258
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 86
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 2
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 5
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 17
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 1153
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.h | 341
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 668
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 131
-rw-r--r--  lib/CodeGen/Passes.cpp | 27
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 108
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 12
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 43
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 9
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 194
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 41
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 907
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 49
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 95
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 241
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 89
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 9
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 70
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 3
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 4
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 18
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 5
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 704
61 files changed, 5002 insertions, 1898 deletions
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b60fda86a6..bf5d8c4880 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
ARMException::ARMException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
- {}
+ : DwarfException(A) {}
ARMException::~ARMException() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 84277604ef..64a68d3d9c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -218,16 +218,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
if ((GlobalValue::LinkageTypes)Linkage !=
- GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ GlobalValue::LinkOnceODRAutoHideLinkage)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 9d17767f3b..d30e5bbd8e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing dwarf compile unit.
+// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
@@ -419,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- if (Location.isReg()) {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
- } else {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
-
- addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
- }
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
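For reference, here is a minimal standalone sketch of the DWARF location-expression bytes the removed block emitted, and which the new addRegisterOp/addRegisterOffset helpers are presumably expected to produce as well. The opcode values come from the DWARF standard; appendULEB, appendSLEB, encodeRegLocation, and encodeRegOffsetLocation are hypothetical illustration helpers, not LLVM API:

#include <cstdint>
#include <vector>

// DWARF opcodes (DWARF v3/v4): DW_OP_reg0 = 0x50, DW_OP_regx = 0x90,
// DW_OP_breg0 = 0x70, DW_OP_bregx = 0x92.
static void appendULEB(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V)
      B |= 0x80;
    Out.push_back(B);
  } while (V);
}

// Standard SLEB128 encoding (assumes arithmetic right shift for negative
// values, as on all common compilers).
static void appendSLEB(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    More = !((V == 0 && !(B & 0x40)) || (V == -1 && (B & 0x40)));
    if (More)
      B |= 0x80;
    Out.push_back(B);
  } while (More);
}

// The value lives in register Reg itself: DW_OP_regN or DW_OP_regx.
static void encodeRegLocation(std::vector<uint8_t> &Out, unsigned Reg) {
  if (Reg < 32) {
    Out.push_back(0x50 + Reg);   // DW_OP_reg0 + Reg
  } else {
    Out.push_back(0x90);         // DW_OP_regx
    appendULEB(Out, Reg);
  }
}

// The value lives in memory at [Reg + Offset]: DW_OP_bregN/bregx + SLEB offset.
static void encodeRegOffsetLocation(std::vector<uint8_t> &Out, unsigned Reg,
                                    int64_t Offset) {
  if (Reg < 32) {
    Out.push_back(0x70 + Reg);   // DW_OP_breg0 + Reg
  } else {
    Out.push_back(0x92);         // DW_OP_bregx
    appendULEB(Out, Reg);
  }
  appendSLEB(Out, Offset);
}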
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fa7fb1f8d0..abac337042 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1410,7 +1410,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- 0);
+ DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
}
}
@@ -1450,6 +1450,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIVariable DV(Variables.getElement(i));
if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
continue;
+ // Check that DbgVariable for DV wasn't created earlier, when
+ // findAbstractVariable() was called for inlined instance of DV.
+ LLVMContext &Ctx = DV->getContext();
+ DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+ if (AbstractVariables.lookup(CleanDV))
+ continue;
if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
addScopeVariable(Scope, new DbgVariable(DV, NULL));
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b5f86ab1b9..75f6056c44 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -175,17 +175,6 @@ public:
};
class ARMException : public DwarfException {
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index a2ce7a004d..2e189ad7e7 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen
DeadMachineInstructionElim.cpp
DFAPacketizer.cpp
DwarfEHPrepare.cpp
+ EarlyIfConversion.cpp
EdgeBundles.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
@@ -60,6 +61,7 @@ add_llvm_library(LLVMCodeGen
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
+ MachineTraceMetrics.cpp
MachineVerifier.cpp
OcamlGC.cpp
OptimizePHIs.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 7b015d850d..fb2c2e83f1 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeCalculateSpillWeightsPass(Registry);
initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 0000000000..f9347efdb0
--- /dev/null
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,803 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt"
+#include "MachineTraceMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+// Triangle:   Head              Diamond:   Head
+//              | \                         /  \_
+//              |  \                       /    |
+//              |  [TF]BB                FBB    TBB
+//              |  /                       \    /
+//              | /                         \  /
+//              Tail                        Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
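A source-level sketch of the two shapes and of what the conversion produces (illustrative only; the pass operates on SSA-form machine code, and the block names refer to the diagram above):

// Triangle: only one conditional block; the other edge goes from Head
// straight to Tail, so either TBB or FBB equals Tail.
int triangle(bool Cond, int A, int B) {
  int X = A;           // computed in Head
  if (Cond)
    X = A + B;         // the single conditional block
  return X;            // Tail phi(A, A + B)
}

// Diamond: separate TBB and FBB blocks that rejoin at Tail.
int diamond(bool Cond, int A, int B) {
  int X;
  if (Cond)
    X = A + B;         // TBB
  else
    X = A - B;         // FBB
  return X;            // Tail phi(A + B, A - B)
}

// After if-conversion both sides are speculated into Head and the phi
// becomes a select/cmov, e.g. for the diamond:
int diamondConverted(bool Cond, int A, int B) {
  int T = A + B;       // spliced from TBB
  int F = A - B;       // spliced from FBB
  return Cond ? T : F; // select inserted before Head's terminator
}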
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+ /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ if (MO->isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+
+ // Remember clobbered regunits.
+ if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI))
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful; it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+ // Some of the conditional code depends on I.
+ if (InsertAfter.count(I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO->isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO->readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, so this is not a valid
+ // insertion point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - Analyze the sub-CFG rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = 0;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = 0;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leaving Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail; let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+ // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
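As a hedged numeric walk-through of the limit checks above (the mispredict penalty, depths, and slack below are invented for illustration and taken from no real target model):

#include <cassert>

// Same downward-saturating adjustment as adjCycles() in this file.
static unsigned adjCyclesSketch(unsigned Cyc, int Delta) {
  if (Delta < 0 && Cyc < unsigned(-Delta))
    return 0;
  return Cyc + Delta;
}

int main() {
  // Assumed machine model: a 10 cycle mispredict penalty.
  unsigned MispredictPenalty = 10;
  unsigned CritLimit = MispredictPenalty / 2;   // 5 extra cycles tolerated

  // Assumed trace data for one tail phi.
  unsigned BranchDepth = 4;   // depth of Head's first terminator
  int CondCycles = 1;         // condition -> select latency
  unsigned PHIDepth = 3;      // depth of the phi in the tail trace
  unsigned Slack = 1;         // cycles the phi can slip without penalty

  unsigned MaxDepth = PHIDepth + Slack;                          // 4
  unsigned CondDepth = adjCyclesSketch(BranchDepth, CondCycles); // 5

  // The condition is pulled onto the critical path: 1 extra cycle, which is
  // within CritLimit, so this phi alone would not block the conversion.
  unsigned Extra = CondDepth > MaxDepth ? CondDepth - MaxDepth : 0;
  assert(Extra == 1 && Extra <= CritLimit);
  return 0;
}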
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces();
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: "
+ << ((Value*)MF.getFunction())->getName() << '\n');
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0;
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (po_iterator<MachineDominatorTree*>
+ I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
+ if (tryConvertIf(I->getBlock()))
+ Changed = true;
+
+ MF.verify(this, "After early if-conversion");
+ return Changed;
+}
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index b14afc286d..7a17331ba1 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
} else {
TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
}
DEBUG(dbgs() << '\n');
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 4c7f5d8c88..07e37af57f 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1081,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ // Some (out-of-tree) targets have early-clobber reload instructions.
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
+ if (MO->isEarlyClobber())
+ LoadIdx = LoadIdx.getRegSlot(true);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 8990360a72..0a795e644c 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
valnos.pop_back();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->setIsUnused(true);
+ ValNo->markUnused();
}
}
@@ -196,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
// If NewEnd was in the middle of an interval, make sure to get its endpoint.
I->end = std::max(NewEnd, prior(MergeTo)->end);
- // Erase any dead ranges.
- ranges.erase(llvm::next(I), MergeTo);
-
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
- Ranges::iterator Next = llvm::next(I);
- if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
- I->end = Next->end;
- ranges.erase(Next);
+ if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ I->end = MergeTo->end;
+ ++MergeTo;
}
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
}
@@ -381,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other,
const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
+ verify();
+
// Determine if any of our live range values are mapped. This is uncommon, so
// we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
@@ -448,14 +450,148 @@ void LiveInterval::join(LiveInterval &Other,
valnos.resize(NumNewVals); // shrinkify
// Okay, now insert the RHS live ranges into the LHS.
- iterator InsertPos = begin();
unsigned RangeNo = 0;
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
// Map the valno in the other live range to the current live range.
I->valno = NewVNInfo[OtherAssignments[RangeNo]];
assert(I->valno && "Adding a dead range?");
- InsertPos = addRangeFrom(*I, InsertPos);
}
+ mergeIntervalRanges(Other);
+
+ verify();
+}
+
+/// \brief Helper function for merging in another LiveInterval's ranges.
+///
+/// This is a helper routine implementing an efficient merge of another
+/// LiveInterval's ranges into the current interval.
+///
+/// \param LHSValNo If non-NULL, set as the new value number for every range
+/// from RHS which is merged into the LHS.
+/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
+/// number matches this value number will be merged into LHS.
+void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
+ VNInfo *LHSValNo,
+ const VNInfo *RHSValNo) {
+ if (RHS.empty())
+ return;
+
+ // Ensure we're starting with a valid range. Note that we don't verify RHS
+ // because it may have had its value numbers adjusted in preparation for
+ // merging.
+ verify();
+
+ // The strategy for merging these efficiently is as follows:
+ //
+ // 1) Find the beginning of the impacted ranges in the LHS.
+ // 2) Create a new, merged sub-sequence of ranges merging from the position in
+ // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS
+ // entries being merged will be copied into this new range.
+ // 3) Replace the relevant section in LHS with these newly merged ranges.
+ // 4) Append any remaining ranges from RHS if LHS is exhausted in #2.
+ //
+ // We don't follow the typical in-place merge strategy for sorted ranges of
+ // appending the new ranges to the back and then using std::inplace_merge
+ // because one step of the merge can both mutate the original elements and
+ // remove elements from the original. Essentially, because the merge includes
+ // collapsing overlapping ranges, a more complex approach is required.
+
+ // We do an initial binary search to optimize for a common pattern: a large
+ // LHS, and a very small RHS.
+ const_iterator RI = RHS.begin(), RE = RHS.end();
+ iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);
+
+ // Merge into NewRanges until one of the ranges is exhausted.
+ SmallVector<LiveRange, 4> NewRanges;
+
+ // Keep track of where to begin the replacement.
+ iterator ReplaceI = LI;
+
+ // If there are preceding ranges in the LHS, put the last one into NewRanges
+ // so we can optionally extend it. Adjust the replacement point accordingly.
+ if (LI != begin()) {
+ ReplaceI = llvm::prior(LI);
+ NewRanges.push_back(*ReplaceI);
+ }
+
+ // Now loop over the mergeable portions of both LHS and RHS, merging into
+ // NewRanges.
+ while (LI != LE && RI != RE) {
+ // Skip incoming ranges with the wrong value.
+ if (RHSValNo && RI->valno != RHSValNo) {
+ ++RI;
+ continue;
+ }
+
+ // Select the first range. We pick the earliest start point, and then the
+ // largest range.
+ LiveRange R = *LI;
+ if (*RI < R) {
+ R = *RI;
+ ++RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ } else {
+ ++LI;
+ }
+
+ if (NewRanges.empty()) {
+ NewRanges.push_back(R);
+ continue;
+ }
+
+ LiveRange &LastR = NewRanges.back();
+ if (R.valno == LastR.valno) {
+ // Try to merge this range into the last one.
+ if (R.start <= LastR.end) {
+ LastR.end = std::max(LastR.end, R.end);
+ continue;
+ }
+ } else {
+ // We can't merge ranges across a value number.
+ assert(R.start >= LastR.end &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+
+ // If all else fails, just append the range.
+ NewRanges.push_back(R);
+ }
+ assert(RI == RE || LI == LE);
+
+ // Check for being able to merge into the trailing sequence of ranges on the LHS.
+ if (!NewRanges.empty())
+ for (; LI != LE && (LI->valno == NewRanges.back().valno &&
+ LI->start <= NewRanges.back().end);
+ ++LI)
+ NewRanges.back().end = std::max(NewRanges.back().end, LI->end);
+
+ // Replace the ranges in the LHS with the newly merged ones. It would be
+ // really nice if there were a move-supporting 'replace' directly in
+ // SmallVector, but as there is not, we pay the price of copies to avoid
+ // wasted memory allocations.
+ SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
+ NRE = NewRanges.end();
+ for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
+ *ReplaceI = *NRI;
+ if (NRI == NRE)
+ ranges.erase(ReplaceI, LI);
+ else
+ ranges.insert(LI, NRI, NRE);
+
+ // And finally insert any trailing end of RHS (if we have one).
+ for (; RI != RE; ++RI) {
+ LiveRange R = *RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ if (!ranges.empty() &&
+ ranges.back().valno == R.valno && R.start <= ranges.back().end)
+ ranges.back().end = std::max(ranges.back().end, R.end);
+ else
+ ranges.push_back(R);
+ }
+
+ // Ensure we finished with a valid new sequence of ranges.
+ verify();
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
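A hedged illustration of what mergeIntervalRanges() above is meant to compute, using a simplified stand-in: plain structs instead of LiveRange/VNInfo, and a sort-then-coalesce instead of the single in-place merge pass the real routine performs.

#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-in for LiveRange: [Start, End) tagged with a value id.
struct R { unsigned Start, End, ValNo; };

// Merge RHS into LHS, coalescing overlapping or touching ranges that carry
// the same value id.
static std::vector<R> mergeSketch(std::vector<R> LHS, const std::vector<R> &RHS) {
  for (const R &X : RHS)
    LHS.push_back(X);
  std::sort(LHS.begin(), LHS.end(),
            [](const R &A, const R &B) { return A.Start < B.Start; });
  std::vector<R> Out;
  for (const R &X : LHS) {
    if (!Out.empty() && Out.back().ValNo == X.ValNo && X.Start <= Out.back().End)
      Out.back().End = std::max(Out.back().End, X.End);  // coalesce
    else
      Out.push_back(X);
  }
  return Out;
}

int main() {
  // LHS = [0,5):v0 [10,12):v1, RHS = [4,8):v0
  std::vector<R> LHS = {{0, 5, 0}, {10, 12, 1}};
  std::vector<R> RHS = {{4, 8, 0}};
  std::vector<R> M = mergeSketch(LHS, RHS);
  // Result: [0,8):v0 [10,12):v1 -- the overlapping v0 ranges were coalesced,
  // and the later v1 range was left untouched.
  assert(M.size() == 2 && M[0].End == 8 && M[1].Start == 10);
  return 0;
}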
@@ -464,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other,
/// the overlapping LiveRanges have the specified value number.
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+ mergeIntervalRanges(RHS, LHSValNo);
}
-
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
/// in RHS into this live interval as the specified value number.
/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
/// current interval, it will replace the value numbers of the overlaped
/// live ranges with the specified value number.
-void LiveInterval::MergeValueInAsValue(
- const LiveInterval &RHS,
- const VNInfo *RHSValNo, VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- if (I->valno != RHSValNo)
- continue;
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
}
-
/// MergeValueNumberInto - This method is called when two value nubmers
/// are found to be equivalent. This eliminates V1, replacing all
/// LiveRanges with the V1 value number with the V2 value number. This can
@@ -549,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
- // Merge the relevant flags.
- V2->mergeFlags(V1);
-
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -575,6 +690,8 @@ void LiveInterval::Copy(const LiveInterval &RHS,
const LiveRange &LR = RHS.ranges[i];
addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
}
+
+ verify();
}
unsigned LiveInterval::getSize() const {
@@ -617,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const {
} else {
OS << vni->def;
if (vni->isPHIDef())
- OS << "-phidef";
- if (vni->hasPHIKill())
- OS << "-phikill";
+ OS << "-phi";
}
}
}
@@ -629,6 +744,23 @@ void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
+#ifndef NDEBUG
+void LiveInterval::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != 0);
+ assert(I->valno == valnos[I->valno->id]);
+ if (llvm::next(I) != E) {
+ assert(I->end <= llvm::next(I)->start);
+ if (I->end == llvm::next(I)->start)
+ assert(I->valno != llvm::next(I)->valno);
+ }
+ }
+}
+#endif
+
void LiveRange::print(raw_ostream &os) const {
os << *this;
@@ -690,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
- if (MO.isUse() && MO.isUndef())
- continue;
// DBG_VALUE instructions should have been eliminated earlier.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = Idx.getRegSlot(MO.isUse());
- const VNInfo *VNI = LI.getVNInfoAt(Idx);
- // FIXME: We should be able to assert(VNI) here, but the coalescer leaves
- // dangling defs around.
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
continue;
MO.setReg(LIV[getEqClass(VNI)]->reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 819707f59f..d0f8ae1af3 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +39,13 @@
#include <cmath>
using namespace llvm;
+// Switch to the new experimental algorithm for computing live intervals.
+static cl::opt<bool>
+NewLiveIntervals("new-live-intervals", cl::Hidden,
+ cl::desc("Use new algorithm forcomputing live intervals"));
+
char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
AllocatableRegs = TRI->getAllocatableSet(fn);
ReservedRegs = TRI->getReservedRegs(fn);
- computeIntervals();
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ if (NewLiveIntervals) {
+ // This is the new way of computing live intervals.
+ // It is independent of LiveVariables, and it can run at any time.
+ computeVirtRegs();
+ computeRegMasks();
+ } else {
+ // This is the old way of computing live intervals.
+ // It depends on LiveVariables.
+ computeIntervals();
+ }
computeLiveInRegUnits();
DEBUG(dump());
@@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// new valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
// live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
@@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
ValNo = interval.getNextValue(Start, VNInfoAllocator);
- ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
@@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
llvm_unreachable("Multiply defined register");
@@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Register Unit Liveness
//===----------------------------------------------------------------------===//
@@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
- VNI->setIsUnused(true);
+ VNI->markUnused();
NewLI.removeRange(*LII);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
CanSeparate = true;
@@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
return MBB1 == MBB2 ? MBB1 : NULL;
}
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
float
LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
// Limit the loop depth ridiculousness.
@@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
- VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 3e5d7f6c29..d828f25932 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
.getRegSlot(I.getOperand().isEarlyClobber());
// Create the def in LI. This may find an existing def.
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc);
- VNI->setIsPHIDef(MI->isPHI());
+ LI->createDeadDef(Idx, *Alloc);
}
}
@@ -95,7 +94,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
Idx = Idx.getRegSlot(true);
}
}
- extend(LI, Idx);
+ extend(LI, Idx, Reg);
}
}
@@ -129,7 +128,8 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) {
void LiveRangeCalc::extend(LiveInterval *LI,
- SlotIndex Kill) {
+ SlotIndex Kill,
+ unsigned PhysReg) {
assert(LI && "Missing live range");
assert(Kill.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
@@ -146,7 +146,7 @@ void LiveRangeCalc::extend(LiveInterval *LI,
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill);
+ VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg);
// When there were multiple different values, we may need new PHIs.
if (!VNI)
@@ -169,7 +169,8 @@ void LiveRangeCalc::calculateValues() {
VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
- SlotIndex Kill) {
+ SlotIndex Kill,
+ unsigned PhysReg) {
// Blocks where LI should be live-in.
SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
@@ -180,7 +181,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
MachineBasicBlock *MBB = WorkList[i];
- assert(!MBB->pred_empty() && "Value live-in to entry block?");
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
MachineBasicBlock *Pred = *PI;
@@ -303,7 +319,6 @@ void LiveRangeCalc::updateSSA() {
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
- VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
I->DomNode = 0;
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 3c9e40f0fe..909829b228 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -105,9 +105,12 @@ class LiveRangeCalc {
/// to be live-in are added to LiveIn. If a unique reaching def is found,
 /// its value is returned; if Kill is jointly dominated by multiple values,
/// NULL is returned.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
VNInfo *findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
- SlotIndex Kill);
+ SlotIndex Kill,
+ unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -157,7 +160,9 @@ public:
/// Kill is not dominated by a single existing value, PHI-defs are inserted
/// as required to preserve SSA form. If Kill is known to be dominated by a
/// single existing value, Alloc may be null.
- void extend(LiveInterval *LI, SlotIndex Kill);
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
/// createDeadDefs - Create a dead def in LI for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 261d860e01..b4ce9aa8c1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -177,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (!DefMI || !UseMI)
return false;
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI,
+ LIS.getInstructionIndex(DefMI),
+ LIS.getInstructionIndex(UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ return false;
+
DEBUG(dbgs() << "Try to fold single def: " << *DefMI
<< " into single use: " << *UseMI);
@@ -226,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -233,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (!MOI->isReg())
continue;
unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ ReadsPhysRegs = true;
continue;
+ }
LiveInterval &LI = LIS.getInterval(Reg);
// Shrink read registers, unless it is likely to be expensive and
@@ -258,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
}
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
// Erase any virtregs that are now empty and unused. There may be <undef>
// uses around. Keep the empty live range in that case.
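
The operand-pruning loop in the KILL conversion above walks the operand list from back to front so that removing entry i-1 never shifts the entries still to be visited. The same idiom on a plain std::vector (a sketch of the pattern only, not LLVM code):

#include <cstdio>
#include <vector>

int main() {
  // Negative entries play the role of operands that should be dropped.
  std::vector<int> Ops = {1, -2, 3, -4, 5};

  for (unsigned I = Ops.size(); I; --I)    // visit back to front
    if (Ops[I - 1] < 0)
      Ops.erase(Ops.begin() + (I - 1));    // erasing only shifts elements
                                           // that were already visited
  for (unsigned I = 0; I != Ops.size(); ++I)
    std::printf("%d ", Ops[I]);            // prints: 1 3 5
  return 0;
}
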
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index fa971e7c3a..a7c9f46f4d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() != 0 && "machine instruction not in a basic block");
// Remove from the use/def lists.
- N->RemoveRegOperandsFromUseLists();
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(0);
@@ -311,8 +312,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
if (!succ_empty()) {
if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
- for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
OS << '\n';
}
}
@@ -478,18 +482,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) {
void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
MachineBasicBlock *New) {
- uint32_t weight = 0;
- succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+ if (Old == New)
+ return;
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(SI);
- weight = *WI;
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
}
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
- // Update the successor information.
- removeSuccessor(SI);
- addSuccessor(New, weight);
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
}
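
replaceSuccessor above folds the edge weights together when New is already in the successor list instead of creating a duplicate edge. A standalone sketch of that merge on parallel vectors (hypothetical stand-ins, not the MachineBasicBlock members):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<char> Successors = {'A', 'B', 'C'};
  std::vector<unsigned> Weights = {10, 20, 30};  // parallel to Successors

  char Old = 'C', New = 'A';
  size_t OldI = std::find(Successors.begin(), Successors.end(), Old) -
                Successors.begin();
  size_t NewI = std::find(Successors.begin(), Successors.end(), New) -
                Successors.begin();

  if (NewI == Successors.size()) {
    // New wasn't a successor yet: let it take Old's slot (and weight).
    Successors[OldI] = New;
  } else {
    // New is already a successor: fold Old's weight into it, drop the edge.
    Weights[NewI] += Weights[OldI];
    Weights.erase(Weights.begin() + OldI);
    Successors.erase(Successors.begin() + OldI);
  }

  for (size_t I = 0; I != Successors.size(); ++I)
    std::printf("%c:%u ", Successors[I], Weights[I]);  // prints: A:40 B:20
  return 0;
}
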
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
@@ -508,14 +536,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t weight = 0;
-
+ uint32_t Weight = 0;
// If Weight list is empty it means we don't use it (disabled optimization).
if (!fromMBB->Weights.empty())
- weight = *fromMBB->Weights.begin();
+ Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, weight);
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -527,7 +554,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
@@ -541,9 +571,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
}
}
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
- return I != Successors.end();
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
}
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -674,7 +707,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Inherit live-ins from the successor
for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
- E = Succ->livein_end(); I != E; ++I)
+ E = Succ->livein_end(); I != E; ++I)
NMBB->addLiveIn(*I);
// Update LiveVariables.
@@ -910,12 +943,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
if (Weights.empty())
return 0;
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- return *getWeightIterator(I);
+ return *getWeightIterator(Succ);
}
 /// getWeightIterator - Return weight iterator corresponding to the I successor
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5a15f92a18..c4dca2cd15 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
PrevBB->updateTerminator();
+ }
}
// Fixup the last block.
@@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
- // on the loop info here so that we can align backedges in unnatural CFGs and
- // backedges that were introduced purely because of the loop rotations done
- // during this layout pass.
- // FIXME: This isn't quite right, we shouldn't align backedges that result
- // from blocks being sunken below the exit block for the function.
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
- for (BlockChain::iterator BI = FunctionChain.begin(),
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
- PreviousBlocks.insert(*BI);
- // Set alignment on the destination of all the back edges in the new
- // ordering.
- for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
- SE = (*BI)->succ_end();
- SI != SE; ++SI)
- if (PreviousBlocks.count(*SI))
- (*SI)->setAlignment(Align);
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+    // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
}
}
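
The alignment decision above reduces to three comparisons against a 20% cold probability: skip blocks that are cold relative to the function entry or to their loop header, then align only when the layout (fall-through) edge carries at most 20% of the block's frequency. A numeric sketch with plain doubles standing in for BlockFrequency and BranchProbability (the numbers are made up for illustration):

#include <cstdio>

int main() {
  const double ColdProb = 0.20;        // 1/5, as in the pass
  double EntryFreq = 100.0;            // function entry frequency
  double LoopHeaderFreq = 90.0;        // header of the enclosing loop
  double Freq = 80.0;                  // the candidate loop block
  double LayoutPredFreq = 85.0;        // block laid out just before it
  double LayoutProb = 0.10;            // probability of the fall-through edge

  if (Freq < EntryFreq * ColdProb) {             // 80 < 20? no
    std::puts("skip: cold relative to entry");
    return 0;
  }
  if (Freq < LoopHeaderFreq * ColdProb) {        // 80 < 18? no
    std::puts("skip: cold relative to loop header");
    return 0;
  }
  double LayoutEdgeFreq = LayoutPredFreq * LayoutProb;  // 8.5
  if (LayoutEdgeFreq <= Freq * ColdProb)                // 8.5 <= 16: align
    std::puts("align: hot entries come from non-layout predecessors");
  else
    std::puts("no align: the fall-through edge dominates");
  return 0;
}
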
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 0cc1af0795..447921147f 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight;
}
@@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Sum = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight / Scale;
}
assert(Sum <= UINT32_MAX);
return Sum;
}
-uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
return Weight;
}
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
MachineBasicBlock *MaxSucc = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
MaxSucc = *I;
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index f2539baabd..896461fd19 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -84,7 +84,7 @@ namespace {
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) const ;
+ MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
@@ -100,8 +100,7 @@ namespace {
void ExitScope(MachineBasicBlock *MBB);
bool ProcessBlock(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
};
} // end anonymous namespace
@@ -216,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (MO.isDef() &&
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- PhysRefs.insert(*AI);
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
if (MO.isDef())
PhysDefs.push_back(Reg);
}
@@ -325,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
   // FIXME: Heuristics that work around the lack of live range splitting.
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
@@ -395,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -436,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
- SmallSet<unsigned,8> PhysRefs;
+ SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
@@ -464,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned OldReg = MO.getReg();
unsigned NewReg = CSMI->getOperand(i).getReg();
- if (OldReg == NewReg)
+
+      // Go through implicit defs of CSMI and MI. If a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
continue;
+ }
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -487,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// within the register class of the new instruction.
const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -502,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->clearKillFlags(CSEPairs[i].second);
}
+      // Go through implicit defs of CSMI and MI. If a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -521,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
Exps.push_back(MI);
}
CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
}
return Changed;
@@ -536,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
if (OpenChildren[Node])
return;
@@ -545,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
ExitScope(Node->getBlock());
   // Now traverse upwards to pop ancestors whose offspring are all done.
- while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
unsigned Left = --OpenChildren[Parent];
if (Left != 0)
break;
@@ -557,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
SmallVector<MachineDomTreeNode*, 32> Scopes;
SmallVector<MachineDomTreeNode*, 8> WorkList;
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
CurrVN = 0;
@@ -572,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
OpenChildren[Node] = NumChildren;
for (unsigned i = 0; i != NumChildren; ++i) {
MachineDomTreeNode *Child = Children[i];
- ParentMap[Child] = Node;
WorkList.push_back(Child);
}
} while (!WorkList.empty());
@@ -585,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
EnterScope(MBB);
Changed |= ProcessBlock(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
- ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ ExitScopeIfDone(Node, OpenChildren);
}
return Changed;
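
The MayIncreasePressure test added to isProfitableToCSE above is a subset check: collect the instructions that use CSReg in a set, and only treat the CSE as pressure-neutral when every user of Reg is already in that set. The same check on plain ints (a sketch, not the MachineRegisterInfo iterators):

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Stand-ins for the instructions using each register.
  std::vector<int> CSRegUsers = {1, 2, 3, 4};
  std::vector<int> RegUsers = {2, 3};

  std::set<int> CSUses(CSRegUsers.begin(), CSRegUsers.end());
  bool MayIncreasePressure = false;
  for (unsigned I = 0; I != RegUsers.size(); ++I)
    if (!CSUses.count(RegUsers[I])) {   // a user of Reg that CSReg misses
      MayIncreasePressure = true;       // CSE would extend CSReg's live range
      break;
    }
  std::printf("MayIncreasePressure = %d\n", MayIncreasePressure);  // prints 0
  return 0;
}
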
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index dbda93e4fd..0102ac708d 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
@@ -42,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) {
OS << "# " << Banner << ":\n";
- MF.print(OS);
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
return false;
}
};
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5d16c20460..b166849946 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -47,55 +47,6 @@ using namespace llvm;
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
-/// AddRegOperandToRegInfo - Add this register operand to the specified
-/// MachineRegisterInfo. If it is null, then the next/prev fields should be
-/// explicitly nulled out.
-void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
- assert(isReg() && "Can only add reg operand to use lists");
-
- // If the reginfo pointer is null, just explicitly null out or next/prev
- // pointers, to ensure they are not garbage.
- if (RegInfo == 0) {
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
- return;
- }
-
- // Otherwise, add this operand to the head of the registers use/def list.
- MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
- // For SSA values, we prefer to keep the definition at the start of the list.
- // we do this by skipping over the definition if it is at the head of the
- // list.
- if (*Head && (*Head)->isDef())
- Head = &(*Head)->Contents.Reg.Next;
-
- Contents.Reg.Next = *Head;
- if (Contents.Reg.Next) {
- assert(getReg() == Contents.Reg.Next->getReg() &&
- "Different regs on the same list!");
- Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
- }
-
- Contents.Reg.Prev = Head;
- *Head = this;
-}
-
-/// RemoveRegOperandFromRegInfo - Remove this register operand from the
-/// MachineRegisterInfo it is linked with.
-void MachineOperand::RemoveRegOperandFromRegInfo() {
- assert(isOnRegUseList() && "Reg operand is not on a use list");
- // Unlink this from the doubly linked list of operands.
- MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
- if (NextOp) {
- assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
- NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
- }
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
-}
-
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
@@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineInstr *MI = getParent())
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
- RemoveRegOperandFromRegInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
SmallContents.RegNo = Reg;
- AddRegOperandToRegInfo(&MF->getRegInfo());
+ MRI.addRegOperandToUseList(this);
return;
}
@@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
setReg(Reg);
}
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
- if (isReg() && getParent() && getParent()->getParent() &&
- getParent()->getParent()->getParent())
- RemoveRegOperandFromRegInfo();
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
@@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
// register's use/def lists.
- if (isReg()) {
- assert(!isEarlyClobber());
- setReg(Reg);
- } else {
- // Otherwise, change this to a register and set the reg#.
- OpKind = MO_Register;
- SmallContents.RegNo = Reg;
-
- // If this operand is embedded in a function, add the operand to the
- // register's use/def list.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- AddRegOperandToRegInfo(&MF->getRegInfo());
- }
+ if (RegInfo && isReg())
+ RegInfo->removeRegOperandFromUseList(this);
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -182,11 +151,18 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
- SubReg = 0;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
}
/// isIdenticalTo - Return true if this operand is identical to the specified
-/// operand.
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
if (getType() != Other.getType() ||
getTargetFlags() != Other.getTargetFlags())
@@ -207,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_FrameIndex:
return getIndex() == Other.getIndex();
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
case MachineOperand::MO_JumpTableIndex:
return getIndex() == Other.getIndex();
@@ -227,6 +204,47 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
llvm_unreachable("Invalid machine operand type");
}
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(),
+ MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress());
+ case MachineOperand::MO_RegisterMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
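
hash_value above must never distinguish two operands that isIdenticalTo treats as equal, otherwise hash-based instruction maps stop finding duplicates. The same invariant shown on a tiny struct with std::unordered_set (a sketch of the principle, not the LLVM hashing library):

#include <cstdio>
#include <functional>
#include <unordered_set>

struct Operand {
  int Kind;
  int Value;
  bool operator==(const Operand &O) const {
    return Kind == O.Kind && Value == O.Value;
  }
};

// Hash exactly the fields that operator== compares, and nothing else.
struct OperandHash {
  size_t operator()(const Operand &O) const {
    return std::hash<int>()(O.Kind) * 31 + std::hash<int>()(O.Value);
  }
};

int main() {
  std::unordered_set<Operand, OperandHash> Seen;
  Seen.insert({0, 42});
  // Equal by operator==, so it must also land in the same bucket.
  std::printf("duplicate found: %d\n", (int)Seen.count({0, 42}));  // prints 1
  return 0;
}
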
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -312,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
case MachineOperand::MO_JumpTableIndex:
OS << "<jt#" << getIndex() << '>';
break;
@@ -609,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists() {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
- }
+ MRI.removeRegOperandFromUseList(&Operands[i]);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
 /// this instruction to their respective use lists. This requires that the
/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(&RegInfo);
- }
+ MRI.addRegOperandToUseList(&Operands[i]);
}
-
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
@@ -654,13 +674,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
if (RegInfo)
- Operands[OpNo].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
   // OpNo now points to the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
- assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) &&
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands()) &&
"Trying to add an operand to a machine instr that is already done!");
// All operands from OpNo have been removed from RegInfo. If the Operands
@@ -669,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
// Insert the new operand at OpNo.
Operands.insert(Operands.begin() + OpNo, Op);
@@ -680,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
// When adding a register operand, tell RegInfo about it.
if (Operands[OpNo].isReg()) {
- // Add the new operand to RegInfo, even when RegInfo is NULL.
- // This will initialize the linked list pointers.
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ Operands[OpNo].Contents.Reg.Prev = 0;
+ // Add the new operand to RegInfo.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(&Operands[OpNo]);
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
@@ -696,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (RegInfo) {
for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -706,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
+ MachineRegisterInfo *RegInfo = getRegInfo();
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
- if (Operands.back().isReg() && Operands.back().isOnRegUseList())
- Operands.back().RemoveRegOperandFromRegInfo();
+ if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
+ RegInfo->removeRegOperandFromUseList(&Operands.back());
Operands.pop_back();
return;
@@ -720,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
// Otherwise, we are removing an interior operand. If we have reginfo to
// update, remove all operands that will be shifted down from their reg lists,
// move everything down, then re-add them.
- MachineRegisterInfo *RegInfo = getRegInfo();
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
}
}
@@ -733,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -1852,52 +1876,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
// Build up a buffer of hash code components.
- //
- // FIXME: This is a total hack. We should have a hash_value overload for
- // MachineOperand, but currently that doesn't work because there are many
- // different ideas of "equality" and thus different sets of information that
- // contribute to the hash code. This one happens to want to take a specific
- // subset. And it's still not clear that this routine uses the *correct*
- // subset of information when computing the hash code. The goal is to use the
- // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
- // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
- // be very useful to factor the selection of relevant inputs out of the two
- // functions and into a common routine, but it's not clear how that can be
- // done.
SmallVector<size_t, 8> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- switch (MO.getType()) {
- default: break;
- case MachineOperand::MO_Register:
- if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue; // Skip virtual register defs.
- HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
- break;
- case MachineOperand::MO_Immediate:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
- break;
- case MachineOperand::MO_FrameIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_JumpTableIndex:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
- break;
- case MachineOperand::MO_MachineBasicBlock:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
- break;
- case MachineOperand::MO_GlobalAddress:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
- break;
- case MachineOperand::MO_BlockAddress:
- HashComponents.push_back(hash_combine(MO.getType(),
- MO.getBlockAddress()));
- break;
- case MachineOperand::MO_MCSymbol:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
- break;
- }
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
}
return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 82e123528a..5fb938f340 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// New virtual register number.
unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
-
- // Add a reg, but keep track of whether the vector reallocated or not.
- const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
- void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
VRegInfo.grow(Reg);
VRegInfo[Reg].first = RegClass;
RegAllocHints.grow(Reg);
-
- if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
- // The vector reallocated, handle this now.
- HandleVRegListReallocation();
return Reg;
}
@@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() {
VRegInfo.clear();
}
-/// HandleVRegListReallocation - We just added a virtual register to the
-/// VRegInfo info list and it reallocated. Update the use/def lists info
-/// pointers.
-void MachineRegisterInfo::HandleVRegListReallocation() {
- // The back pointers for the vreg lists point into the previous vector.
- // Update them to point to their correct slots.
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- MachineOperand *List = VRegInfo[Reg].second;
- if (!List) continue;
- // Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
}
}
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
+
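
The list built by addRegOperandToUseList above keeps Next as a plain NULL-terminated forward chain while Prev is circular (the head's Prev points at the last node), and it inserts defs at the front and uses at the back. A self-contained model of those invariants (a hypothetical Node type, not MachineOperand):

#include <cassert>
#include <cstdio>

struct Node {
  char Name;
  bool IsDef;
  Node *Prev, *Next;
  Node(char N, bool D) : Name(N), IsDef(D), Prev(nullptr), Next(nullptr) {}
};

static void addToList(Node *&Head, Node *N) {
  if (!Head) {              // empty list: N is both head and tail
    N->Prev = N;
    N->Next = nullptr;
    Head = N;
    return;
  }
  Node *Last = Head->Prev;  // circular Prev: the head's Prev is the tail
  Head->Prev = N;
  N->Prev = Last;
  if (N->IsDef) {           // defs go to the front...
    N->Next = Head;
    Head = N;
  } else {                  // ...uses go to the back
    N->Next = nullptr;
    Last->Next = N;
  }
}

int main() {
  Node *Head = nullptr;
  Node Use1('u', false), Def('d', true), Use2('v', false);
  addToList(Head, &Use1);
  addToList(Head, &Def);
  addToList(Head, &Use2);
  for (Node *N = Head; N; N = N->Next)
    std::printf("%c ", N->Name);   // prints: d u v  (the def comes first)
  assert(Head->Prev == &Use2);     // the head's Prev is still the tail
  return 0;
}
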
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
return &*I;
}
-bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
- use_iterator UI = use_begin(RegNo);
- if (UI == use_end())
- return false;
- return ++UI == use_end();
-}
-
bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
use_nodbg_iterator UI = use_nodbg_begin(RegNo);
if (UI == use_nodbg_end())
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index acb1ee6cb6..076547a5ed 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
}
MachineSSAUpdater::~MachineSSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 847bf1e76e..a1dc9481c6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -212,7 +212,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
Scheduler->startBlock(MBB);
// Break the block into scheduling regions [I, RegionEnd), and schedule each
- // region as soon as it is discovered. RegionEnd points the the scheduling
+  // region as soon as it is discovered. RegionEnd points to the scheduling
// boundary at the bottom of the region. The DAG does not include RegionEnd,
// but the region does (i.e. the next RegionEnd is above the previous
// RegionBegin). If the current block has no terminator then RegionEnd ==
@@ -403,7 +403,8 @@ public:
/// getIssueWidth - Return the max instructions per scheduling group.
unsigned getIssueWidth() const {
- return InstrItins ? InstrItins->Props.IssueWidth : 1;
+ return (InstrItins && InstrItins->SchedModel)
+ ? InstrItins->SchedModel->IssueWidth : 1;
}
/// getNumMicroOps - Return the number of issue slots required for this MI.
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1ce546b578..bc383cba45 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -99,6 +99,16 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
} // end anonymous namespace
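
SuccessorSorter supplies the strict weak ordering handed to std::stable_sort further down; the stable sort keeps the original CFG order for successors at equal loop depth. The same pattern with ints standing in for blocks and a table standing in for MachineLoopInfo (all hypothetical):

#include <algorithm>
#include <cstdio>
#include <vector>

struct DepthSorter {
  const std::vector<int> &Depth;           // loop depth per block number
  DepthSorter(const std::vector<int> &D) : Depth(D) {}
  bool operator()(int LHS, int RHS) const {
    return Depth[LHS] < Depth[RHS];        // shallower loops sort first
  }
};

int main() {
  std::vector<int> Depth = {1, 0, 1, 2};   // depth of blocks 0..3
  std::vector<int> Succs = {0, 1, 2, 3};   // successor list in CFG order
  // stable_sort keeps block 0 before block 2 (both at depth 1).
  std::stable_sort(Succs.begin(), Succs.end(), DepthSorter(Depth));
  for (unsigned I = 0; I != Succs.size(); ++I)
    std::printf("%d ", Succs[I]);          // prints: 1 0 2 3
  return 0;
}
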
char MachineSinking::ID = 0;
@@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 0000000000..1a3aa60918
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1153 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ ItinData = MF->getTarget().getInstrItineraryData();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ BlockInfo.resize(MF->getNumBlockIDs());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ // FIXME: Compute per-functional unit counts.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+ }
+ FBI->InstrCount = InstrCount;
+ return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+ TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+}
+
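
computeDepthResources and computeHeightResources accumulate instruction counts along the trace: a block's InstrDepth counts everything strictly above it, while its InstrHeight counts the block itself plus everything below. A toy three-block trace with made-up counts:

#include <cstdio>

int main() {
  // Instruction counts of a three-block trace: head, center, tail.
  unsigned Count[3] = {4, 7, 2};
  unsigned Depth[3], Height[3];

  // Depth grows top-down and excludes the block itself.
  Depth[0] = 0;
  for (int I = 1; I < 3; ++I)
    Depth[I] = Depth[I - 1] + Count[I - 1];

  // Height grows bottom-up and includes the block itself.
  Height[2] = Count[2];
  for (int I = 1; I >= 0; --I)
    Height[I] = Count[I] + Height[I + 1];

  for (int I = 0; I < 3; ++I)
    std::printf("block %d: depth %u height %u\n", I, Depth[I], Height[I]);
  // block 0: depth 0 height 13; block 1: depth 4 height 9; block 2: depth 11 height 2
  return 0;
}
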
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+  if (MBB->succ_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
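+
+// Other strategies could be added the same way: subclass Ensemble, implement
+// pickTracePred()/pickTraceSucc(), and hand the new ensemble out from
+// getEnsemble(). A hypothetical sketch, not part of this patch, of a strategy
+// preferring the most likely CFG neighbors might look like:
+//
+//   class MostLikelyEnsemble : public MachineTraceMetrics::Ensemble {
+//     const char *getName() const { return "MostLikely"; }
+//     const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+//     const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+//   public:
+//     MostLikelyEnsemble(MachineTraceMetrics *mtm)
+//       : MachineTraceMetrics::Ensemble(mtm) {}
+//   };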
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
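+
+// For reference, a machine-level PHI has the rough form
+//   %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#2>
+// i.e. operand 0 is the def, followed by (register, predecessor MBB) pairs.
+// That layout is why the loop above steps by two and reads operand i+1 as the
+// predecessor block.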
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
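+
+// As an illustration (hypothetical numbers): if the center block has a virtual
+// register live-in whose defining instruction sits at depth 3 in a block above,
+// and uses below the center block require that def to have height 7, this
+// chain alone forces a critical path of at least 3 + 7 = 10 cycles, even if no
+// instruction in the center block itself lies on it.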
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo&DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo());
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list.
+ if (I->MI)
+ DepHeight += TII->computeOperandLatency(ItinData,
+ MI, MO.getOperandNo(),
+ I->MI, I->Op);
+ else
+ // No UseMI. Just use the MI latency instead.
+ DepHeight += TII->getInstrLatency(ItinData, MI);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep,
+ const MachineInstr *UseMI, unsigned UseHeight,
+ MIHeightMap &Heights,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
+/// of all the blocks in Trace. Stop when reaching the block that contains
+/// DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.ItinData, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ // FIXME: Check cache tags, recompute as needed.
+ computeTrace(MBB);
+ computeInstrDepths(MBB);
+ computeInstrHeights(MBB);
+ return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
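+
+// Example with made-up numbers: given a 12 cycle critical path, an instruction
+// with Depth = 4 and Height = 5 has a slack of 12 - (4 + 5) = 3 cycles, so it
+// can be delayed by up to 3 cycles without lengthening the critical path.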
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ PHI, Dep.UseOp,
+ /* FindMin = */ false);
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // For now, we compute the resource depth from instruction count / issue
+ // width. Eventually, we should compute resource depth per functional unit
+ // and return the max.
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+ if (Model->IssueWidth != 0)
+ return Instrs / Model->IssueWidth;
+ // Assume issue width 1 without a schedule model.
+ return Instrs;
+}
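+
+// Example with made-up numbers: a trace with InstrDepth + InstrHeight = 24
+// instructions on a machine with an issue width of 4 gets a resource length of
+// 24 / 4 = 6 cycles; without a schedule model the estimate is 24 cycles.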
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 0000000000..c5b86f31db
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,341 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block. The trace is chosen based on loop structure,
+// branch probabilities, and resource usage. The intention is to pick likely
+// traces that would be the most affected by code transformations.
+//
+// It is expensive to compute a full arbitrary trace for every block, so to
+// save some computations, traces are chosen to be convergent. This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions: if the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack - the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
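+
+// Example usage (an illustrative sketch only, assuming a MachineFunctionPass
+// that has required MachineTraceMetrics in its getAnalysisUsage; the API calls
+// are the ones declared below):
+//
+//   MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
+//   MachineTraceMetrics::Ensemble *MinInstr =
+//     MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+//   MachineTraceMetrics::Trace Trace = MinInstr->getTrace(MBB);
+//   unsigned CritPath = Trace.getCriticalPath();   // data-dependency limit
+//   unsigned ResLen   = Trace.getResourceLength(); // execution-resource limit
+//   unsigned Slack    = Trace.getInstrSlack(MI);   // MI must be in MBB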
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *ItinData;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *Loops;
+
+public:
+ class Ensemble;
+ class Trace;
+ static char ID;
+ MachineTraceMetrics();
+ void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+ void releaseMemory();
+ void verifyAnalysis() const;
+
+ friend class Ensemble;
+ friend class Trace;
+
+ /// Per-basic block information that doesn't depend on the trace through the
+ /// block.
+ struct FixedBlockInfo {
+ /// The number of non-trivial instructions in the block.
+ /// Doesn't count PHI and COPY instructions that are likely to be removed.
+ unsigned InstrCount;
+
+ /// True when the block contains calls.
+ bool HasCalls;
+
+ FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+ /// Returns true when resource information for this block has been computed.
+ bool hasResources() const { return InstrCount != ~0u; }
+
+ /// Invalidate resource information.
+ void invalidate() { InstrCount = ~0u; }
+ };
+
+ /// Get the fixed resource information about MBB. Compute it on demand.
+ const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+ /// A virtual register or regunit required by a basic block or its trace
+ /// successors.
+ struct LiveInReg {
+ /// The virtual register required, or a register unit.
+ unsigned Reg;
+
+ /// For virtual registers: Minimum height of the defining instruction.
+ /// For regunits: Height of the highest user in the trace.
+ unsigned Height;
+
+ LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ };
+
+ /// Per-basic block information that relates to a specific trace through the
+  /// block. Convergent traces mean that only one of these is required per
+ /// block in a trace ensemble.
+ struct TraceBlockInfo {
+ /// Trace predecessor, or NULL for the first block in the trace.
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block once. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const {
+ return TBI.InstrDepth + TBI.InstrHeight;
+ }
+
+ /// Return the resource depth of the top/bottom of the trace center block.
+ /// This is the number of cycles required to execute all instructions from
+ /// the trace head to the trace center block. The resource depth only
+ /// considers execution resources, it ignores data dependencies.
+ /// When Bottom is set, instructions in the trace center block are included.
+ unsigned getResourceDepth(bool Bottom) const;
+
+ /// Return the resource length of the trace. This is the number of cycles
+ /// required to execute the instructions in the trace if they were all
+ /// independent, exposing the maximum instruction-level parallelism.
+ ///
+ /// Any blocks in Extrablocks are included as if they were part of the
+ /// trace.
+ unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+ ArrayRef<const MachineBasicBlock*>()) const;
+
+ /// Return the length of the (data dependency) critical path through the
+ /// trace.
+ unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+ /// Return the depth and height of MI. The depth is only valid for
+ /// instructions in or above the trace center block. The height is only
+ /// valid for instructions in or below the trace center block.
+ InstrCycles getInstrCycles(const MachineInstr *MI) const {
+ return TE.Cycles.lookup(MI);
+ }
+
+ /// Return the slack of MI. This is the number of cycles MI can be delayed
+ /// before the critical path becomes longer.
+ /// MI must be an instruction in the trace center block.
+ unsigned getInstrSlack(const MachineInstr *MI) const;
+
+ /// Return the Depth of a PHI instruction in a trace center block successor.
+ /// The PHI does not have to be part of the trace.
+ unsigned getPHIDepth(const MachineInstr *PHI) const;
+ };
+
+ /// A trace ensemble is a collection of traces selected using the same
+ /// strategy, for example 'minimum resource height'. There is one trace for
+ /// every block in the function.
+ class Ensemble {
+ SmallVector<TraceBlockInfo, 4> BlockInfo;
+ DenseMap<const MachineInstr*, InstrCycles> Cycles;
+ friend class Trace;
+
+ void computeTrace(const MachineBasicBlock*);
+ void computeDepthResources(const MachineBasicBlock*);
+ void computeHeightResources(const MachineBasicBlock*);
+ unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+ void computeInstrDepths(const MachineBasicBlock*);
+ void computeInstrHeights(const MachineBasicBlock*);
+ void addLiveIns(const MachineInstr *DefMI,
+ ArrayRef<const MachineBasicBlock*> Trace);
+
+ protected:
+ MachineTraceMetrics &MTM;
+ virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+ virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+ explicit Ensemble(MachineTraceMetrics*);
+ const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+ public:
+ virtual ~Ensemble();
+ virtual const char *getName() const =0;
+ void print(raw_ostream&) const;
+ void invalidate(const MachineBasicBlock *MBB);
+ void verify() const;
+
+ /// Get the trace that passes through MBB.
+ /// The trace is computed on demand.
+ Trace getTrace(const MachineBasicBlock *MBB);
+ };
+
+ /// Strategies for selecting traces.
+ enum Strategy {
+ /// Select the trace through a block that has the fewest instructions.
+ TS_MinInstrCount,
+
+ TS_NumStrategies
+ };
+
+ /// Get the trace ensemble representing the given trace selection strategy.
+ /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+ /// and valid for the lifetime of the analysis pass.
+ Ensemble *getEnsemble(Strategy);
+
+ /// Invalidate cached information about MBB. This must be called *before* MBB
+ /// is erased, or the CFG is otherwise changed.
+ ///
+ /// This invalidates per-block information about resource usage for MBB only,
+ /// and it invalidates per-trace information for any trace that passes
+ /// through MBB.
+ ///
+ /// Call Ensemble::getTrace() again to update any trace handles.
+ void invalidate(const MachineBasicBlock *MBB);
+
+private:
+ // One entry per basic block, indexed by block number.
+ SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+ // One ensemble per strategy.
+ Ensemble* Ensembles[TS_NumStrategies];
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Trace &Tr) {
+ Tr.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Ensemble &En) {
+ En.print(OS);
+ return OS;
+}
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 45ce3ab28b..852c169254 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -73,8 +73,10 @@ namespace {
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
BitVector regsReserved;
BitVector regsAllocatable;
@@ -117,6 +119,9 @@ namespace {
// block. This set is disjoint from regsLiveOut.
RegSet vregsRequired;
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
BBInfo() : reachable(false) {}
// Add register to vregsPassed if it belongs there. Return true if
@@ -203,6 +208,10 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
@@ -212,6 +221,10 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getName()
- << " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")";
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (void*)MBB << ')';
if (Indexes)
*OS << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg,
*OS << "\n";
}
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) {
+ report(msg, MF);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) {
+ report(msg, MBB);
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n';
+}
+
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
@@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() {
regsAllocatable = TRI->getAllocatableSet(*MF);
markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
}
// Does iterator point to a and b as the first two elements?
@@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
E = MBB->succ_end(); I != E; ++I) {
if ((*I)->isLandingPad())
LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+                 "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+               "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -651,10 +737,10 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
@@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (MO->readsReg()) {
regsLiveInButUnused.erase(Reg);
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical", MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
+ if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
@@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
}
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -837,6 +938,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Reserved registers may be used even when 'dead'.
if (!isReserved(Reg))
report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
} else {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
// We don't know which virtual registers are live in, so only complain
@@ -1057,8 +1160,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
++I)
if (MInfo.regsKilled.count(*I)) {
- report("Virtual register killed in block, but needed live out.", MFI);
- *OS << "Virtual register " << PrintReg(*I)
+ report("Virtual register killed in block, but needed live out.", MFI);
+ *OS << "Virtual register " << PrintReg(*I)
<< " is used after the block.\n";
}
}
@@ -1122,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() {
const LiveInterval &LI = LiveInts->getInterval(Reg);
assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I!=E; ++I) {
- VNInfo *VNI = *I;
- const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
- if (!DefVNI) {
- if (!VNI->isUnused()) {
- report("Valno not live at def and not marked unused", MF);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
- continue;
- }
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
- if (VNI->isUnused())
- continue;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (DefVNI != VNI) {
- report("Live range at def has different valno", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
- continue;
- }
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
- if (!MBB) {
- report("Invalid definition index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
- if (VNI->isPHIDef()) {
- if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber()
- << " in " << LI << '\n';
- }
- } else {
- // Non-PHI def.
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
- if (!MI) {
- report("No instruction at def index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
- bool hasDef = false;
- bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- if (MOI->getReg() != LI.reg)
- continue;
- } else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
- !TRI->regsOverlap(LI.reg, MOI->getReg()))
- continue;
- }
- hasDef = true;
- if (MOI->isEarlyClobber())
- isEarlyClobber = true;
- }
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
- if (!hasDef) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
- // Early clobber defs begin at USE slots, but other defs must begin at
- // DEF slots.
- if (isEarlyClobber) {
- if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- } else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- }
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const VNInfo *VNI = I->valno;
- assert(VNI && "Live range has no valno");
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
- if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
- report("Foreign valno in live range", MF);
- I->print(*OS);
- *OS << " has a valno not in " << LI << '\n';
- }
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
- if (VNI->isUnused()) {
- report("Live range valno is marked unused", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
- if (!MBB) {
- report("Bad start of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
- SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
- if (I->start != MBBStartIdx && I->start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- }
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
- const MachineBasicBlock *EndMBB =
- LiveInts->getMBBFromIndex(I->end.getPrevSlot());
- if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
- // No more checks for live-out segments.
- if (I->end == LiveInts->getMBBEndIdx(EndMBB))
- continue;
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed dead phis.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
continue;
- }
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
- // The block slot must refer to a basic block boundary.
- if (I->end.isBlock()) {
- report("Live segment ends at B slot of an instruction", MI);
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
-
- if (I->end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(I->start, I->end)) {
- report("Live segment ending at dead slot spans instructions", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
-
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (I->end.isEarlyClobber()) {
- if (I+1 == E || (I+1)->start != I->end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
}
+ }
+ }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != LI.reg)
- continue;
- if (MOI->readsReg())
- hasRead = true;
- if (MOI->isDef() && MOI->isDead())
- hasDeadDef = true;
- }
-
- if (I->end.isDead()) {
- if (!hasDeadDef) {
- report("Instruction doesn't have a dead def operand", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- } else {
- if (!hasRead) {
- report("Instruction ending live range doesn't read the register",
- MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
- }
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
- // Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
- // Is this live range the beginning of a non-PHIDef VN?
- if (I->start == VNI->def && !VNI->isPHIDef()) {
- // Not live-in to any blocks.
- if (MBB == EndMBB)
- continue;
- // Skip this block.
- ++MFI;
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
}
- for (;;) {
- assert(LiveInts->isLiveInToMBB(LI, MFI));
- // We don't know how to track physregs into a landing pad.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
- MFI->isLandingPad()) {
- if (&*MFI == EndMBB)
- break;
- ++MFI;
- continue;
- }
- // Is VNI a PHI-def in the current block?
- bool IsPHI = VNI->isPHIDef() &&
- VNI->def == LiveInts->getMBBStartIdx(MFI);
-
- // Check that VNI is live-out of all predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
- PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
- const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
-
- // All predecessors must have a live-out value.
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
-
- // Only PHI-defs can take different predecessor values.
- if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
- << PrintReg(Reg) << ": " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
}
}
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
}
}
}
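The two-address check added to visitMachineOperand above reduces to one rule: once the function has left SSA form, a use operand that is tied to a def must name the same register as that def. A minimal standalone sketch of that rule, using simplified stand-in types rather than the real MachineOperand/MachineInstr classes:

#include <cstdio>
#include <vector>

// Simplified stand-ins for MachineOperand/MachineInstr (assumed shapes).
struct Operand {
  unsigned Reg;
  bool IsDef;
  int TiedTo;   // index of the def operand this use is tied to, or -1
};

struct Instr {
  std::vector<Operand> Ops;
};

// After leaving SSA form, a tied use must carry the same register as the
// def it is tied to; one diagnostic per offending operand.
static bool checkTwoAddress(const Instr &MI, bool IsSSA) {
  bool OK = true;
  for (size_t i = 0; i < MI.Ops.size(); ++i) {
    const Operand &MO = MI.Ops[i];
    if (IsSSA || MO.IsDef || MO.TiedTo < 0)
      continue;
    if (MO.Reg != MI.Ops[MO.TiedTo].Reg) {
      std::printf("operand %zu: two-address operands must be identical\n", i);
      OK = false;
    }
  }
  return OK;
}

int main() {
  // %1 = ADD %1, %2  -- tied registers match, passes.
  Instr Good{{{1, true, -1}, {1, false, 0}, {2, false, -1}}};
  // %1 = ADD %3, %2  -- tied use names a different register, fails.
  Instr Bad{{{1, true, -1}, {3, false, 0}, {2, false, -1}}};
  std::printf("good: %d, bad: %d\n", checkTwoAddress(Good, false),
              checkTwoAddress(Bad, false));
}

In the patch itself the same condition is queried through MachineInstr::isRegTiedToDefOperand, and the older physreg-only version of the check in checkLiveness is dropped in its favor.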
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b820578d6c..e6e23da27c 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -194,7 +194,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
-/// under the assuption that it needs to be lowered in a way that supports
+/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
///
@@ -355,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode(
// add a kill marker in this block saying that it kills the incoming value!
if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
- // register. In most cases this is the copy, however, the first
- // terminator instruction at the end of the block may also use the value.
- // In this case, we should mark *it* as being the killing block, not the
- // copy.
- MachineBasicBlock::iterator KillInst;
- MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
- if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
- KillInst = Term;
-
- // Check that no other terminators use values.
-#ifndef NDEBUG
- for (MachineBasicBlock::iterator TI = llvm::next(Term);
- TI != opBlock.end(); ++TI) {
- if (TI->isDebugValue())
- continue;
- assert(!TI->readsRegister(SrcReg) &&
- "Terminator instructions cannot use virtual registers unless"
- "they are the first terminator in a block!");
- }
-#endif
- } else if (reusedIncoming || !IncomingReg) {
- // We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = Term;
- while (KillInst != opBlock.begin()) {
- --KillInst;
- if (KillInst->isDebugValue())
- continue;
- if (KillInst->readsRegister(SrcReg))
- break;
+ // register. In most cases this is the copy; however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator, not the copy, as the
+ // killing instruction.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
}
- } else {
- // We just inserted this copy.
- KillInst = prior(InsertPos);
}
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -427,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
bool Changed = false;
for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
- // We break edges when registers are live out from the predecessor block
- // (not considering PHI nodes). If the register is live in to this block
- // anyway, we would gain nothing from splitting.
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
// Avoid splitting backedges of loops. It would introduce small
// out-of-line blocks into the loop which is very bad for code placement.
- if (PreMBB != &MBB &&
- !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
- if (!MLI ||
- !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
- MLI->isLoopHeader(&MBB))) {
- if (PreMBB->SplitCriticalEdge(&MBB, this)) {
- Changed = true;
- ++NumCriticalEdgesSplit;
- }
- }
+ if (PreMBB == &MBB)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!LV.isLiveOut(Reg, *PreMBB))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !LV.isLiveIn(Reg, MBB);
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
+ continue;
}
+ Changed = true;
+ ++NumCriticalEdgesSplit;
}
}
return Changed;
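The rewritten SplitPHIEdges above is essentially a chain of filters applied to each PHI source edge PreMBB -> MBB. A rough standalone model of that decision, with hypothetical booleans standing in for the LiveVariables and MachineLoopInfo queries the real pass makes:

// Hypothetical summary of one PHI source edge PreMBB -> MBB.
struct EdgeInfo {
  bool PreHasOneSucc;      // PreMBB->succ_size() == 1, i.e. edge is not critical
  bool IsSelfEdge;         // PreMBB == &MBB
  bool IsLoopBackedge;     // MBB is a loop header and PreMBB is in the same loop
  bool LiveOutOfPred;      // LV.isLiveOut(Reg, *PreMBB)
  bool LiveIntoMBB;        // LV.isLiveIn(Reg, MBB)
  bool SameLoop;           // CurLoop == PreLoop
  bool PreInSomeLoop;      // PreLoop != 0
  bool PreLoopContainsCur; // PreLoop->contains(CurLoop): entering CurLoop from outside
};

// Mirrors the filters in SplitPHIEdges; true means the edge should be split.
static bool shouldSplit(const EdgeInfo &E) {
  if (E.PreHasOneSucc || E.IsSelfEdge || E.IsLoopBackedge)
    return false;              // not a critical edge, or would hurt loop layout
  if (!E.LiveOutOfPred)
    return false;              // the copy inserted in PreMBB will be a kill anyway
  if (!E.LiveIntoMBB)
    return true;               // splitting removes the interference entirely
  if (E.SameLoop)
    return false;              // interference is unavoidable; don't bother
  // Loop-crossing edge: split unless we are entering CurLoop from an outer loop.
  return E.PreInSomeLoop && !E.PreLoopContainsCur;
}

Only when this predicate holds does the pass call PreMBB->SplitCriticalEdge(&MBB, this) and count the split.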
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 9693780bda..56526f2732 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -49,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
+ cl::desc("Enable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -86,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -154,6 +160,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
if (StandardID == &DeadMachineInstructionElimID)
return applyDisable(TargetID, DisableMachineDCE);
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, !EnableEarlyIfConversion);
+
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -228,6 +237,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+ // Disable early if-conversion. Targets that are ready can enable it.
+ disablePass(&EarlyIfConverterID);
+
// Temporarily disable experimental passes.
substitutePass(&MachineSchedulerID, 0);
}
@@ -425,9 +437,6 @@ void TargetPassConfig::addISelPrepare() {
/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
/// before/after any target-independent pass. But it's currently overkill.
void TargetPassConfig::addMachinePasses() {
- // Print the instruction selected machine code...
- printAndVerify("After Instruction Selection");
-
// Insert a machine instr printer pass after the specified pass.
// If -print-machineinstrs specified, print machineinstrs after all passes.
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
@@ -443,8 +452,12 @@ void TargetPassConfig::addMachinePasses() {
insertPass(TID, IID);
}
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -527,6 +540,7 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen DCE pass");
+ addPass(&EarlyIfConverterID);
addPass(&MachineLICMID);
addPass(&MachineCSEID);
addPass(&MachineSinkingID);
@@ -639,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&MachineLoopInfoID);
addPass(&PHIEliminationID);
}
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
addPass(&TwoAddressInstructionPassID);
if (EnableStrongPHIElim)
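One subtlety in the override added above: the other passes here are on by default and controlled by a -disable-* flag, while early if-conversion is opt-in, so overridePass hands applyDisable the negated -enable-early-ifcvt value. A minimal sketch of that pattern (an assumed, simplified shape of the helper, not the exact Passes.cpp code):

typedef const void *AnalysisID;

// Returns 0 to drop the pass from the pipeline, or the ID to keep it.
static AnalysisID applyDisable(AnalysisID TargetID, bool Disable) {
  return Disable ? nullptr : TargetID;
}

// Opt-out pass:  applyDisable(TargetID, DisableMachineLICM)
// Opt-in pass:   applyDisable(TargetID, !EnableEarlyIfConversion)

The constructor also calls disablePass(&EarlyIfConverterID), so targets must opt back in before the flag has any effect.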
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4af4..9099862bd3 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -108,12 +110,14 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads that define a virtual register, and only
+/// when that register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
- bool First = true;
- MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ // We may be erasing MI below; increment MII now.
+ ++MII;
LocalMIs.insert(MI);
+ // If the instruction belongs to one of the following categories,
+ // discard the current load-folding candidate.
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
- ++MII;
+ FoldAsLoadDefReg = 0;
continue;
}
-
- if (MI->isBitcast()) {
- if (optimizeBitcastInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
- } else if (MI->isCompare()) {
- if (optimizeCmpInstr(MI, MBB)) {
- // MI is deleted.
- LocalMIs.erase(MI);
- Changed = true;
- MII = First ? I->begin() : llvm::next(PMII);
- continue;
- }
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
+
+ if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
}
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
@@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
- First = false;
- PMII = MII;
- ++MII;
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold the load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ continue;
+ }
+ }
}
}
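The FoldAsLoadDefReg bookkeeping introduced above is a one-element cache carried across the block: it remembers the most recent single-use load, is cleared by anything that could invalidate folding (labels, PHIs, inline asm, stores, calls, side effects), and is consumed when a later instruction can absorb the load. A simplified standalone model of that state machine, with hypothetical per-instruction flags in place of the real TII/MRI queries:

#include <vector>

// Hypothetical per-instruction summary used only for this sketch.
struct InstrInfo {
  bool Barrier;          // label/PHI/inline asm/side effects: drop the candidate
  bool StoreOrCall;      // may clobber memory: drop the candidate
  bool FoldableLoad;     // single-use vreg load: becomes the new candidate
  unsigned DefReg;       // register defined by a foldable load
  bool CanFoldCandidate; // a later user that could absorb the pending load
};

// Walks one block and counts how many loads would be folded.
static unsigned simulateBlock(const std::vector<InstrInfo> &Block) {
  unsigned FoldAsLoadDefReg = 0, NumFolded = 0;
  for (const InstrInfo &MI : Block) {
    if (MI.Barrier) {
      FoldAsLoadDefReg = 0;
      continue;
    }
    if (MI.StoreOrCall)
      FoldAsLoadDefReg = 0;
    if (MI.FoldableLoad) {
      FoldAsLoadDefReg = MI.DefReg;          // remember this load
    } else if (FoldAsLoadDefReg && MI.CanFoldCandidate) {
      ++NumFolded;                           // load folded into this user
      FoldAsLoadDefReg = 0;
    }
  }
  return NumFolded;
}

In the real pass the candidate is produced by isLoadFoldable and consumed through TII->optimizeLoadInstr, after which both the defining load and the original user are erased and LocalMIs is updated.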
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8325f20e41..6b3a48eefd 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
/// its virtual register, and it is guaranteed to be a block-local register.
///
bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
- // Check for non-debug uses or defs following MO.
- // This is the most likely way to fail - fast path it.
- MachineOperand *Next = &MO;
- while ((Next = Next->getNextOperandForReg()))
- if (!Next->isDebug())
- return false;
-
// If the register has ever been spilled or reloaded, we conservatively assume
// it is a global register used in multiple blocks.
if (StackSlotForVirtReg[MO.getReg()] != -1)
return false;
// Check that the use/def chain has exactly one operand - MO.
- return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
}
/// addKillFlag - Set kill flags on last use of a virtual register.
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6ac5428605..d0cff481cb 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: "
- << ((Value*)mf.getFunction())->getName() << '\n');
+ << mf.getFunction()->getName() << '\n');
MF = &mf;
if (VerifyEnabled)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 733312fbd0..990633440e 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
// Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno) {
- // If B1 is killed by a PHI, then the merged live range must also be killed
- // by the same PHI, as B0 and B1 can not overlap.
- bool HasPHIKill = BValNo->hasPHIKill();
+ if (BValNo != ValLR->valno)
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
- if (HasPHIKill)
- ValLR->valno->setHasPHIKill(true);
- }
DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
@@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
VNInfo *AValNo,
VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
-
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ if (AValNo->isPHIDef() || AValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
if (TargetRegisterInfo::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
@@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// register live range doesn't need to be accurate as long as all the
// defs are there.
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
++J;
}
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
- E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned LHSValID = LHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[LHSValID]->setHasPHIKill(true);
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
-
// Clear kill flags where live ranges are extended.
while (!LHSOldKills.empty())
LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 110f478f48..9c1dba355b 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
- // SSA defs do not have output/anti dependencies.
+ // Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
- //
- // FIXME: This optimization is disabled pending PR13112.
- //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
- // return;
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
+ return;
// Add output dependence to the next nearest def of this vreg.
//
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 7110b7566a..e675366485 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -72,10 +72,12 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
+ // If MaxLookAhead is not set above, then we are not enabled.
if (!isEnabled())
DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
else {
- IssueWidth = ItinData->Props.IssueWidth;
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel->IssueWidth;
DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
<< ScoreboardDepth << '\n');
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f16b1e7f4a..10491d98cb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -228,6 +228,9 @@ namespace {
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -1639,7 +1645,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
- N1.getOperand(0));
+ N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
@@ -2340,7 +2346,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeVectorOps) {
+ && Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
@@ -2716,6 +2722,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
+ // immediate for an add, but it becomes legal once its top c2 bits are
+ // set, transform the ADD so the immediate doesn't need to be
+ // materialized in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+
return SDValue();
}
@@ -5651,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
@@ -5676,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
@@ -5696,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
@@ -5728,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegal(ISD::FMA, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
}
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
}
return SDValue();
@@ -5974,6 +6020,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -5999,6 +6077,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -6152,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7561,6 +7694,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7568,12 +7706,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
- bool AllUndef = true;
+
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
- AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
@@ -7601,9 +7738,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
AllAnyExt &= AnyExt;
}
- if (AllUndef)
- return DAG.getUNDEF(VT);
-
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
@@ -7742,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, make concat_vectors based on input element
+ // type and then bitcast it to the output vector type.
+ //
+ // In other words, avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
}
// If VecIn2 is unused then change it to undef.
@@ -7779,6 +7933,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (N->getNumOperands() == 1)
return N->getOperand(0);
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
return SDValue();
}
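Behind the new (and (add x, c1), (lshr y, c2)) combine is a small arithmetic fact: the lshr result has its top c2 bits clear, so the AND never observes the top c2 bits of the sum, and those bits of c1 may be set freely to reach a cheaper add immediate. A standalone check of that identity with plain 64-bit integers (the constants below are made up; this illustrates the reasoning only, not the DAG code):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned C2 = 16;                 // shift amount of the lshr
  const uint64_t Low = ~0ULL >> C2;       // bits the AND can actually observe
  const uint64_t C1 = 0x1234;             // original add immediate
  const uint64_t C1High = C1 | ~Low;      // same low bits, top c2 bits set

  // For any x and y: (x + C1) & (y >> C2) == (x + C1High) & (y >> C2),
  // because adding ~Low only changes bits that the AND masks away.
  for (uint64_t x = 0; x < 1000; x += 7)
    for (uint64_t y = 0; y < 1000000; y += 99991)
      assert(((x + C1) & (y >> C2)) == ((x + C1High) & (y >> C2)));

  std::printf("identity holds: 0x%llx can stand in for 0x%llx\n",
              (unsigned long long)C1High, (unsigned long long)C1);
}

The combine only fires when isLegalAddImmediate rejects the original constant but accepts the widened one, and the MaskedValueIsZero guard ensures the high bits being set were zero to begin with.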
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 07dc36577f..683fac6744 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -484,7 +485,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
// N = N + Offset
- TotalOffs +=
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
@@ -573,7 +574,10 @@ bool FastISel::SelectCall(const User *I) {
// At -O0 we don't care about the lifetime intrinsics.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
return true;
+
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
@@ -642,7 +646,7 @@ bool FastISel::SelectCall(const User *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addCImm(CI).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
- else
+ else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(CI->getZExtValue()).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
@@ -786,13 +790,24 @@ FastISel::SelectInstruction(const Instruction *I) {
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
- // Remove dead code. However, ignore call instructions since we've flushed
+ // Remove dead code. However, ignore call instructions since we've flushed
// the local value map and recomputed the insert point.
if (!isa<CallInst>(I)) {
recomputeInsertPt();
@@ -1037,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
@@ -1046,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo)
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()) {
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
}
FastISel::~FastISel() {}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index f154271894..4488d2790b 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
return N;
}
-/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// countOperands - The inputs to target nodes have any actual inputs first,
/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
-unsigned InstrEmitter::CountOperands(SDNode *Node) {
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ for (unsigned I = N; I; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
return N;
}
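Editor's note: countOperands above first strips an optional glue and then an optional chain operand off the end of the node's operand list, then scans backwards to count how many of the remaining trailing operands are physical-register or register-mask operands; those become implicit uses on the emitted MachineInstr. A self-contained sketch of the same backwards scan over a plain vector (the Operand struct is an assumption standing in for the SDValue operand kinds):

#include <vector>

struct Operand {
  bool IsGlue = false, IsChain = false;
  bool IsRegMask = false, IsPhysReg = false;
};

// Returns the number of "real" operands; TrailingImpUses receives how many of
// them, counted from the end, are physreg/regmask operands that will be
// attached as implicit uses rather than as fixed MachineInstr operands.
unsigned countOperands(const std::vector<Operand> &Ops,
                       unsigned &TrailingImpUses) {
  unsigned N = Ops.size();
  while (N && Ops[N - 1].IsGlue)
    --N;                        // glue never reaches the MachineInstr
  if (N && Ops[N - 1].IsChain)
    --N;                        // neither does the chain
  TrailingImpUses = 0;
  for (unsigned I = N; I; --I) {
    if (Ops[I - 1].IsRegMask || Ops[I - 1].IsPhysReg)
      continue;                 // still inside the trailing register run
    TrailingImpUses = N - I;    // first non-register operand ends the run
    break;
  }
  return N;
}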
@@ -337,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
-/// operand number (in the II) that we are adding. IIOpNum and II are used for
-/// assertions only.
+/// operand number (in the II) that we are adding.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
@@ -353,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
const ConstantFP *CFP = F->getConstantFPValue();
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
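Editor's note: the AddOperand change above marks a physreg operand as implicit when it falls past the instruction description's fixed operand list on a non-variadic instruction, which is how argument registers on calls and returns end up as implicit uses. The decision itself is just an index check; a sketch with illustrative names, not the MCInstrDesc API:

// Decide whether operand number OpNo (counting the MachineInstr operands added
// so far) should be attached as an implicit register use.
bool shouldBeImplicitUse(unsigned OpNo, unsigned NumFixedOperands,
                         bool IsVariadic) {
  // Variadic instructions legitimately take extra explicit operands, so only
  // non-variadic instructions turn the overflow into implicit uses.
  return OpNo >= NumFixedOperands && !IsVariadic;
}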
@@ -393,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
BA->getTargetFlags()));
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
+ TI->getOffset(),
+ TI->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
@@ -461,7 +483,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
- SubIdx == DefSubIdx) {
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
// Optimize these:
// r1025 = s/zext r1024, 4
// r1026 = extract_subreg r1025, 4
@@ -696,7 +719,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
- unsigned NodeOperands = CountOperands(Node);
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands = countOperands(Node, NumImpUses);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
@@ -705,7 +729,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Too few operands for a variadic node!");
else
assert(NumMIOperands >= II.getNumOperands() &&
- NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
"#operands for dag node doesn't match .td file!");
#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c081f38be0..9eddee9e33 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -105,12 +105,6 @@ public:
/// (which do not go into the machine instrs.)
static unsigned CountResults(SDNode *Node);
- /// CountOperands - The inputs to target nodes have any actual inputs first,
- /// followed by an optional chain operand, then flag operands. Compute
- /// the number of actual operands that will go into the resulting
- /// MachineInstr.
- static unsigned CountOperands(SDNode *Node);
-
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a36137db4a..908ebb9486 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -70,6 +70,9 @@ private:
SDValue OptimizeFloatStore(StoreSDNode *ST);
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
@@ -425,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
DebugLoc dl = LD->getDebugLoc();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
- if (TLI.isTypeLegal(intVT)) {
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
@@ -433,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
- if (VT.isFloatingPoint() && LoadedVT != VT)
- Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
ValResult = Result;
ChainResult = Chain;
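Editor's note: the hunk above makes ExpandUnalignedLoad require that the in-memory type is also legal before rewriting the load as a same-sized integer load plus a bitcast, and uses ANY_EXTEND rather than only FP_EXTEND when the loaded type is narrower than the result. Outside the DAG, the "load the bytes as an integer, then reinterpret" trick looks like the following sketch (real targets do this with legal integer loads, not memcpy):

#include <cstdint>
#include <cstring>

// Read a float from a possibly misaligned address by doing a byte-wise
// (alignment-free) integer load of the same width and reinterpreting the bits.
float loadFloatUnaligned(const void *P) {
  uint32_t Bits;
  std::memcpy(&Bits, P, sizeof(Bits));  // misalignment-safe "integer load"
  float F;
  std::memcpy(&F, &Bits, sizeof(F));    // the ISD::BITCAST step
  return F;
}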
@@ -639,9 +643,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// probably means that we need to integrate dag combiner and legalizer
// together.
// We generally can't do this one for long doubles.
- SDValue Tmp1 = ST->getChain();
- SDValue Tmp2 = ST->getBasePtr();
- SDValue Tmp3;
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -649,19 +652,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
TLI.isTypeLegal(MVT::i32)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
@@ -674,11 +677,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
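Editor's note: OptimizeFloatStore, now using named Chain/Ptr/Con values instead of the recycled Tmp variables, turns a store of a floating-point constant into a store of its bit pattern: one i32 store for f32, and for f64 either one i64 store or two i32 stores of the halves (swapped on big-endian). The arithmetic behind the f64-as-two-i32 split can be sketched directly, assuming a little-endian layout:

#include <cstdint>
#include <cstring>

// Store a double-precision constant as two 32-bit integer stores, low word
// first, mirroring the legalizer's f64 split on little-endian targets.
void storeDoubleAsTwoWords(double V, uint32_t *Dst) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits));        // bitcastToAPInt()
  Dst[0] = static_cast<uint32_t>(Bits);        // Lo = trunc(Bits, 32)
  Dst[1] = static_cast<uint32_t>(Bits >> 32);  // Hi = trunc(lshr(Bits, 32), 32)
}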
@@ -689,14 +692,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
return SDValue(0, 0);
}
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ DebugLoc dl = Node->getDebugLoc();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ EVT VT = Value.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Value = DAG.getNode(ISD::BITCAST, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ DebugLoc dl = Node->getDebugLoc();
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ }
+}
+
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return;
- DebugLoc dl = Node->getDebugLoc();
-
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
TargetLowering::TypeLegal &&
@@ -709,9 +1146,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- bool isCustom = false;
-
// Figure out the correct action; the way to query this varies by opcode
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
@@ -865,19 +1299,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
switch (Action) {
case TargetLowering::Legal:
return;
- case TargetLowering::Custom:
+ case TargetLowering::Custom: {
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode()) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
- ResultVals.push_back(Tmp1);
+ ResultVals.push_back(Res);
else
- ResultVals.push_back(Tmp1.getValue(i));
+ ResultVals.push_back(Res.getValue(i));
}
- if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
+ if (Res.getNode() != Node || Res.getResNo() != 0) {
DAG.ReplaceAllUsesWith(Node, ResultVals.data());
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
@@ -885,7 +1319,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
return;
}
-
+ }
// FALL THROUGH
case TargetLowering::Expand:
ExpandNode(Node);
@@ -909,429 +1343,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::CALLSEQ_END:
break;
case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LD->getChain(); // Legalize the chain.
- Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD) {
- EVT VT = Node->getValueType(0);
- Tmp3 = SDValue(Node, 0);
- Tmp4 = SDValue(Node, 1);
-
- switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp3, Tmp4);
- }
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Tmp3, DAG);
- if (Tmp1.getNode()) {
- Tmp3 = Tmp1;
- Tmp4 = Tmp1.getValue(1);
- }
- break;
- case TargetLowering::Promote: {
- // Only promote a load of vector type to another.
- assert(VT.isVector() && "Cannot promote this load!");
- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
- Tmp4 = Tmp1.getValue(1);
- break;
- }
- }
- if (Tmp4.getNode() != Node) {
- assert(Tmp3.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
- ReplacedNode(Node);
- }
- return;
- }
-
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = Result;
- Tmp2 = Ch;
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- } else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp2 = Ch;
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Tmp1 = SDValue(Node, 0);
- Tmp2 = SDValue(Node, 1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp3.getNode()) {
- Tmp1 = Tmp3;
- Tmp2 = Tmp3.getValue(1);
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp1, Tmp2);
- }
- }
- }
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp2 = Load.getValue(1);
- break;
- }
-
- assert(!SrcVT.isVector() &&
- "Vector Loads are handled in LegalizeVectorOps");
-
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = ValRes;
- Tmp2 = Result.getValue(1);
- break;
- }
- }
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- if (Tmp2.getNode() != Node) {
- assert(Tmp1.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
- ReplacedNode(Node);
- }
- break;
+ return LegalizeLoadOps(Node);
}
case ISD::STORE: {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = ST->getChain();
- Tmp2 = ST->getBasePtr();
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
-
- if (!ST->isTruncatingStore()) {
- if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- ReplaceNode(ST, OptStore);
- break;
- }
-
- {
- Tmp3 = ST->getValue();
- EVT VT = Tmp3.getValueType();
- switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node),
- DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode())
- ReplaceNode(SDValue(Node, 0), Tmp1);
- break;
- case TargetLowering::Promote: {
- assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BITCAST, dl,
- TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- break;
- }
- } else {
- Tmp3 = ST->getValue();
-
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
-
- if (StWidth != StVT.getStoreSizeInBits()) {
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
- Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- SDValue Result =
- DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
- // If not storing a power-of-2 number of bits, expand as two stores.
- assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Store size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
- // Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- RoundVT,
- isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- } else {
- // Big endian - avoid unaligned stores.
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
- // Store the top RoundWidth bits.
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- }
-
- // The order of the stores doesn't matter.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- ReplaceNode(SDValue(Node, 0), Result);
- } else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode())
- ReplaceNode(SDValue(Node, 0), Tmp1);
- break;
- case TargetLowering::Expand:
- assert(!StVT.isVector() &&
- "Vector Stores are handled in LegalizeVectorOps");
-
- // TRUNCSTORE:i16 i32 -> STORE i16
- assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
- Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- }
- break;
+ return LegalizeStoreOps(Node);
}
}
}
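Editor's note on the LegalizeDAG.cpp changes above: the LOAD and STORE cases of LegalizeOp are moved verbatim into the new LegalizeLoadOps and LegalizeStoreOps helpers, with the Tmp1..Tmp4 temporaries renamed to Chain, Ptr, Value and friends. Both helpers handle memory types whose width is not a power of two by splitting the access at the largest power-of-two boundary: RoundWidth = 1 << Log2_32(Width) covers the low part, ExtraWidth = Width - RoundWidth covers the rest, and the pointer is advanced by RoundWidth/8 for the second access (order swapped on big-endian). The little-endian truncating-store case, e.g. an i24 store done as an i16 store plus an i8 store of the shifted-down top bits, can be sketched with plain integers (a sketch only; it assumes a little-endian host and a byte-multiple width, as the DAG code also asserts):

#include <cassert>
#include <cstdint>
#include <cstring>

// Store the low `Width` bits of Value at Dst as a power-of-two-sized low store
// followed by a store of the remaining high bits, little-endian.
void truncStoreSplit(uint64_t Value, unsigned Width, uint8_t *Dst) {
  assert(Width % 8 == 0 && (Width & (Width - 1)) != 0 &&
         "power-of-two widths are stored in one piece");
  unsigned RoundWidth = 1;             // largest power of two <= Width
  while (RoundWidth * 2 <= Width)
    RoundWidth *= 2;
  unsigned ExtraWidth = Width - RoundWidth;
  std::memcpy(Dst, &Value, RoundWidth / 8);                // bottom RoundWidth bits
  uint64_t Hi = Value >> RoundWidth;                       // srl Value, RoundWidth
  std::memcpy(Dst + RoundWidth / 8, &Hi, ExtraWidth / 8);  // remaining ExtraWidth bits
}

The extending-load path splits the same way, loading the low part zero-extended and the high part with the requested extension, then shifting and OR-ing the two halves back together.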
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75b31f79ad..e8e968aaef 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
unsigned NumElts = InVT.getVectorNumElements();
assert(NumElts == NVT.getVectorNumElements() &&
"Dst and Src must have the same number of elements");
- EVT EltVT = InVT.getScalarType();
assert(isPowerOf2_32(NumElts) &&
"Promoted vector type must be a power of two");
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts/2);
-
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(0));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(NumElts/2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
@@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETNE);
+ RHS, DAG.getConstant(0, VT), ISD::SETNE);
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
- DAG.getConstant(1, VT), RHS);
+ DAG.getConstant(1, VT), RHS);
SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
SDValue Overflow;
Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
@@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
- DAG.getConstant(0, PtrVT), Temp,
- MachinePointerInfo(), false, false, 0);
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2321,15 +2317,15 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::
CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
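Editor's note: the XMULO hunk above only re-indents the existing expansion, which checks unsigned multiply overflow with a division rather than a libcall: pick a safe non-zero divisor with a select, then compare MUL / divisor against LHS. The same idea in plain C++, with the zero-divisor case handled by an early return instead of a select (a sketch, not the DAG lowering itself):

#include <cstdint>

// Unsigned multiply with overflow detection via re-division: the product
// overflowed iff dividing it by a non-zero RHS does not recover LHS.
bool umulOverflow(uint64_t LHS, uint64_t RHS, uint64_t &Product) {
  Product = LHS * RHS;            // wrapping multiply
  if (RHS == 0)
    return false;                 // 0 * x never overflows
  return (Product / RHS) != LHS;  // the SETNE on the re-divide
}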
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 106b086184..94fc9761ec 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -516,6 +516,7 @@ private:
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a8ff7c65ab..06f6bd63b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
DebugLoc dl = N->getDebugLoc();
// Convert to a vector of the expanded element type, for example
@@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+    assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d09411c42f..4709202965 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
- case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
@@ -152,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
NewVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
@@ -447,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
-
+
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
@@ -459,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split the result of this operator!");
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
@@ -1007,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split this operator's operand!");
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -1233,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
-
+
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
-
+
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
-
+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
-}
+}
@@ -1785,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getIntPtrConstant(j));
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -1846,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
DAG.getIntPtrConstant(0));
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
}
@@ -1862,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getIntPtrConstant(i));
Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -1966,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
Cond1 = GetWidenedVector(Cond1);
if (Cond1.getValueType() != CondWidenVT)
- Cond1 = ModifyToType(Cond1, CondWidenVT);
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
}
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -2232,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
ResVT, WideSETCC, DAG.getIntPtrConstant(0));
- return PromoteTargetBoolean(CC, N->getValueType(0));
+ return PromoteTargetBoolean(CC, N->getValueType(0));
}
@@ -2401,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- isVolatile,
- isNonTemporal, isInvariant,
- MinAlign(Align, Increment));
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2593,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 01622cb295..c3794d5f78 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->Props.IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 67f42b18ee..bf0a43785b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -853,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
/// After backtracking, the hazard checker needs to be restored to a state
-/// corresponding the the current cycle.
+/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
HazardRec->Reset();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5384576a0c..84e41fc4a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -61,6 +61,7 @@ namespace llvm {
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a8dce2176f..f4fe8927f6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -219,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) {
return true;
}
+/// allOperandsUndef - Return true if the node has at least one operand
+/// and all operands of the specified node are ISD::UNDEF.
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+
+ return true;
+}
+
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
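Editor's note: allOperandsUndef, added above, deliberately returns false for a node with zero operands even though "all of nothing" is vacuously true; callers use it to fold nodes whose inputs are all UNDEF, and folding an operand-less node that way would be wrong. The same predicate over an ordinary container, as a sketch:

#include <vector>

enum class Opcode { Undef, Constant, Add };

// True only when there is at least one operand and every operand is Undef.
// The empty case is intentionally false: with no inputs there is nothing to
// justify folding the whole node to UNDEF.
bool allOperandsUndef(const std::vector<Opcode> &Operands) {
  if (Operands.empty())
    return false;
  for (Opcode Op : Operands)
    if (Op != Opcode::Undef)
      return false;
  return true;
}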
@@ -387,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -422,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getTargetFlags());
break;
}
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -451,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -467,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
} // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -1084,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1183,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
@@ -1949,6 +2004,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
return;
}
case ISD::FGETSIGN:
@@ -2427,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
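The new FCEIL/FTRUNC/FFLOOR constant folds above rely on the correspondence between APFloat's rounding modes and the usual C library rounding functions. A small standalone check (plain C++, not LLVM code; the sample values are only illustrative):

    #include <cassert>
    #include <cmath>

    int main() {
      // rmTowardPositive behaves like ceil, rmTowardZero like trunc,
      // and rmTowardNegative like floor.
      assert(std::ceil(-2.5)  == -2.0);
      assert(std::trunc(-2.5) == -2.0);
      assert(std::floor(-2.5) == -3.0);
      assert(std::ceil(2.5)   ==  3.0);
      assert(std::trunc(2.5)  ==  2.0);
      assert(std::floor(2.5)  ==  2.0);
      return 0;
    }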
@@ -2674,6 +2748,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS:
+ // Concat of UNDEFs is UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF &&
+ N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
@@ -3879,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -3951,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4007,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4084,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -4203,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4292,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4359,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4383,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -4659,13 +4746,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
continue;
- bool NoMatch = false;
- for (unsigned i = 2; i != NumVTs; ++i)
- if (VTs[i] != I->VTs[i]) {
- NoMatch = true;
- break;
- }
- if (!NoMatch)
+ if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
return *I;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e5d24e9d18..e0a061d8be 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -942,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/Instruction.def"
}
@@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
// Update successor info
addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1833,7 +1836,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
visitInlineAsm(&I);
else if (Fn && Fn->isIntrinsic()) {
assert(Fn->getIntrinsicID() == Intrinsic::donothing);
- return; // ignore invokes to @llvm.donothing
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
@@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic store");
@@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ case Intrinsic::floor:
+ setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5239,9 +5247,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- DAG.getMachineFunction(),
- FTy->isVarArg(), Outs,
- FTy->getContext());
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
@@ -5538,6 +5546,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
@@ -5568,150 +5592,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call.
- if (!F->hasLocalLinkage() && F->hasName()) {
- StringRef Name = F->getName();
- if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
- (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
- (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType()) {
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
- } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
- (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
- (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
- (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
- (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
return;
- }
- } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
- (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
- (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
- (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
- (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
- }
- } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
- (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
- (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
- }
- } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
- (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
- (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
- (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
- (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
- }
- } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
- (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
- (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
- (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
- (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
- }
- } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
- (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
- (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
- }
- } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
- (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
- (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
- }
- } else if (Name == "memcmp") {
+ break;
+ case LibFunc::memcmp:
if (visitMemCmpCall(I))
return;
+ break;
}
}
}
@@ -6008,11 +5979,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -6805,7 +6776,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index.
if (FrameIndexSDNode *FI =
- dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index d0fde6f01d..4090002314 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -520,6 +520,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 5233479701..1c357ef375 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
@@ -418,6 +419,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index f02e895fcb..9f277fd8b4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -971,7 +971,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (TM.Options.EnableFastISel)
- FastIS = TLI.createFastISel(*FuncInfo);
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -2973,6 +2973,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getFunction()->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
unsigned iid =
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6212918d6c..6820175c1b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize() ?
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
@@ -2322,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
}
if (isa<ConstantFPSDNode>(N0.getNode())) {
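The two rewrites added above replace a compare against an illegal immediate with a logical right shift followed by a compare against a smaller constant. A standalone check of the underlying bit arithmetic (plain C++, not LLVM code; the constants are only examples):

    #include <cassert>
    #include <cstdint>

    int main() {
      // (X & -256) == 256  <=>  (X >> 8) == 1
      for (uint64_t X = 0; X < 1024; ++X)
        assert(((X & ~uint64_t(255)) == 256) == ((X >> 8) == 1));

      // X < 0x100000000  <=>  (X >> 32) < 1
      const uint64_t Samples[] = {0, 0xffffffffULL, 0x100000000ULL, ~0ULL};
      for (unsigned i = 0; i != 4; ++i)
        assert((Samples[i] < 0x100000000ULL) == ((Samples[i] >> 32) < 1));
      return 0;
    }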
@@ -2390,25 +2439,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
// We can always fold X == X for integer setcc's.
if (N0.getValueType().isInteger()) {
- switch (getBooleanContents(N0.getValueType().isVector())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
- case ZeroOrNegativeOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
- }
+ return DAG.getConstant(EqVal, VT);
}
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ return DAG.getConstant(EqVal, VT);
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(UOF, VT);
+ return DAG.getConstant(EqVal, VT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond)
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2977,10 +3034,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 9a751c13b5..4a2b7ec1cf 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
// Adjust RegAssign if a register assignment is killed at VNI->def. We
// want to avoid calculating the live range of the source register if
// possible.
- AssignI.find(VNI->def.getPrevSlot());
+ AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
// If MI doesn't kill the assigned register, just leave it.
@@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
assert(ParentVNI && "Parent not live at complement def");
@@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
if (!Dom.first || Dom.second == VNI->def)
@@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
- VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 43a6ad8c97..a04ac3fbc1 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -28,15 +28,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
-// SSPBufferSize - The lower bound for a buffer to be considered for stack
-// smashing protection.
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
-
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -46,7 +41,7 @@ namespace {
Function *F;
Module *M;
- DominatorTree* DT;
+ DominatorTree *DT;
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
@@ -60,6 +55,11 @@ namespace {
/// check fails.
BasicBlock *CreateFailBB();
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
bool RequiresStackProtector() const;
@@ -70,8 +70,8 @@ namespace {
}
StackProtector(const TargetLowering *tli)
: FunctionPass(ID), TLI(tli) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
@@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) {
DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
-
+
return InsertStackProtectors();
}
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has more than SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, true))
+ return true;
+
+ return false;
+}
+
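To make the new heuristic concrete, here is a hedged illustration (C/C++ source, not part of the patch) of which locals it would now flag, assuming the default buffer-size threshold of 8 and a function carrying the StackProtect attribute (e.g. built with -fstack-protector):

    struct Session { int id; char name[16]; };

    void f() {
      char buf[16];       // protected: char array of 16 >= 8 bytes
      struct Session s;   // now protected too: char array nested in a struct
      int nums[64];       // non-char array: protected only on Darwin targets,
                          // and never when it is nested inside a struct
    }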
/// RequiresStackProtector - Check whether or not this function needs a stack
/// protector based upon the stack protector level. The heuristic we use is to
/// add a guard variable to functions that call alloca, and functions with
@@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const {
if (!F->hasFnAttr(Attribute::StackProtect))
return false;
- const TargetData *TD = TLI->getTargetData();
-
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
@@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
- // If an array has more than SSPBufferSize bytes of allocated space,
- // then we emit stack protectors.
- if (SSPBufferSize <= TD->getTypeAllocSize(AT))
- return true;
+ if (ContainsProtectableArray(AI->getAllocatedType()))
+ return true;
}
}
@@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuardSlot = alloca i8*
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
- //
+ //
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
+
StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
PointerType::get(PtrTy, AddressSpace));
} else {
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
}
BasicBlock &Entry = F->getEntryBlock();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 1e940b1d07..20da36e8fb 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
- bool ColorWithRegs;
LiveStacks* LS;
MachineFrameInfo *MFI;
const TargetInstrInfo *TII;
@@ -82,7 +81,7 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ MachineFunctionPass(ID), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index c6fdc73824..5b06195046 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
LiveInterval &SrcInterval = LI->getInterval(SrcReg);
SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
assert(SrcVNI);
- SrcVNI->setHasPHIKill(true);
continue;
}
@@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
- DestVNI->setIsPHIDef(true);
// Prior to PHI elimination, the live ranges of PHIs begin at their defining
// instruction. After PHI elimination, PHI instructions are replaced by VNs
@@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
LI->getVNInfoAllocator());
- CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
DestCopyIndex.getRegSlot(),
CopyVNI));
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 54be88a8bb..ddee6b2401 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
}
/// Return the default expected latency for a def based on its opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
if (DefMI->mayLoad())
- return ItinData->Props.LoadLatency;
+ return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
- return ItinData->Props.HighLatency;
+ return SchedModel->HighLatency;
return 1;
}
@@ -629,7 +629,7 @@ static int computeDefOperandLatency(
if (FindMin) {
// If MinLatency is valid, call getInstrLatency. This uses Stage latency if
// it exists before defaulting to MinLatency.
- if (ItinData->Props.MinLatency >= 0)
+ if (ItinData->SchedModel->MinLatency >= 0)
return TII->getInstrLatency(ItinData, DefMI);
// If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
@@ -638,10 +638,10 @@ static int computeDefOperandLatency(
return 1;
}
else if(ItinData->isEmpty())
- return TII->defaultDefLatency(ItinData, DefMI);
+ return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
// ...operand lookup required
-return -1;
+ return -1;
}
/// computeOperandLatency - Compute and return the latency of the given data
@@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
@@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
// Expected latency is the max of the stage latency and itinerary props.
if (!FindMin)
- InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index babfacb782..75c9467a4f 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -109,8 +109,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
  // N.B.: The defaults used in here are not the same ones used in MC.
// We follow gcc, MC follows gas. For example, given ".section .eh_frame",
// both gas and MC will produce a section with no flags. Given
- // section(".eh_frame") gcc will produce
- // .section .eh_frame,"a",@progbits
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
if (Name.empty() || Name[0] != '.') return K;
// Some lame default implementation based on some magic section names.
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 153f8711d8..aa601af21b 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -30,6 +30,7 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
-STATISTIC(NumReMats, "Number of instructions re-materialized");
-STATISTIC(NumDeletes, "Number of dead instructions deleted");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -92,16 +94,9 @@ namespace {
unsigned Reg,
MachineBasicBlock::iterator OldPos);
- bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc);
-
bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
unsigned &LastDef);
- MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
- unsigned Dist);
-
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist);
@@ -117,14 +112,6 @@ namespace {
MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB, unsigned Dist);
- typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
- bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist);
- bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi, unsigned Dist);
-
bool isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI, MachineBasicBlock *MBB);
@@ -150,6 +137,11 @@ namespace {
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
@@ -167,6 +159,8 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->isTerminator())
+ KillMI == OldPos || KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
}
}
}
+ assert(KillMO && "Didn't find kill");
// Update kill and LV information.
KillMO->setIsKill(false);
@@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MBB->remove(MI);
MBB->insert(KillPos, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+
++Num3AddrSunk;
return true;
}
-/// isTwoAddrUse - Return true if the specified MI is using the specified
-/// register as a two-address operand.
-static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const MCInstrDesc &MCID = UseMI->getDesc();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg &&
- (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
- // Earlier use is a two-address one.
- return true;
- }
- return false;
-}
-
-/// isProfitableToReMat - Return true if the heuristics determines it is likely
-/// to be profitable to re-materialize the definition of Reg rather than copy
-/// the register.
-bool
-TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
- const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc) {
- bool OtherUse = false;
- for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = UseMO.getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end() && DI->second == Loc)
- continue; // Current use.
- OtherUse = true;
- // There is at least one other use in the MBB that will clobber the
- // register.
- if (isTwoAddrUse(UseMI, Reg))
- return true;
- }
- }
-
- // If other uses in MBB are not two-address uses, then don't remat.
- if (OtherUse)
- return false;
-
- // No other uses in the same block, remat if it's defined in the same
- // block so it does not unnecessarily extend the live range.
- return MBB == DefMI->getParent();
-}
-
/// NoUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
return !(LastUse > LastDef && LastUse < Dist);
}
-MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
- MachineBasicBlock *MBB,
- unsigned Dist) {
- unsigned LastUseDist = 0;
- MachineInstr *LastUse = 0;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineOperand &MO = I.getOperand();
- MachineInstr *MI = MO.getParent();
- if (MI->getParent() != MBB || MI->isDebugValue())
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
- if (DI == DistanceMap.end())
- continue;
- if (DI->second >= Dist)
- continue;
-
- if (MO.isUse() && DI->second > LastUseDist) {
- LastUse = DI->first;
- LastUseDist = DI->second;
- }
- }
- return LastUse;
-}
-
/// isCopyToReg - Return true if the specified MI is a copy instruction or
/// a extract_subreg instruction. It also returns the source and destination
/// registers and whether they are physical registers by reference.
@@ -483,32 +407,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
-/// findLocalKill - Look for an instruction below MI in the MBB that kills the
-/// specified register. Returns null if there are any other Reg use between the
-/// instructions.
-static
-MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
- MachineInstr *MI, MachineRegisterInfo *MRI,
- DenseMap<MachineInstr*, unsigned> &DistanceMap) {
- MachineInstr *KillMI = 0;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI == MI || UseMI->getParent() != MBB)
- continue;
- if (DistanceMap.count(UseMI))
- continue;
- if (!UI.getOperand().isKill())
- return 0;
- if (KillMI)
- return 0; // -O0 kill markers cannot be trusted?
- KillMI = UseMI;
- }
-
- return KillMI;
-}
-
/// findOnlyInterestingUse - Given a register, if has a single in-basic block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -564,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToReMat - Return true if it's potentially profitable to commute
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
@@ -654,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (LV)
// Update live variables
LV->replaceKillInstruction(RegC, MI, NewMI);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(MI, NewMI);
mbbi->insert(mi, NewMI); // Insert the new inst
mbbi->erase(mi); // Nuke the old inst.
@@ -702,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
+
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
// uses RegB, convertToThreeAddress must have created more
@@ -811,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
return;
}
-/// isSafeToDelete - If the specified instruction does not produce any side
-/// effects and all of its defs are dead, then it's safe to delete.
-static bool isSafeToDelete(MachineInstr *MI,
- const TargetInstrInfo *TII,
- SmallVector<unsigned, 4> &Kills) {
- if (MI->mayStore() || MI->isCall())
- return false;
- if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- Kills.push_back(MO.getReg());
- }
- return true;
-}
-
-/// canUpdateDeletedKills - Check if all the registers listed in Kills are
-/// killed by instructions in MBB preceding the current instruction at
-/// position Dist. If so, return true and record information about the
-/// preceding kills in NewKills.
-bool TwoAddressInstructionPass::
-canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist) {
- while (!Kills.empty()) {
- unsigned Kill = Kills.back();
- Kills.pop_back();
- if (TargetRegisterInfo::isPhysicalRegister(Kill))
- return false;
-
- MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
- if (!LastKill)
- return false;
-
- bool isModRef = LastKill->definesRegister(Kill);
- NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
- LastKill));
- }
- return true;
-}
-
-/// DeleteUnusedInstr - If an instruction with a tied register operand can
-/// be safely deleted, just delete it.
-bool
-TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned Dist) {
- // Check if the instruction has no side effects and if all its defs are dead.
- SmallVector<unsigned, 4> Kills;
- if (!isSafeToDelete(mi, TII, Kills))
- return false;
-
- // If this instruction kills some virtual registers, we need to
- // update the kill information. If it's not possible to do so,
- // then bail out.
- SmallVector<NewKill, 4> NewKills;
- if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
- return false;
-
- if (LV) {
- while (!NewKills.empty()) {
- MachineInstr *NewKill = NewKills.back().second;
- unsigned Kill = NewKills.back().first.first;
- bool isDead = NewKills.back().first.second;
- NewKills.pop_back();
- if (LV->removeVirtualRegisterKilled(Kill, mi)) {
- if (isDead)
- LV->addVirtualRegisterDead(Kill, NewKill);
- else
- LV->addVirtualRegisterKilled(Kill, NewKill);
- }
- }
- }
-
- mbbi->erase(mi); // Nuke the old inst.
- mi = nmi;
- return true;
-}
-
/// RescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
@@ -905,14 +722,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -999,6 +821,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
+ if (MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
+      // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
}
}
}
@@ -1012,20 +840,13 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MBB->splice(KillPos, MBB, From, To);
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
}
@@ -1061,14 +882,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -1094,6 +920,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
continue;
if (isDefTooClose(MOReg, DI->second, MI, MBB))
return false;
+ if (MOReg == Reg && !MO.isKill())
+ return false;
Uses.insert(MOReg);
if (MO.isKill() && MOReg != Reg)
Kills.insert(MOReg);
@@ -1135,6 +963,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
+ if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
} else {
OtherDefs.push_back(MOReg);
}
@@ -1165,19 +996,13 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(KillMI);
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
}
@@ -1202,15 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
-
- // If regA is dead and the instruction can be deleted, just delete
- // it so it doesn't clobber regB.
bool regBKilled = isKilled(MI, regB, MRI, TII);
- if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
- DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
- ++NumDeletes;
- return true; // Done with this instruction.
- }
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
@@ -1294,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (NewOpc != 0) {
const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
if (UnfoldMCID.getNumDefs() == 1) {
- MachineFunction &MF = *mbbi->getParent();
-
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF));
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -1380,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
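+    // If slot indexes are being tracked, assign one to the newly inserted copy.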
+ SlotIndex CopyIdx;
+ if (Indexes)
+ CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
-bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- MRI = &MF.getRegInfo();
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
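+  // SlotIndexes and LiveIntervals are optional analyses; when they are
+  // available, keep them up to date as instructions are moved or inserted.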
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
OptLevel = TM.getOptLevel();
@@ -1396,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << MF->getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs(MRI->getNumVirtRegs());
-
- typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
- TiedOperandMap;
- TiedOperandMap TiedOperands(4);
+ TiedOperandMap TiedOperands;
SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
unsigned Dist = 0;
DistanceMap.clear();
@@ -1428,199 +1400,48 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const MCInstrDesc &MCID = mi->getDesc();
- bool FirstTied = true;
-
DistanceMap.insert(std::make_pair(mi, ++Dist));
ProcessCopy(&*mi, &*mbbi, Processed);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- unsigned NumOps = mi->isInlineAsm()
- ? mi->getNumOperands() : MCID.getNumOperands();
- for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
- unsigned DstIdx = 0;
- if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
- continue;
-
- if (FirstTied) {
- FirstTied = false;
- ++NumTwoAddressInstrs;
- DEBUG(dbgs() << '\t' << *mi);
- }
-
- assert(mi->getOperand(SrcIdx).isReg() &&
- mi->getOperand(SrcIdx).getReg() &&
- mi->getOperand(SrcIdx).isUse() &&
- "two address instruction invalid");
-
- unsigned regB = mi->getOperand(SrcIdx).getReg();
-
- // Deal with <undef> uses immediately - simply rewrite the src operand.
- if (mi->getOperand(SrcIdx).isUndef()) {
- unsigned DstReg = mi->getOperand(DstIdx).getReg();
- // Constrain the DstReg register class if required.
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, MF))
- MRI->constrainRegClass(DstReg, RC);
- mi->getOperand(SrcIdx).setReg(DstReg);
- DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi);
- continue;
- }
- TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
}
- // Now iterate over the information collected above.
- for (TiedOperandMap::iterator OI = TiedOperands.begin(),
- OE = TiedOperands.end(); OI != OE; ++OI) {
- SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
-
- // If the instruction has a single pair of tied operands, try some
- // transformations that may either eliminate the tied operands or
- // improve the opportunities for coalescing away the register copy.
- if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
unsigned SrcIdx = TiedPairs[0].first;
unsigned DstIdx = TiedPairs[0].second;
-
- // If the registers are already equal, nothing needs to be done.
- if (mi->getOperand(SrcIdx).getReg() ==
- mi->getOperand(DstIdx).getReg())
- break; // Done with this instruction.
-
- if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
- Processed))
- break; // The tied operands have been eliminated.
- }
-
- bool IsEarlyClobber = false;
- bool RemovedKillFlag = false;
- bool AllUsesCopied = true;
- unsigned LastCopiedReg = 0;
- unsigned regB = OI->first;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- unsigned SrcIdx = TiedPairs[tpi].first;
- unsigned DstIdx = TiedPairs[tpi].second;
-
- const MachineOperand &DstMO = mi->getOperand(DstIdx);
- unsigned regA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
-
- // Grab regB from the instruction because it may have changed if the
- // instruction was commuted.
- regB = mi->getOperand(SrcIdx).getReg();
-
- if (regA == regB) {
- // The register is tied to multiple destinations (or else we would
- // not have continued this far), but this use of the register
- // already matches the tied destination. Leave it.
- AllUsesCopied = false;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed)) {
+ // The tied operands have been eliminated or shifted further down the
+ // block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
continue;
}
- LastCopiedReg = regA;
-
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
-
-#ifndef NDEBUG
- // First, verify that we don't have a use of "a" in the instruction
- // (a = b + a for example) because our transformation will not
- // work. This should never occur because we are in SSA form.
- for (unsigned i = 0; i != mi->getNumOperands(); ++i)
- assert(i == DstIdx ||
- !mi->getOperand(i).isReg() ||
- mi->getOperand(i).getReg() != regA);
-#endif
-
- // Emit a copy or rematerialize the definition.
- bool isCopy = false;
- const TargetRegisterClass *rc = MRI->getRegClass(regB);
- MachineInstr *DefMI = MRI->getUniqueVRegDef(regB);
- // If it's safe and profitable, remat the definition instead of
- // copying it.
- if (DefMI &&
- DefMI->isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, AA, regB) &&
- isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
- unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
- ++NumReMats;
- } else {
- BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
- regA).addReg(regB);
- isCopy = true;
- }
-
- // Update DistanceMap.
- MachineBasicBlock::iterator prevMI = prior(mi);
- DistanceMap.insert(std::make_pair(prevMI, Dist));
- DistanceMap[mi] = ++Dist;
-
- DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
-
- MachineOperand &MO = mi->getOperand(SrcIdx);
- assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
- "inconsistent operand info for 2-reg pass");
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
-
- // Make sure regA is a legal regclass for the SrcIdx operand.
- if (TargetRegisterInfo::isVirtualRegister(regA) &&
- TargetRegisterInfo::isVirtualRegister(regB))
- MRI->constrainRegClass(regA, MRI->getRegClass(regB));
-
- MO.setReg(regA);
-
- if (isCopy)
- // Propagate SrcRegMap.
- SrcRegMap[regA] = regB;
- }
-
- if (AllUsesCopied) {
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- }
- }
- }
-
- // Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
- LV->addVirtualRegisterKilled(regB, prior(mi));
-
- } else if (RemovedKillFlag) {
- // Some tied uses of regB matched their destination registers, so
- // regB is still used in this instruction, but a kill flag was
- // removed from a different tied use of regB, so now we need to add
- // a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- MO.setIsKill(true);
- break;
- }
- }
}
+ }
- // Schedule the source copy / remat inserted to form two-address
- // instruction. FIXME: Does it matter the distance map may not be
- // accurate after it's scheduled?
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
-
- MadeChange = true;
-
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ processTiedPairs(mi, OI->second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
@@ -1645,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Some remat'ed instructions are dead.
- for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
- if (MRI->use_nodbg_empty(VReg)) {
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- DefMI->eraseFromParent();
- }
- }
-
  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
// SSA form. It's now safe to de-SSA.
MadeChange |= EliminateRegSequences();