Diffstat (limited to 'lib/CodeGen')
61 files changed, 5002 insertions, 1898 deletions
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index b60fda86a6..bf5d8c4880 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, ARMException::ARMException(AsmPrinter *A) - : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) - {} + : DwarfException(A) {} ARMException::~ARMException() {} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 84277604ef..64a68d3d9c 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -218,16 +218,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9d17767f3b..d30e5bbd8e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf compile unit. +// This file contains support for constructing a dwarf compile unit. // //===----------------------------------------------------------------------===// @@ -419,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // Decode the original location, and use that as the start of the byref // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). 
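For illustration (register numbers and offset invented, standard DWARF opcode values): the new addRegisterOp/addRegisterOffset helpers are expected to produce the same location expressions as the removed open-coded block, roughly:

  reg 5, no offset:   DW_OP_reg5                            (0x50 + 5 = 0x55, a single byte)
  reg 40, no offset:  DW_OP_regx, ULEB128 40                (0x90 0x28)
  reg 5, offset -8:   DW_OP_breg5, SLEB128 -8               (0x75 0x78)
  reg 40, offset -8:  DW_OP_bregx, ULEB128 40, SLEB128 -8   (0x92 0x28 0x78)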
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index fa7fb1f8d0..abac337042 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1410,7 +1410,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - 0); + DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0); } } @@ -1450,6 +1450,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIVariable DV(Variables.getElement(i)); if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) continue; + // Check that DbgVariable for DV wasn't created earlier, when + // findAbstractVariable() was called for inlined instance of DV. + LLVMContext &Ctx = DV->getContext(); + DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx); + if (AbstractVariables.lookup(CleanDV)) + continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) addScopeVariable(Scope, new DbgVariable(DV, NULL)); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index b5f86ab1b9..75f6056c44 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -175,17 +175,6 @@ public: }; class ARMException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index a2ce7a004d..2e189ad7e7 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen DeadMachineInstructionElim.cpp DFAPacketizer.cpp DwarfEHPrepare.cpp + EarlyIfConversion.cpp EdgeBundles.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp @@ -60,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 7b015d850d..fb2c2e83f1 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeCalculateSpillWeightsPass(Registry); initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); initializeFinalizeMachineBundlesPass(Registry); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp new file mode 100644 index 0000000000..f9347efdb0 --- /dev/null +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -0,0 +1,803 @@ +//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Early if-conversion is for out-of-order CPUs that don't have a lot of +// predicable instructions. The goal is to eliminate conditional branches that +// may mispredict. +// +// Instructions from both sides of the branch are executed specutatively, and a +// cmov instruction selects the result. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. +static cl::opt<unsigned> +BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +// Stress testing mode - disable heuristics. +static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +//===----------------------------------------------------------------------===// +// SSAIfConv +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea. +// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +namespace { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. + MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. 
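For illustration only, not from the patch (function name and arithmetic invented): the transformation SSAIfConv performs, seen at the source level. Both arms are computed unconditionally in the head block and the phi in the tail becomes a select, so the branch that could mispredict disappears:

int ifcvt_example(bool c, int a, int b) {
  int t = a + 1;     // former 'true' block, now executed speculatively
  int f = b - 1;     // former 'false' block, now executed speculatively
  return c ? t : f;  // the tail phi becomes a select (a cmov on suitable targets)
}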
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg, FReg; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. + int CondCycles, TCycles, FCycles; + + PHIInfo(MachineInstr *phi) + : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} + }; + + SmallVector<PHIInfo, 8> PHIs; + +private: + /// The branch condition determined by AnalyzeBranch. + SmallVector<MachineOperand, 4> Cond; + + /// Instructions in Head that define values used by the conditional blocks. + /// The hoisted instructions must be inserted after these instructions. + SmallPtrSet<MachineInstr*, 8> InsertAfter; + + /// Register units clobbered by the conditional blocks. + BitVector ClobberedRegUnits; + + // Scratch pad for findInsertionPoint. + SparseSet<unsigned> LiveRegUnits; + + /// Insertion point in Head for speculatively executed instructions form TBB + /// and FBB. + MachineBasicBlock::iterator InsertionPoint; + + /// Return true if all non-terminator instructions in MBB can be safely + /// speculated. + bool canSpeculateInstrs(MachineBasicBlock *MBB); + + /// Find a valid insertion point in Head. + bool findInsertionPoint(); + + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + +public: + /// runOnMachineFunction - Initialize per-function data structures. + void runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LiveRegUnits.clear(); + LiveRegUnits.setUniverse(TRI->getNumRegUnits()); + ClobberedRegUnits.clear(); + ClobberedRegUnits.resize(TRI->getNumRegUnits()); + } + + /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, + /// initialize the internal state, and return true. + bool canConvertIf(MachineBasicBlock *MBB); + + /// convertIf - If-convert the last block passed to canConvertIf(), assuming + /// it is possible. Add any erased blocks to RemovedBlocks. + void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks); +}; +} // end anonymous namespace + + +/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely +/// be speculated. The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. 
+ for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + DEBUG(dbgs() << "Can't hoist: " << *I); + return false; + } + + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. + if (I->mayLoad()) { + DEBUG(dbgs() << "Won't speculate load: " << *I); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { + DEBUG(dbgs() << "Can't speculate: " << *I); + return false; + } + + // Check for any dependencies on Head instructions. + for (MIOperands MO(I); MO.isValid(); ++MO) { + if (MO->isRegMask()) { + DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + + // Remember clobbered regunits. + if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI)) + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + if (DefMI->isTerminator()) { + DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; + } + } + } + return true; +} + + +/// Find an insertion point in Head for the speculated instructions. The +/// insertion point must be: +/// +/// 1. Before any terminators. +/// 2. After any instructions in InsertAfter. +/// 3. Not have any clobbered regunits live. +/// +/// This function sets InsertionPoint and returns true when successful, it +/// returns false if no valid insertion point could be found. +/// +bool SSAIfConv::findInsertionPoint() { + // Keep track of live regunits before the current position. + // Only track RegUnits that are also in ClobberedRegUnits. + LiveRegUnits.clear(); + SmallVector<unsigned, 8> Reads; + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + MachineBasicBlock::iterator I = Head->end(); + MachineBasicBlock::iterator B = Head->begin(); + while (I != B) { + --I; + // Some of the conditional code depends in I. + if (InsertAfter.count(I)) { + DEBUG(dbgs() << "Can't insert code after " << *I); + return false; + } + + // Update live regunits. + for (MIOperands MO(I); MO.isValid(); ++MO) { + // We're ignoring regmask operands. That is conservatively correct. + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // I clobbers Reg, so it isn't live before I. + if (MO->isDef()) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + LiveRegUnits.erase(*Units); + // Unless I reads Reg. + if (MO->readsReg()) + Reads.push_back(Reg); + } + // Anything read by I is live before I. 
+ while (!Reads.empty()) + for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid(); + ++Units) + if (ClobberedRegUnits.test(*Units)) + LiveRegUnits.insert(*Units); + + // We can't insert before a terminator. + if (I != FirstTerm && I->isTerminator()) + continue; + + // Some of the clobbered registers are live before I, not a valid insertion + // point. + if (!LiveRegUnits.empty()) { + DEBUG({ + dbgs() << "Would clobber"; + for (SparseSet<unsigned>::const_iterator + i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) + dbgs() << ' ' << PrintRegUnit(*i, TRI); + dbgs() << " live before " << *I; + }); + continue; + } + + // This is a valid insertion point. + InsertionPoint = I; + DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + + + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = 0; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << "/BB#" << Succ1->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + } + + // This is a triangle or a diamond. + // If Tail doesn't have any phis, there must be side effects. + if (Tail->empty() || !Tail->front().isPHI()) { + DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + + // The branch we're looking to eliminate must be analyzable. + Cond.clear(); + if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + return false; + } + + // AnalyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. + FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Any phis in the tail block must be convertible to selects. + PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. 
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i+1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i+1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, + PI.CondCycles, PI.TCycles, PI.FCycles)) { + DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. + InsertAfter.clear(); + ClobberedRegUnits.reset(); + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); + unsigned DstReg = PI.PHI->getOperand(0).getReg(); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = 0; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. 
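For illustration (virtual register and block numbers invented): when Tail has extra predecessors, rewritePHIOperands keeps the PHI but routes the TPred/FPred values through a new select in Head, roughly:

//   Before, in Tail (BB#1 = TPred, BB#2 = FPred, BB#3 = an unrelated pred):
//     %v9 = PHI %v5, <BB#1>, %v6, <BB#2>, %v7, <BB#3>
//   After: a select is inserted in Head before its terminator,
//     %v10 = SELECT Cond, %v5, %v6
//   and the PHI is rewritten to
//     %v9 = PHI %v10, <Head>, %v7, <BB#3>
// With only the two if-converted predecessors, replacePHIInstrs instead
// deletes the PHI and writes the select result directly to %v9.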
+/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. + Head->removeSuccessor(TBB); + Head->removeSuccessor(FBB); + if (TBB != Tail) + TBB->removeSuccessor(Tail); + if (FBB != Tail) + FBB->removeSuccessor(Tail); + + // Fix up Head's terminators. + // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); + TII->RemoveBranch(*Head); + + // Erase the now empty conditional blocks. It is likely that Head can fall + // through to Tail, and we can join the two blocks. + if (TBB != Tail) { + RemovedBlocks.push_back(TBB); + TBB->eraseFromParent(); + } + if (FBB != Tail) { + RemovedBlocks.push_back(FBB); + FBB->eraseFromParent(); + } + + assert(Head->succ_empty() && "Additional head successors?"); + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { + // Splice Tail onto the end of Head. + DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() + << " into head BB#" << Head->getNumber() << '\n'); + Head->splice(Head->end(), Tail, + Tail->begin(), Tail->end()); + Head->transferSuccessorsAndUpdatePHIs(Tail); + RemovedBlocks.push_back(Tail); + Tail->eraseFromParent(); + } else { + // We need a branch to Tail, let code placement work it out later. 
+ DEBUG(dbgs() << "Converting to unconditional branch.\n"); + SmallVector<MachineOperand, 0> EmptyCond; + TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL); + Head->addSuccessor(Tail); + } + DEBUG(dbgs() << *Head); +} + + +//===----------------------------------------------------------------------===// +// EarlyIfConverter Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfConverter : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfConverter() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + bool tryConvertIf(MachineBasicBlock*); + void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); + void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +char EarlyIfConverter::ID = 0; +char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) + +void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Update the dominator tree after if-conversion erased some blocks. +void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { + // convertIf can remove TBB, FBB, and Tail can be merged into Head. + // TBB and FBB should not dominate any blocks. + // Tail children should be transferred to Head. + MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); + for (unsigned i = 0, e = Removed.size(); i != e; ++i) { + MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + } + DomTree->eraseNode(Removed[i]); + } +} + +/// Update LoopInfo after if-conversion. +void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { + if (!Loops) + return; + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (unsigned i = 0, e = Removed.size(); i != e; ++i) + Loops->removeBlock(Removed[i]); +} + +/// Invalidate MachineTraceMetrics before if-conversion. 
+void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. 
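A worked example of the heuristic above, with every number invented for illustration:

// If MispredictPenalty is 16 cycles, CritLimit = 8. With ResLength = 20 and
// MinCrit = 14, 20 <= 14 + 8 holds, so there is enough unexploited ILP.
// If BranchDepth = 10 and a PHI has CondCycles = 2, depth 9 and slack 1,
// then CondDepth = 12 and MaxDepth = 10, so the select stretches the
// critical path by 2 cycles, well under the 8 cycle limit, and the
// conversion is still considered profitable.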
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { + // If-convert MBB and update analyses. + invalidateTraces(); + SmallVector<MachineBasicBlock*, 4> RemovedBlocks; + IfConv.convertIf(RemovedBlocks); + Changed = true; + updateDomTree(RemovedBlocks); + updateLoops(RemovedBlocks); + } + return Changed; +} + +bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; + MRI = &MF.getRegInfo(); + DomTree = &getAnalysis<MachineDominatorTree>(); + Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (po_iterator<MachineDominatorTree*> + I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) + if (tryConvertIf(I->getBlock())) + Changed = true; + + MF.verify(this, "After early if-conversion"); + return Changed; +} diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc286d..7a17331ba1 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 4c7f5d8c88..07e37af57f 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1081,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); + // Some (out-of-tree) targets have EC reload instructions. 
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) + if (MO->isEarlyClobber()) + LoadIdx = LoadIdx.getRegSlot(true); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 8990360a72..0a795e644c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -196,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // If NewEnd was in the middle of an interval, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // Erase any dead ranges. - ranges.erase(llvm::next(I), MergeTo); - // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. - Ranges::iterator Next = llvm::next(I); - if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { - I->end = Next->end; - ranges.erase(Next); + if (MergeTo != ranges.end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + I->end = MergeTo->end; + ++MergeTo; } + + // Erase any dead ranges. + ranges.erase(llvm::next(I), MergeTo); } @@ -381,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other, const int *RHSValNoAssignments, SmallVector<VNInfo*, 16> &NewVNInfo, MachineRegisterInfo *MRI) { + verify(); + // Determine if any of our live range values are mapped. This is uncommon, so // we want to avoid the interval scan if not. bool MustMapCurValNos = false; @@ -448,14 +450,148 @@ void LiveInterval::join(LiveInterval &Other, valnos.resize(NumNewVals); // shrinkify // Okay, now insert the RHS live ranges into the LHS. - iterator InsertPos = begin(); unsigned RangeNo = 0; for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { // Map the valno in the other live range to the current live range. I->valno = NewVNInfo[OtherAssignments[RangeNo]]; assert(I->valno && "Adding a dead range?"); - InsertPos = addRangeFrom(*I, InsertPos); } + mergeIntervalRanges(Other); + + verify(); +} + +/// \brief Helper function for merging in another LiveInterval's ranges. +/// +/// This is a helper routine implementing an efficient merge of another +/// LiveIntervals ranges into the current interval. +/// +/// \param LHSValNo If non-NULL, set as the new value number for every range +/// from RHS which is merged into the LHS. +/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value +/// number maches this value number will be merged into LHS. +void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, + VNInfo *LHSValNo, + const VNInfo *RHSValNo) { + if (RHS.empty()) + return; + + // Ensure we're starting with a valid range. Note that we don't verify RHS + // because it may have had its value numbers adjusted in preparation for + // merging. + verify(); + + // The strategy for merging these efficiently is as follows: + // + // 1) Find the beginning of the impacted ranges in the LHS. + // 2) Create a new, merged sub-squence of ranges merging from the position in + // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS + // entries being merged will be copied into this new range. 
+ // 3) Replace the relevant section in LHS with these newly merged ranges. + // 4) Append any remaning ranges from RHS if LHS is exhausted in #2. + // + // We don't follow the typical in-place merge strategy for sorted ranges of + // appending the new ranges to the back and then using std::inplace_merge + // because one step of the merge can both mutate the original elements and + // remove elements from the original. Essentially, because the merge includes + // collapsing overlapping ranges, a more complex approach is required. + + // We do an initial binary search to optimize for a common pattern: a large + // LHS, and a very small RHS. + const_iterator RI = RHS.begin(), RE = RHS.end(); + iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI); + + // Merge into NewRanges until one of the ranges is exhausted. + SmallVector<LiveRange, 4> NewRanges; + + // Keep track of where to begin the replacement. + iterator ReplaceI = LI; + + // If there are preceding ranges in the LHS, put the last one into NewRanges + // so we can optionally extend it. Adjust the replacement point accordingly. + if (LI != begin()) { + ReplaceI = llvm::prior(LI); + NewRanges.push_back(*ReplaceI); + } + + // Now loop over the mergable portions of both LHS and RHS, merging into + // NewRanges. + while (LI != LE && RI != RE) { + // Skip incoming ranges with the wrong value. + if (RHSValNo && RI->valno != RHSValNo) { + ++RI; + continue; + } + + // Select the first range. We pick the earliest start point, and then the + // largest range. + LiveRange R = *LI; + if (*RI < R) { + R = *RI; + ++RI; + if (LHSValNo) + R.valno = LHSValNo; + } else { + ++LI; + } + + if (NewRanges.empty()) { + NewRanges.push_back(R); + continue; + } + + LiveRange &LastR = NewRanges.back(); + if (R.valno == LastR.valno) { + // Try to merge this range into the last one. + if (R.start <= LastR.end) { + LastR.end = std::max(LastR.end, R.end); + continue; + } + } else { + // We can't merge ranges across a value number. + assert(R.start >= LastR.end && + "Cannot overlap two LiveRanges with differing ValID's"); + } + + // If all else fails, just append the range. + NewRanges.push_back(R); + } + assert(RI == RE || LI == LE); + + // Check for being able to merge into the trailing sequence of ranges on the LHS. + if (!NewRanges.empty()) + for (; LI != LE && (LI->valno == NewRanges.back().valno && + LI->start <= NewRanges.back().end); + ++LI) + NewRanges.back().end = std::max(NewRanges.back().end, LI->end); + + // Replace the ranges in the LHS with the newly merged ones. It would be + // really nice if there were a move-supporting 'replace' directly in + // SmallVector, but as there is not, we pay the price of copies to avoid + // wasted memory allocations. + SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(), + NRE = NewRanges.end(); + for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI) + *ReplaceI = *NRI; + if (NRI == NRE) + ranges.erase(ReplaceI, LI); + else + ranges.insert(LI, NRI, NRE); + + // And finally insert any trailing end of RHS (if we have one). + for (; RI != RE; ++RI) { + LiveRange R = *RI; + if (LHSValNo) + R.valno = LHSValNo; + if (!ranges.empty() && + ranges.back().valno == R.valno && R.start <= ranges.back().end) + ranges.back().end = std::max(ranges.back().end, R.end); + else + ranges.push_back(R); + } + + // Ensure we finished with a valid new sequence of ranges. 
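The same merge-and-coalesce idea, reduced to plain half-open integer ranges and ignoring value numbers, looks roughly like the sketch below. It is a simplified analogy, not the LiveInterval API, and it re-sorts the combined list, whereas mergeIntervalRanges does a single in-place merge pass precisely to avoid that cost:

#include <algorithm>
#include <utility>
#include <vector>

typedef std::pair<unsigned, unsigned> Range; // half-open [start, end)

// Merge two sorted range lists, coalescing ranges that touch or overlap.
std::vector<Range> mergeRanges(const std::vector<Range> &LHS,
                               const std::vector<Range> &RHS) {
  std::vector<Range> All(LHS);
  All.insert(All.end(), RHS.begin(), RHS.end());
  std::sort(All.begin(), All.end());
  std::vector<Range> Out;
  for (size_t i = 0, e = All.size(); i != e; ++i) {
    if (!Out.empty() && All[i].first <= Out.back().second)
      Out.back().second = std::max(Out.back().second, All[i].second);
    else
      Out.push_back(All[i]);
  }
  return Out;
}
// e.g. merging {[0,4), [10,12)} with {[3,6), [12,20)} yields {[0,6), [10,20)}.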
+ verify(); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -464,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other, /// the overlapping LiveRanges have the specified value number. void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { - // TODO: Make this more efficient. - iterator InsertPos = begin(); - for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - // Map the valno in the other live range to the current live range. - LiveRange Tmp = *I; - Tmp.valno = LHSValNo; - InsertPos = addRangeFrom(Tmp, InsertPos); - } + mergeIntervalRanges(RHS, LHSValNo); } - /// MergeValueInAsValue - Merge all of the live ranges of a specific val# /// in RHS into this live interval as the specified value number. /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the /// current interval, it will replace the value numbers of the overlaped /// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue( - const LiveInterval &RHS, - const VNInfo *RHSValNo, VNInfo *LHSValNo) { - // TODO: Make this more efficient. - iterator InsertPos = begin(); - for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - if (I->valno != RHSValNo) - continue; - // Map the valno in the other live range to the current live range. - LiveRange Tmp = *I; - Tmp.valno = LHSValNo; - InsertPos = addRangeFrom(Tmp, InsertPos); - } +void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { + mergeIntervalRanges(RHS, LHSValNo, RHSValNo); } - /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all /// LiveRanges with the V1 value number with the V2 value number. This can @@ -549,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } - // Merge the relevant flags. - V2->mergeFlags(V1); - // Now that V1 is dead, remove it. markValNoForDeletion(V1); @@ -575,6 +690,8 @@ void LiveInterval::Copy(const LiveInterval &RHS, const LiveRange &LR = RHS.ranges[i]; addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); } + + verify(); } unsigned LiveInterval::getSize() const { @@ -617,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const { } else { OS << vni->def; if (vni->isPHIDef()) - OS << "-phidef"; - if (vni->hasPHIKill()) - OS << "-phikill"; + OS << "-phi"; } } } @@ -629,6 +744,23 @@ void LiveInterval::dump() const { dbgs() << *this << "\n"; } +#ifndef NDEBUG +void LiveInterval::verify() const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + assert(I->start.isValid()); + assert(I->end.isValid()); + assert(I->start < I->end); + assert(I->valno != 0); + assert(I->valno == valnos[I->valno->id]); + if (llvm::next(I) != E) { + assert(I->end <= llvm::next(I)->start); + if (I->end == llvm::next(I)->start) + assert(I->valno != llvm::next(I)->valno); + } + } +} +#endif + void LiveRange::print(raw_ostream &os) const { os << *this; @@ -690,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - if (MO.isUse() && MO.isUndef()) - continue; // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = Idx.getRegSlot(MO.isUse()); - const VNInfo *VNI = LI.getVNInfoAt(Idx); - // FIXME: We should be able to assert(VNI) here, but the coalescer leaves - // dangling defs around. 
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. if (!VNI) continue; MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 819707f59f..d0f8ae1af3 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -38,7 +39,13 @@ #include <cmath> using namespace llvm; +// Switch to the new experimental algorithm for computing live intervals. +static cl::opt<bool> +NewLiveIntervals("new-live-intervals", cl::Hidden, + cl::desc("Use new algorithm forcomputing live intervals")); + char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { AllocatableRegs = TRI->getAllocatableSet(fn); ReservedRegs = TRI->getReservedRegs(fn); - computeIntervals(); + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + if (NewLiveIntervals) { + // This is the new way of computing live intervals. + // It is independent of LiveVariables, and it can run at any time. + computeVirtRegs(); + computeRegMasks(); + } else { + // This is the old way of computing live intervals. + // It depends on LiveVariables. + computeIntervals(); + } computeLiveInRegUnits(); DEBUG(dump()); @@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the @@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses. 
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first;; + } +} + //===----------------------------------------------------------------------===// // Register Unit Liveness //===----------------------------------------------------------------------===// @@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 3e5d7f6c29..d828f25932 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { .getRegSlot(I.getOperand().isEarlyClobber()); // Create the def in LI. This may find an existing def. 
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc); - VNI->setIsPHIDef(MI->isPHI()); + LI->createDeadDef(Idx, *Alloc); } } @@ -95,7 +94,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { Idx = Idx.getRegSlot(true); } } - extend(LI, Idx); + extend(LI, Idx, Reg); } } @@ -129,7 +128,8 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { void LiveRangeCalc::extend(LiveInterval *LI, - SlotIndex Kill) { + SlotIndex Kill, + unsigned PhysReg) { assert(LI && "Missing live range"); assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); @@ -146,7 +146,7 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill); + VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); // When there were multiple different values, we may need new PHIs. if (!VNI) @@ -169,7 +169,8 @@ void LiveRangeCalc::calculateValues() { VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, - SlotIndex Kill) { + SlotIndex Kill, + unsigned PhysReg) { // Blocks where LI should be live-in. SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); @@ -180,7 +181,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = WorkList[i]; - assert(!MBB->pred_empty() && "Value live-in to entry block?"); + +#ifndef NDEBUG + if (MBB->pred_empty()) { + MBB->getParent()->verify(); + llvm_unreachable("Use not jointly dominated by defs."); + } + + if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && + !MBB->isLiveIn(PhysReg)) { + MBB->getParent()->verify(); + errs() << "The register needs to be live in to BB#" << MBB->getNumber() + << ", but is missing from the live-in list.\n"; + llvm_unreachable("Invalid global physical register"); + } +#endif + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; @@ -303,7 +319,6 @@ void LiveRangeCalc::updateSSA() { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 3c9e40f0fe..909829b228 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -105,9 +105,12 @@ class LiveRangeCalc { /// to be live-in are added to LiveIn. If a unique reaching def is found, /// its value is returned, if Kill is jointly dominated by multiple values, /// NULL is returned. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, - SlotIndex Kill); + SlotIndex Kill, + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -157,7 +160,9 @@ public: /// Kill is not dominated by a single existing value, PHI-defs are inserted /// as required to preserve SSA form. If Kill is known to be dominated by a /// single existing value, Alloc may be null. 
- void extend(LiveInterval *LI, SlotIndex Kill); + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 261d860e01..b4ce9aa8c1 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -177,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (!DefMI || !UseMI) return false; + // Since we're moving the DefMI load, make sure we're not extending any live + // ranges. + if (!allUsesAvailableAt(DefMI, + LIS.getInstructionIndex(DefMI), + LIS.getInstructionIndex(UseMI))) + return false; + + // We also need to make sure it is safe to move the load. + // Assume there are stores between DefMI and UseMI. + bool SawStore = true; + if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + return false; + DEBUG(dbgs() << "Try to fold single def: " << *DefMI << " into single use: " << *UseMI); @@ -226,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Collect virtual registers to be erased after MI is gone. SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -233,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -258,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. 
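For illustration (opcode and register names invented): the new ReadsPhysRegs path in eliminateDeadDefs keeps physreg reads alive instead of deleting the instruction outright, roughly:

//   %v7<def,dead> = SOMEOP %v3, %EFLAGS<imp-use>   ; dead, but reads a physreg
// becomes
//   KILL %EFLAGS<imp-use>
// i.e. the opcode is rewritten to KILL and every operand that is not a
// physical register is dropped, so the physreg live range never dangles.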
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index fa971e7c3a..a7c9f46f4d 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. - N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -311,8 +312,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -478,18 +482,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -508,14 +536,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -527,7 +554,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
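The rewritten replaceSuccessor no longer removes Old and blindly re-adds New: when New is already a successor, the Old edge is deleted and its weight is folded into New's edge, avoiding a duplicate successor entry. A standalone sketch of that merge, using plain vectors and integer block numbers instead of the MachineBasicBlock API (predecessor-list maintenance is omitted):

#include <cassert>
#include <cstdint>
#include <vector>

struct BlockEdges {
  std::vector<int> Successors;     // successor block numbers
  std::vector<uint32_t> Weights;   // parallel to Successors, may be empty
};

void replaceSuccessor(BlockEdges &B, int Old, int New) {
  if (Old == New)
    return;
  size_t OldI = B.Successors.size(), NewI = B.Successors.size();
  for (size_t i = 0; i != B.Successors.size(); ++i) {
    if (B.Successors[i] == Old) OldI = i;
    if (B.Successors[i] == New) NewI = i;
  }
  assert(OldI != B.Successors.size() && "Old is not a successor");

  // If New isn't already a successor, let it take Old's slot and weight.
  if (NewI == B.Successors.size()) {
    B.Successors[OldI] = New;
    return;
  }
  // New is already a successor: fold Old's weight into it, drop the Old edge.
  if (!B.Weights.empty()) {
    B.Weights[NewI] += B.Weights[OldI];
    B.Weights.erase(B.Weights.begin() + OldI);
  }
  B.Successors.erase(B.Successors.begin() + OldI);
}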
@@ -541,9 +571,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -674,7 +707,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Inherit live-ins from the successor for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) + E = Succ->livein_end(); I != E; ++I) NMBB->addLiveIn(*I); // Update LiveVariables. @@ -910,12 +943,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return wight iterator corresonding to the I successor diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5a15f92a18..c4dca2cd15 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. 
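The buildCFGChains hunk above re-orders a two-way terminator so the hotter edge is the conditional-branch target. A minimal sketch of just that decision, with plain integers in place of MachineBranchProbabilityInfo weights and a boolean standing in for a successful ReverseBranchCondition call:

#include <cstdint>

struct TwoWayBranch {
  int TBB;           // target of the conditional branch
  int FBB;           // target of the following unconditional branch
  bool Reversible;   // whether the condition can be inverted
};

// Returns true if the branch targets were swapped.
bool preferHotterFirst(TwoWayBranch &Br, uint32_t WeightTBB,
                       uint32_t WeightFBB) {
  if (!Br.Reversible || WeightFBB <= WeightTBB)
    return false;
  int Tmp = Br.TBB;   // branch to the hotter block first
  Br.TBB = Br.FBB;
  Br.FBB = Tmp;
  return true;
}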
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. - SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, othe predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. 
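The new alignment logic replaces the old "align every backedge target" approach with a small set of frequency tests. A standalone sketch of those tests, with plain integers in place of BlockFrequency fixed-point values; the 1/5 ratio mirrors the ColdProb(1, 5) constant above:

#include <cstdint>

struct AlignQuery {
  bool InLoop;               // block belongs to some natural loop
  uint64_t Freq;             // frequency of the candidate block
  uint64_t EntryFreq;        // frequency of the function entry block
  uint64_t LoopHeaderFreq;   // frequency of the candidate's loop header
  bool LayoutPredIsCFGPred;  // does the layout predecessor fall through?
  uint64_t LayoutEdgeFreq;   // frequency of the layout-predecessor edge
};

bool shouldAlign(const AlignQuery &Q) {
  const uint64_t Num = 1, Den = 5;              // ColdProb = 20%
  if (!Q.InLoop)
    return false;                               // non-looping blocks: skip
  if (Q.Freq < Q.EntryFreq * Num / Den)
    return false;                               // cold relative to entry
  if (Q.Freq < Q.LoopHeaderFreq * Num / Den)
    return false;                               // cold relative to header
  if (!Q.LayoutPredIsCFGPred)
    return true;                                // reached only by jumps
  return Q.LayoutEdgeFreq <= Q.Freq * Num / Den; // layout edge is cold
}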
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0795..447921147f 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index f2539baabd..896461fd19 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,7 +84,7 @@ namespace { bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const ; + MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, @@ -100,8 +100,7 @@ namespace { void ExitScope(MachineBasicBlock *MBB); bool ProcessBlock(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -216,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - 
PhysRefs.insert(*AI); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); } @@ -325,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -395,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -436,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. bool CrossMBBPhysDef = false; - SmallSet<unsigned,8> PhysRefs; + SmallSet<unsigned, 8> PhysRefs; SmallVector<unsigned, 2> PhysDefs; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; @@ -464,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; } @@ -487,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // within the register class of the new instruction. 
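The MayIncreasePressure heuristic added to isProfitableToCSE rests on a simple set-containment argument: if every instruction that uses Reg already uses CSReg, replacing Reg with CSReg cannot extend CSReg's live range across any new instruction. A simplified sketch with integer instruction ids in place of MachineInstr pointers:

#include <set>
#include <vector>

bool mayIncreasePressure(const std::vector<int> &UsesOfCSReg,
                         const std::vector<int> &UsesOfReg) {
  std::set<int> CSUses(UsesOfCSReg.begin(), UsesOfCSReg.end());
  for (std::vector<int>::const_iterator I = UsesOfReg.begin(),
       E = UsesOfReg.end(); I != E; ++I)
    if (!CSUses.count(*I))
      return true;   // Reg has a user that does not already read CSReg
  return false;
}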
const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); DoCSE = false; break; } @@ -502,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -521,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumCommutes; Changed = true; } else { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; @@ -536,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { /// up the dominator tree to destroy ancestors which are now done. void MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) { if (OpenChildren[Node]) return; @@ -545,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, ExitScope(Node->getBlock()); // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { + while (MachineDomTreeNode *Parent = Node->getIDom()) { unsigned Left = --OpenChildren[Parent]; if (Left != 0) break; @@ -557,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; CurrVN = 0; @@ -572,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { OpenChildren[Node] = NumChildren; for (unsigned i = 0; i != NumChildren; ++i) { MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; WorkList.push_back(Child); } } while (!WorkList.empty()); @@ -585,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { EnterScope(MBB); Changed |= ProcessBlock(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 
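ExitScopeIfDone no longer needs a separate ParentMap because the dominator tree already records each node's immediate dominator. A standalone sketch of the remaining bookkeeping, with a minimal DomNode type; the real pass also pops the scope (ExitScope) for each finished node, which is only noted in a comment here:

#include <map>

struct DomNode {
  DomNode *IDom;   // immediate dominator, null for the root
};

void exitScopeIfDone(DomNode *Node,
                     std::map<DomNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;                        // Node still has unfinished children
  // The real pass exits Node's scope here, then walks up through immediate
  // dominators, exiting every ancestor whose last open child just finished.
  while (DomNode *Parent = Node->IDom) {
    if (--OpenChildren[Parent] != 0)
      break;
    Node = Parent;
  }
}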
- ExitScopeIfDone(Node, OpenChildren, ParentMap); + ExitScopeIfDone(Node, OpenChildren); } return Changed; diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index dbda93e4fd..0102ac708d 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" @@ -42,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) { OS << "# " << Banner << ":\n"; - MF.print(OS); + MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; } }; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 5d16c20460..b166849946 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. - if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. - if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. 
@@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. - if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. - if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,11 +151,18 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. 
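Both setReg and the new out-of-line setIsDef follow the same pattern: because an operand's position in the register's use/def list depends on the register and on whether the operand is a def, the operand is unlinked, mutated, and re-inserted. A sketch of that pattern with a hypothetical UseList type standing in for MachineRegisterInfo:

#include <list>

struct Operand {
  unsigned Reg;
  bool IsDef;
};

struct UseList {
  std::list<Operand*> Ops;                 // defs first, then uses
  void remove(Operand *MO) { Ops.remove(MO); }
  void add(Operand *MO) {
    if (MO->IsDef)
      Ops.push_front(MO);                  // defs go to the front
    else
      Ops.push_back(MO);                   // uses go to the back
  }
};

void setIsDef(Operand *MO, UseList &List, bool Val) {
  if (MO->IsDef == Val)
    return;
  List.remove(MO);                         // unlink from the old position
  MO->IsDef = Val;
  List.add(MO);                            // re-insert at the right position
}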
+ Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified -/// operand. +/// operand. Note that this should stay in sync with the hash_value overload +/// below. bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { if (getType() != Other.getType() || getTargetFlags() != Other.getTargetFlags()) @@ -207,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -227,6 +204,47 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { llvm_unreachable("Invalid machine operand type"); } +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), + MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getBlockAddress()); + case MachineOperand::MO_RegisterMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + } + llvm_unreachable("Invalid machine operand type"); +} + /// print - Print the specified machine operand. 
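The comments around the new hash_value overload stress that it must hash exactly the fields isIdenticalTo compares, so that equal operands always produce equal hashes. A standalone illustration of that invariant with a reduced register-operand struct (not the MachineOperand fields themselves):

#include <cstddef>
#include <functional>

struct RegOp {
  unsigned Reg;
  unsigned SubReg;
  bool IsDef;
};

bool isIdenticalTo(const RegOp &A, const RegOp &B) {
  return A.Reg == B.Reg && A.SubReg == B.SubReg && A.IsDef == B.IsDef;
}

// Must stay in sync with isIdenticalTo: combine exactly the compared fields,
// so A == B implies hashValue(A) == hashValue(B).
std::size_t hashValue(const RegOp &MO) {
  std::size_t H = std::hash<unsigned>()(MO.Reg);
  H = H * 31 + std::hash<unsigned>()(MO.SubReg);
  H = H * 31 + std::hash<bool>()(MO.IsDef);
  return H;
}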
/// void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -312,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; case MachineOperand::MO_JumpTableIndex: OS << "<jt#" << getIndex() << '>'; break; @@ -609,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -654,13 +674,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). - assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) && + // RegMask operands go between the explicit and implicit operands. + assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || + OpNo < MCID->getNumOperands()) && "Trying to add an operand to a machine instr that is already done!"); // All operands from OpNo have been removed from RegInfo. If the Operands @@ -669,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. Operands.insert(Operands.begin() + OpNo, Op); @@ -680,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. 
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -696,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -706,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. - if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -720,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -733,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -1852,52 +1876,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { // Build up a buffer of hash code components. - // - // FIXME: This is a total hack. We should have a hash_value overload for - // MachineOperand, but currently that doesn't work because there are many - // different ideas of "equality" and thus different sets of information that - // contribute to the hash code. This one happens to want to take a specific - // subset. And it's still not clear that this routine uses the *correct* - // subset of information when computing the hash code. The goal is to use the - // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to - // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would - // be very useful to factor the selection of relevant inputs out of the two - // functions and into a common routine, but it's not clear how that can be - // done. 
SmallVector<size_t, 8> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - switch (MO.getType()) { - default: break; - case MachineOperand::MO_Register: - if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; // Skip virtual register defs. - HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); - break; - case MachineOperand::MO_Immediate: - HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); - break; - case MachineOperand::MO_FrameIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_JumpTableIndex: - HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); - break; - case MachineOperand::MO_MachineBasicBlock: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); - break; - case MachineOperand::MO_GlobalAddress: - HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); - break; - case MachineOperand::MO_BlockAddress: - HashComponents.push_back(hash_combine(MO.getType(), - MO.getBlockAddress())); - break; - case MachineOperand::MO_MCSymbol: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); - break; - } + if (MO.isReg() && MO.isDef() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + + HashComponents.push_back(hash_value(MO)); } return hash_combine_range(HashComponents.begin(), HashComponents.end()); } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 82e123528a..5fb938f340 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. 
+ // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. + MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { return &*I; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); -} - bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { use_nodbg_iterator UI = use_nodbg_begin(RegNo); if (UI == use_nodbg_end()) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index acb1ee6cb6..076547a5ed 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 847bf1e76e..a1dc9481c6 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -212,7 +212,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->startBlock(MBB); // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. RegionEnd points the the scheduling + // region as soon as it is discovered. RegionEnd points the scheduling // boundary at the bottom of the region. The DAG does not include RegionEnd, // but the region does (i.e. the next RegionEnd is above the previous // RegionBegin). 
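The list layout described above is the key to this commit: Next pointers form an ordinary null-terminated chain, Prev pointers are circular so the tail is reachable from the head in constant time, and defs are inserted at the front while uses go to the back so def_iterator can stop at the first use. A standalone model of the two operations with a plain Node struct in place of MachineOperand:

#include <cassert>
#include <cstddef>

struct Node {
  bool IsDef;
  Node *Prev;   // circular: the head's Prev points at the tail
  Node *Next;   // null on the last element
};

void addToUseList(Node *&HeadRef, Node *MO) {
  if (!HeadRef) {
    MO->Prev = MO;                 // single element: Prev points at itself
    MO->Next = 0;
    HeadRef = MO;
    return;
  }
  Node *Head = HeadRef;
  Node *Last = Head->Prev;         // tail reachable in O(1) via Head->Prev
  Head->Prev = MO;
  MO->Prev = Last;
  if (MO->IsDef) {                 // defs precede uses
    MO->Next = Head;
    HeadRef = MO;
  } else {                         // uses go to the back
    MO->Next = 0;
    Last->Next = MO;
  }
}

void removeFromUseList(Node *&HeadRef, Node *MO) {
  Node *const Head = HeadRef;
  assert(Head && "List already empty");
  Node *Next = MO->Next;
  Node *Prev = MO->Prev;
  if (MO == Head)
    HeadRef = Next;
  else
    Prev->Next = Next;
  (Next ? Next : Head)->Prev = Prev;   // keep the circular Prev chain intact
  MO->Prev = MO->Next = 0;
}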
If the current block has no terminator then RegionEnd == @@ -403,7 +403,8 @@ public: /// getIssueWidth - Return the max instructions per scheduling group. unsigned getIssueWidth() const { - return InstrItins ? InstrItins->Props.IssueWidth : 1; + return (InstrItins && InstrItins->SchedModel) + ? InstrItins->SchedModel->IssueWidth : 1; } /// getNumMicroOps - Return the number of issue slots required for this MI. diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1ce546b578..bc383cba45 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -99,6 +99,16 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; + + // SuccessorSorter - Sort Successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority. + SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000000..1a3aa60918 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
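The MachineSink change orders candidate successors by loop depth before the profitability scan, so a successor at a shallower loop depth is tried first; std::stable_sort keeps the original successor order for ties. A standalone sketch with integer block ids and a depth table in place of MachineLoopInfo:

#include <algorithm>
#include <map>
#include <vector>

struct ByLoopDepth {
  const std::map<int, unsigned> &Depth;    // block id -> loop depth
  explicit ByLoopDepth(const std::map<int, unsigned> &D) : Depth(D) {}
  bool operator()(int LHS, int RHS) const {
    return Depth.at(LHS) < Depth.at(RHS);
  }
};

std::vector<int> orderSinkCandidates(std::vector<int> Succs,
                                     const std::map<int, unsigned> &Depth) {
  std::stable_sort(Succs.begin(), Succs.end(), ByLoopDepth(Depth));
  return Succs;
}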
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. +const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + // FIXME: Compute per-functional unit counts. + FBI->HasCalls = false; + unsigned InstrCount = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isTransient()) + continue; + ++InstrCount; + if (MI->isCall()) + FBI->HasCalls = true; + } + FBI->InstrCount = InstrCount; + return FBI; +} + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); +} + +// Virtual destructor serves as an anchor. 
+MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. +void MachineTraceMetrics::Ensemble:: +computeHeightResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources for the current block. + TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + + // The trace tail is done. + if (!TBI->Succ) { + TBI->Tail = MBB->getNumber(); + return; + } + + // Compute from the block below. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); + TBI->InstrHeight += SuccTBI->InstrHeight; + TBI->Tail = SuccTBI->Tail; +} + +// Check if depth resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getDepthResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidDepth() ? TBI : 0; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : 0; +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in a post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. 
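Once pickTracePred and pickTraceSucc have been run, a block's trace is just the chain of chosen predecessors above it and chosen successors below it, and the per-block resources compose additively: InstrDepth counts instructions strictly above the block, InstrHeight counts the block itself and everything below. A standalone sketch of that composition, with blocks modelled as an array indexed by block number (field names follow TraceBlockInfo, the layout is simplified):

#include <vector>

struct BlockCounts {
  int Pred;            // chosen predecessor block number, -1 for trace head
  int Succ;            // chosen successor block number, -1 for trace tail
  unsigned InstrCount; // non-transient instructions in this block
};

unsigned traceInstrCount(const std::vector<BlockCounts> &Blocks, int Center) {
  unsigned Depth = 0;  // instructions strictly above the center block
  for (int B = Blocks[Center].Pred; B != -1; B = Blocks[B].Pred)
    Depth += Blocks[B].InstrCount;
  unsigned Height = 0; // instructions in the center block and below
  for (int B = Center; B != -1; B = Blocks[B].Succ)
    Height += Blocks[B].InstrCount;
  return Depth + Height;
}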
+// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return 0; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = 0; + unsigned BestDepth = 0; + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + const MachineBasicBlock *Pred = *I; + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = 0; + unsigned BestHeight = 0; + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop. + if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. 
+ switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. +void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. 
+ TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. 
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. +static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. 
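+// A machine PHI lists its incoming values as (register, predecessor MBB)
+// operand pairs after the def, so the loop below visits operands 1, 3, 5, ...
+// and reads the incoming block from operand i+1.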
+static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. 
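+///
+/// For each virtual register in the live-in list, the chain length is the
+/// depth of its defining instruction plus the height recorded for the live-in:
+/// Len = Depth(DefMI) + LiveIn.Height.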
+unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. + if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. 
+ InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. + if (I->MI) + DepHeight += TII->computeOperandLatency(ItinData, + MI, MO.getOperandNo(), + I->MI, I->Op); + else + // No UseMI. Just use the MI latency instead. + DepHeight += TII->getInstrLatency(ItinData, MI); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. + MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists +/// of all the blocks in Trace. Stop when reaching the block that contains +/// DefMI. 
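+///
+/// Only the register number is recorded here; the live-in heights are filled
+/// in later by computeInstrHeights() once the final heights are known.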
+void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? 
Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps.front().DefMI, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.ItinData, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps[i].DefMI, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. 
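+ // Make sure the trace through MBB and the per-instruction depths above and
+ // heights below its center block are available; blocks that are still valid
+ // are not recomputed.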
+ computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, + Dep.DefMI, Dep.DefOp, + PHI, Dep.UseOp, + /* FindMin = */ false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. 
+ return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000000..c5b86f31db --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. 
This means that +// if the traces through basic blocks A and B ever cross when moving away from +// A and B, they never diverge again. This applies in both directions - If the +// traces meet above A and B, they won't diverge when going further back. +// +// Traces tend to align with loops. The trace through a block in an inner loop +// will begin at the loop entry block and end at a back edge. If there are +// nested loops, the trace may begin and end at those instead. +// +// For each trace, we compute the critical path length, which is the number of +// cycles required to execute the trace when execution is limited by data +// dependencies only. We also compute the resource height, which is the number +// of cycles required to execute all instructions in the trace when ignoring +// data dependencies. +// +// Every instruction in the current block has a slack - the number of cycles +// execution of the instruction can be delayed without extending the critical +// path. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H +#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class InstrItineraryData; +class MachineBasicBlock; +class MachineInstr; +class MachineLoop; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class raw_ostream; + +class MachineTraceMetrics : public MachineFunctionPass { + const MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *ItinData; + const MachineRegisterInfo *MRI; + const MachineLoopInfo *Loops; + +public: + class Ensemble; + class Trace; + static char ID; + MachineTraceMetrics(); + void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + void releaseMemory(); + void verifyAnalysis() const; + + friend class Ensemble; + friend class Trace; + + /// Per-basic block information that doesn't depend on the trace through the + /// block. + struct FixedBlockInfo { + /// The number of non-trivial instructions in the block. + /// Doesn't count PHI and COPY instructions that are likely to be removed. + unsigned InstrCount; + + /// True when the block contains calls. + bool HasCalls; + + FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} + + /// Returns true when resource information for this block has been computed. + bool hasResources() const { return InstrCount != ~0u; } + + /// Invalidate resource information. + void invalidate() { InstrCount = ~0u; } + }; + + /// Get the fixed resource information about MBB. Compute it on demand. + const FixedBlockInfo *getResources(const MachineBasicBlock*); + + /// A virtual register or regunit required by a basic block or its trace + /// successors. + struct LiveInReg { + /// The virtual register required, or a register unit. + unsigned Reg; + + /// For virtual registers: Minimum height of the defining instruction. + /// For regunits: Height of the highest user in the trace. + unsigned Height; + + LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} + }; + + /// Per-basic block information that relates to a specific trace through the + /// block. Convergent traces means that only one of these is required per + /// block in a trace ensemble. + struct TraceBlockInfo { + /// Trace predecessor, or NULL for the first block in the trace. 
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+ /// Minimum number of cycles from when this instruction is issued to the end
+ /// of the trace, as determined by data dependencies and instruction latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+ /// passes through the current basic block once. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const { + return TBI.InstrDepth + TBI.InstrHeight; + } + + /// Return the resource depth of the top/bottom of the trace center block. + /// This is the number of cycles required to execute all instructions from + /// the trace head to the trace center block. The resource depth only + /// considers execution resources, it ignores data dependencies. + /// When Bottom is set, instructions in the trace center block are included. + unsigned getResourceDepth(bool Bottom) const; + + /// Return the resource length of the trace. This is the number of cycles + /// required to execute the instructions in the trace if they were all + /// independent, exposing the maximum instruction-level parallelism. + /// + /// Any blocks in Extrablocks are included as if they were part of the + /// trace. + unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = + ArrayRef<const MachineBasicBlock*>()) const; + + /// Return the length of the (data dependency) critical path through the + /// trace. + unsigned getCriticalPath() const { return TBI.CriticalPath; } + + /// Return the depth and height of MI. The depth is only valid for + /// instructions in or above the trace center block. The height is only + /// valid for instructions in or below the trace center block. + InstrCycles getInstrCycles(const MachineInstr *MI) const { + return TE.Cycles.lookup(MI); + } + + /// Return the slack of MI. This is the number of cycles MI can be delayed + /// before the critical path becomes longer. + /// MI must be an instruction in the trace center block. + unsigned getInstrSlack(const MachineInstr *MI) const; + + /// Return the Depth of a PHI instruction in a trace center block successor. + /// The PHI does not have to be part of the trace. + unsigned getPHIDepth(const MachineInstr *PHI) const; + }; + + /// A trace ensemble is a collection of traces selected using the same + /// strategy, for example 'minimum resource height'. There is one trace for + /// every block in the function. + class Ensemble { + SmallVector<TraceBlockInfo, 4> BlockInfo; + DenseMap<const MachineInstr*, InstrCycles> Cycles; + friend class Trace; + + void computeTrace(const MachineBasicBlock*); + void computeDepthResources(const MachineBasicBlock*); + void computeHeightResources(const MachineBasicBlock*); + unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); + void computeInstrDepths(const MachineBasicBlock*); + void computeInstrHeights(const MachineBasicBlock*); + void addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace); + + protected: + MachineTraceMetrics &MTM; + virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; + virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; + explicit Ensemble(MachineTraceMetrics*); + const MachineLoop *getLoopFor(const MachineBasicBlock*) const; + const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; + const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + + public: + virtual ~Ensemble(); + virtual const char *getName() const =0; + void print(raw_ostream&) const; + void invalidate(const MachineBasicBlock *MBB); + void verify() const; + + /// Get the trace that passes through MBB. + /// The trace is computed on demand. + Trace getTrace(const MachineBasicBlock *MBB); + }; + + /// Strategies for selecting traces. + enum Strategy { + /// Select the trace through a block that has the fewest instructions. 
+ TS_MinInstrCount, + + TS_NumStrategies + }; + + /// Get the trace ensemble representing the given trace selection strategy. + /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, + /// and valid for the lifetime of the analysis pass. + Ensemble *getEnsemble(Strategy); + + /// Invalidate cached information about MBB. This must be called *before* MBB + /// is erased, or the CFG is otherwise changed. + /// + /// This invalidates per-block information about resource usage for MBB only, + /// and it invalidates per-trace information for any trace that passes + /// through MBB. + /// + /// Call Ensemble::getTrace() again to update any trace handles. + void invalidate(const MachineBasicBlock *MBB); + +private: + // One entry per basic block, indexed by block number. + SmallVector<FixedBlockInfo, 4> BlockInfo; + + // One ensemble per strategy. + Ensemble* Ensembles[TS_NumStrategies]; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Trace &Tr) { + Tr.print(OS); + return OS; +} + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Ensemble &En) { + En.print(OS); + return OS; +} +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 45ce3ab28b..852c169254 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -73,8 +73,10 @@ namespace { typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; BitVector regsAllocatable; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. 
Return true if @@ -203,6 +208,10 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); @@ -212,6 +221,10 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() { regsAllocatable = TRI->getAllocatableSet(*MF); markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. + FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. 
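+ // This mirrors the successor check above: every predecessor must belong to
+ // the function and must list MBB among its successors.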
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (!FunctionBlocks.count(*I)) + report("MBB has predecessor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Succs.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the successor list of the predecessor BB#" + << (*I)->getNumber() << ".\n"; + } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); @@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); - } if (MBB->succ_size() != 2) { + } if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (&*MBBI != TBB) + report("MBB exits via conditional branch/fall-through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/fall-through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { @@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. - if (MBB->succ_size() != 2) { + if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (FBB != TBB) + report("MBB exits via conditional branch/branch through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/branch through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { @@ -651,10 +737,10 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); - const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // The first MCID.NumDefs operands must be explicit register defines if (MONum < MCID.getNumDefs()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef() && !MCOI.isOptionalDef()) @@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); } else if (MONum < MCID.getNumOperands()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && @@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); // Check register classes. 
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -837,6 +938,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Reserved registers may be used even when 'dead'. 
if (!isReserved(Reg)) report("Using an undefined physical register", MO, MONum); + } else if (MRI->def_empty(Reg)) { + report("Reading virtual register without a def", MO, MONum); } else { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; // We don't know which virtual registers are live in, so only complain @@ -1057,8 +1160,8 @@ void MachineVerifier::visitMachineFunctionAfter() { I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; ++I) if (MInfo.regsKilled.count(*I)) { - report("Virtual register killed in block, but needed live out.", MFI); - *OS << "Virtual register " << PrintReg(*I) + report("Virtual register killed in block, but needed live out.", MFI); + *OS << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } } @@ -1122,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() { const LiveInterval &LI = LiveInts->getInterval(Reg); assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. 
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
+ if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. 
+ // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. 
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Is VNI a PHI-def in the current block? - bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); - - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - // All predecessors must have a live-out value. - if (!PVNI) { - report("Register not marked live out of predecessor", *PI); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << " in " << LI << '\n'; - continue; - } - - // Only PHI-defs can take different predecessor values. - if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI); - *OS << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << " in " - << PrintReg(Reg) << ": " << LI << '\n'; - } - } - if (&*MFI == EndMBB) - break; - ++MFI; + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI, LI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; } } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} - // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF); - *OS << NumComp << " components in " << LI << '\n'; - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) + verifyLiveIntervalValue(LI, *I); + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) + verifyLiveIntervalSegment(LI, I); + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; } } } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b820578d6c..e6e23da27c 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -194,7 +194,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assuption that it needs to be lowered in a way that supports +/// under the assumption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// @@ -355,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode( // add a kill marker in this block saying that it kills the incoming value! if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the - // register. In most cases this is the copy, however, the first - // terminator instruction at the end of the block may also use the value. - // In this case, we should mark *it* as being the killing block, not the - // copy. - MachineBasicBlock::iterator KillInst; - MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); - if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { - KillInst = Term; - - // Check that no other terminators use values. -#ifndef NDEBUG - for (MachineBasicBlock::iterator TI = llvm::next(Term); - TI != opBlock.end(); ++TI) { - if (TI->isDebugValue()) - continue; - assert(!TI->readsRegister(SrcReg) && - "Terminator instructions cannot use virtual registers unless" - "they are the first terminator in a block!"); - } -#endif - } else if (reusedIncoming || !IncomingReg) { - // We may have to rewind a bit if we didn't insert a copy this time. - KillInst = Term; - while (KillInst != opBlock.begin()) { - --KillInst; - if (KillInst->isDebugValue()) - continue; - if (KillInst->readsRegister(SrcReg)) - break; + // register. In most cases this is the copy, however, terminator + // instructions at the end of the block may also use the value. In this + // case, we should mark the last such terminator as being the killing + // block, not the copy. 
+ MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't insert a copy this time. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); } - } else { - // We just inserted this copy. - KillInst = prior(InsertPos); } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -427,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0; + bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); + bool Changed = false; for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); - // We break edges when registers are live out from the predecessor block - // (not considering PHI nodes). If the register is live in to this block - // anyway, we would gain nothing from splitting. + // Is there a critical edge from PreMBB to MBB? + if (PreMBB->succ_size() == 1) + continue; + // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB != &MBB && - !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { - if (!MLI || - !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && - MLI->isLoopHeader(&MBB))) { - if (PreMBB->SplitCriticalEdge(&MBB, this)) { - Changed = true; - ++NumCriticalEdgesSplit; - } - } + if (PreMBB == &MBB) + continue; + const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; + if (IsLoopHeader && PreLoop == CurLoop) + continue; + + // LV doesn't consider a phi use live-out, so isLiveOut only returns true + // when the source register is live-out for some other reason than a phi + // use. That means the copy we will insert in PreMBB won't be a kill, and + // there is a risk it may not be coalesced away. + // + // If the copy would be a kill, there is no need to split the edge. + if (!LV.isLiveOut(Reg, *PreMBB)) + continue; + + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + + // If Reg is not live-in to MBB, it means it must be live-in to some + // other PreMBB successor, and we can avoid the interference by splitting + // the edge. + // + // If Reg *is* live-in to MBB, the interference is inevitable and a copy + // is likely to be left after coalescing. If we are looking at a loop + // exiting edge, split it so we won't insert code in the loop, otherwise + // don't bother. + bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + + // Check for a loop exiting edge. 
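      // (Editorial illustration, not part of the patch: if PreMBB sits inside
      // a loop and MBB is outside it, the PHI copy would otherwise be placed
      // in PreMBB and execute on every iteration; splitting the loop-exiting
      // edge moves that copy out of the loop body.)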
+ if (!ShouldSplit && CurLoop != PreLoop) { + DEBUG({ + dbgs() << "Split wouldn't help, maybe avoid loop copies?\n"; + if (PreLoop) dbgs() << "PreLoop: " << *PreLoop; + if (CurLoop) dbgs() << "CurLoop: " << *CurLoop; + }); + // This edge could be entering a loop, exiting a loop, or it could be + // both: Jumping directly from one loop to the header of a sibling + // loop. + // Split unless this edge is entering CurLoop from an outer loop. + ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); + } + if (!ShouldSplit) + continue; + if (!PreMBB->SplitCriticalEdge(&MBB, this)) { + DEBUG(dbgs() << "Failed to split critical edge.\n"); + continue; } + Changed = true; + ++NumCriticalEdgesSplit; } } return Changed; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 9693780bda..56526f2732 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -49,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden, + cl::desc("Enable Early If-conversion")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, @@ -86,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +// Experimental option to run live interval analysis early. +static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, + cl::desc("Run live interval analysis earlier in the pipeline")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -154,6 +160,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { if (StandardID == &DeadMachineInstructionElimID) return applyDisable(TargetID, DisableMachineDCE); + if (StandardID == &EarlyIfConverterID) + return applyDisable(TargetID, !EnableEarlyIfConversion); + if (StandardID == &MachineLICMID) return applyDisable(TargetID, DisableMachineLICM); @@ -228,6 +237,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); + // Disable early if-conversion. Targets that are ready can enable it. + disablePass(&EarlyIfConverterID); + // Temporarily disable experimental passes. substitutePass(&MachineSchedulerID, 0); } @@ -425,9 +437,6 @@ void TargetPassConfig::addISelPrepare() { /// TODO: We could use a single addPre/Post(ID) hook to allow pass injection /// before/after any target-independent pass. But it's currently overkill. void TargetPassConfig::addMachinePasses() { - // Print the instruction selected machine code... - printAndVerify("After Instruction Selection"); - // Insert a machine instr printer pass after the specified pass. // If -print-machineinstrs specified, print machineinstrs after all passes. if (StringRef(PrintMachineInstrs.getValue()).equals("")) @@ -443,8 +452,12 @@ void TargetPassConfig::addMachinePasses() { insertPass(TID, IID); } + // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection"); + // Expand pseudo-instructions emitted by ISel. - addPass(&ExpandISelPseudosID); + if (addPass(&ExpandISelPseudosID)) + printAndVerify("After ExpandISelPseudos"); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -527,6 +540,7 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); + addPass(&EarlyIfConverterID); addPass(&MachineLICMID); addPass(&MachineCSEID); addPass(&MachineSinkingID); @@ -639,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&MachineLoopInfoID); addPass(&PHIEliminationID); } + + // Eventually, we want to run LiveIntervals before PHI elimination. + if (EarlyLiveIntervals) + addPass(&LiveIntervalsID); + addPass(&TwoAddressInstructionPassID); if (EnableStrongPHIElim) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 91c33c4af4..9099862bd3 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -108,12 +110,14 @@ namespace { bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool optimizeSelect(MachineInstr *MI); bool isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); }; } @@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, return false; } +/// Optimize a select instruction. +bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { + unsigned TrueOp = 0; + unsigned FalseOp = 0; + bool Optimizable = false; + SmallVector<MachineOperand, 4> Cond; + if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) + return false; + if (!Optimizable) + return false; + if (!TII->optimizeSelect(MI)) + return false; + MI->eraseFromParent(); + ++NumSelects; + return true; +} + +/// isLoadFoldable - Check whether MI is a candidate for folding into a later +/// instruction. We only fold loads to virtual registers and the virtual +/// register defined has a single use. +bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, + unsigned &FoldAsLoadDefReg) { + if (!MI->canFoldAsLoad() || !MI->mayLoad()) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. 
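  // (Editorial note, not part of the patch: checking hasOneUse here is a
  // compile-time shortcut; a register with several uses at this point may
  // become single-use after a later peephole deletes one of them, and that
  // late opportunity is deliberately not re-examined.)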
+ if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { @@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; - bool First = true; - MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { - ++MII; + FoldAsLoadDefReg = 0; continue; } - - if (MI->isBitcast()) { - if (optimizeBitcastInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } - } else if (MI->isCompare()) { - if (optimizeCmpInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; + + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { @@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } - First = false; - PMII = MII; - ++MII; + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + continue; + } + } } } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8325f20e41..6b3a48eefd 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. 
/// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. - // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428605..d0cff481cb 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getFunction()->getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 733312fbd0..990633440e 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the @@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. 
+ UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // register live range doesn't need to be accurate as long as all the // defs are there. + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } - // Clear kill flags where live ranges are extended. while (!LHSOldKills.empty()) LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 110f478f48..9c1dba355b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - // - // FIXME: This optimization is disabled pending PR13112. - //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) - // return; + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) + return; // Add output dependence to the next nearest def of this vreg. // diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 7110b7566a..e675366485 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -72,10 +72,12 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); + // If MaxLookAhead is not set above, then we are not enabled. if (!isEnabled()) DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); else { - IssueWidth = ItinData->Props.IssueWidth; + // A nonempty itinerary must have a SchedModel. 
+ IssueWidth = ItinData->SchedModel->IssueWidth; DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " << ScoreboardDepth << '\n'); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f16b1e7f4a..10491d98cb 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -1639,7 +1645,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, - N1.getOperand(0)); + N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && @@ -2340,7 +2346,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeVectorOps) { + && Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2716,6 +2722,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
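          // (Editorial worked example, not part of the patch; x86-64-style add
          // immediates are assumed.) In
          //   (and (add x, 0x00000000fffffff0), (srl y, 32))
          // the 64-bit add constant is not a sign-extendable 32-bit immediate,
          // but the srl guarantees the top 32 bits of the AND result are zero,
          // so the constant may be widened to 0xfffffffffffffff0 (-16), which
          // is directly encodable and needs no separate materialization.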
+ } + } + } + } + } + } + + return SDValue(); } @@ -5651,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { @@ -5676,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5696,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold @@ -5728,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + DAG.getNode(ISD::FNEG, dl, VT, N1)); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); } + + // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } } return SDValue(); @@ -5974,6 +6020,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); } + // The next optimizations are desireable only if SELECT_CC can be lowered. 
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + // (See also visitSELECT) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select_cc x, y, 1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), + DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + N0.getOperand(0).getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + } + return SDValue(); } @@ -5999,6 +6077,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); } + // The next optimizations are desireable only if SELECT_CC can be lowered. + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + // (See also visitSELECT) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + + if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + } + return SDValue(); } @@ -6152,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = 
dyn_cast<ConstantFPSDNode>(N0); @@ -7561,6 +7694,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR @@ -7568,12 +7706,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // using shuffles. EVT SourceType = MVT::Other; bool AllAnyExt = true; - bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. if (In.getOpcode() == ISD::UNDEF) continue; - AllUndef = false; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -7601,9 +7738,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { AllAnyExt &= AnyExt; } - if (AllUndef) - return DAG.getUNDEF(VT); - // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. @@ -7742,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the element type of the input vector is not the same as + // the output element type, make concat_vectors based on input element + // type and then bitcast it to the output vector type. + // + // In another words avoid nodes like this: + // <NODE> v16i8 = concat_vectors v4i16 v4i16 + // Replace it with this one: + // <NODE0> v8i16 = concat_vectors v4i16 v4i16 + // <NODE1> v16i8 = bitcast NODE0 + EVT ItemType = VecIn1.getValueType().getVectorElementType(); + if (ItemType != VT.getVectorElementType()) { + EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), + ItemType, + VecIn1.getValueType().getVectorNumElements()*2); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); + } else + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } // If VecIn2 is unused then change it to undef. @@ -7779,6 +7933,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (N->getNumOperands() == 1) return N->getOperand(0); + // Check if all of the operands are undefs. 
+ if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(N->getValueType(0)); + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 07dc36577f..683fac6744 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -484,7 +485,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); @@ -573,7 +574,10 @@ bool FastISel::SelectCall(const User *I) { // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + // The donothing intrinsic does, well, nothing. + case Intrinsic::donothing: return true; + case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || @@ -642,7 +646,7 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); - else + else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); @@ -786,13 +790,24 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } - // Remove dead code. However, ignore call instructions since we've flushed + // Remove dead code. However, ignore call instructions since we've flushed // the local value map and recomputed the insert point. 
if (!isa<CallInst>(I)) { recomputeInsertPt(); @@ -1037,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1046,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index f154271894..4488d2790b 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { return N; } -/// CountOperands - The inputs to target nodes have any actual inputs first, +/// countOperands - The inputs to target nodes have any actual inputs first, /// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. -unsigned InstrEmitter::CountOperands(SDNode *Node) { +/// +/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding +/// the chain and glue. These operands may be implicit on the machine instr. +static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. + + // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. + for (unsigned I = N; I; --I) { + if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) + continue; + if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) + if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + continue; + NumImpUses = N - I; + break; + } + return N; } @@ -337,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the -/// operand number (in the II) that we are adding. IIOpNum and II are used for -/// assertions only. +/// operand number (in the II) that we are adding. void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, @@ -353,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, const ConstantFP *CFP = F->getConstantFPValue(); MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. This is used by call and return instructions passing + // arguments in registers. 
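    // (Editorial illustration, not part of the patch: a call or return node
    // whose arguments or return value live in physical registers carries
    // RegisterSDNode operands beyond those declared in its .td description;
    // each such extra physreg is added as an implicit use of the emitted
    // MachineInstr instead of an explicit operand.)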
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { @@ -393,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -461,7 +483,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && - SubIdx == DefSubIdx) { + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 @@ -696,7 +719,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); - unsigned NodeOperands = CountOperands(Node); + unsigned NumImpUses = 0; + unsigned NodeOperands = countOperands(Node, NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -705,7 +729,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && - NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index c081f38be0..9eddee9e33 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -105,12 +105,6 @@ public: /// (which do not go into the machine instrs.) static unsigned CountResults(SDNode *Node); - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by an optional chain operand, then flag operands. Compute - /// the number of actual operands that will go into the resulting - /// MachineInstr. - static unsigned CountOperands(SDNode *Node); - /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a36137db4a..908ebb9486 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -70,6 +70,9 @@ private: SDValue OptimizeFloatStore(StoreSDNode *ST); + void LegalizeLoadOps(SDNode *Node); + void LegalizeStoreOps(SDNode *Node); + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. 
In this case, it /// is necessary to spill the vector being inserted into to memory, perform @@ -425,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -433,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; @@ -639,9 +643,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // probably means that we need to integrate dag combiner and legalizer // together. // We generally can't do this one for long doubles. - SDValue Tmp1 = ST->getChain(); - SDValue Tmp2 = ST->getBasePtr(); - SDValue Tmp3; + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -649,19 +652,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF(). + SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } @@ -674,11 +677,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); @@ -689,14 +692,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + DebugLoc dl = Node->getDebugLoc(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + { + SDValue Value = ST->getValue(); + EVT VT = Value.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + assert(VT.isVector() && "Unknown legal promote case!"); + Value = DAG.getNode(ISD::BITCAST, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + } else { + SDValue Value = ST->getValue(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
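// Illustrative sketch, not part of the patch: the little-endian split implemented
// just below in plain terms. A 24-bit truncating store becomes a 16-bit store of
// the low bits plus an 8-bit store of the remaining bits at offset +2. Widths and
// names are assumptions chosen for the example.
#include <cstdint>

static void storeTrunc24LE(uint8_t *Mem, uint32_t Value) {
  const unsigned RoundWidth = 16;                    // 1 << Log2_32(24)
  const uint16_t Lo = uint16_t(Value);               // bottom RoundWidth bits
  const uint8_t  Hi = uint8_t(Value >> RoundWidth);  // remaining ExtraWidth (8) bits
  Mem[0] = uint8_t(Lo);
  Mem[1] = uint8_t(Lo >> 8);                         // the TRUNCSTORE:i16 at offset 0
  Mem[RoundWidth / 8] = Hi;                          // the TRUNCSTORE:i8 at offset +2
}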
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, + isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
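// Illustrative sketch, not part of the patch: the condition checked just below,
// stated on its own. An unaligned store is expanded only when the target cannot
// perform the access natively and the store's alignment is below the ABI
// alignment of the stored type.
static bool needsUnalignedStoreExpansion(bool TargetAllowsUnaligned,
                                         unsigned StoreAlign,
                                         unsigned ABIAlign) {
  return !TargetAllowsUnaligned && StoreAlign < ABIAlign;
}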
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDValue Chain = LD->getChain(); // The chain. + SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + DebugLoc dl = Node->getDebugLoc(); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + EVT VT = Node->getValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, RVal, RChain); + } + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + } + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + ReplacedNode(Node); + } + return; + } + + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. 
It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
+ IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: { + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; + } + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. 
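// Illustrative sketch, not part of the patch: the little-endian extending-load
// split shown above, mirrored in scalar code. EXTLOAD:i24 is rebuilt from a
// zero-extending 16-bit load plus an 8-bit load at offset +2 shifted into place.
#include <cstdint>

static uint32_t loadExt24LE(const uint8_t *Mem) {
  const unsigned RoundWidth = 16;                           // 1 << Log2_32(24)
  uint32_t Lo = uint32_t(Mem[0]) | (uint32_t(Mem[1]) << 8); // ZEXTLOAD:i16
  uint32_t Hi = uint32_t(Mem[RoundWidth / 8]);              // EXTLOAD@+2:i8
  return Lo | (Hi << RoundWidth);                           // join the hi and lo parts
}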
+ if (Chain.getNode() != Node) { + assert(Value.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + } +} + /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return; - DebugLoc dl = Node->getDebugLoc(); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal && @@ -709,9 +1146,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - bool isCustom = false; - // Figure out the correct action; the way to query this varies by opcode TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; @@ -865,19 +1299,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { switch (Action) { case TargetLowering::Legal: return; - case TargetLowering::Custom: + case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) - ResultVals.push_back(Tmp1); + ResultVals.push_back(Res); else - ResultVals.push_back(Tmp1.getValue(i)); + ResultVals.push_back(Res.getValue(i)); } - if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + if (Res.getNode() != Node || Res.getResNo() != 0) { DAG.ReplaceAllUsesWith(Node, ResultVals.data()); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); @@ -885,7 +1319,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } return; } - + } // FALL THROUGH case TargetLowering::Expand: ExpandNode(Node); @@ -909,429 +1343,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::CALLSEQ_END: break; case ISD::LOAD: { - LoadSDNode *LD = cast<LoadSDNode>(Node); - Tmp1 = LD->getChain(); // Legalize the chain. - Tmp2 = LD->getBasePtr(); // Legalize the base pointer. - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) { - EVT VT = Node->getValueType(0); - Tmp3 = SDValue(Node, 0); - Tmp4 = SDValue(Node, 1); - - switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Tmp3, Tmp4); - } - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Tmp3, DAG); - if (Tmp1.getNode()) { - Tmp3 = Tmp1; - Tmp4 = Tmp1.getValue(1); - } - break; - case TargetLowering::Promote: { - // Only promote a load of vector type to another. 
- assert(VT.isVector() && "Cannot promote this load!"); - // Change base type to a different vector type. - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); - Tmp4 = Tmp1.getValue(1); - break; - } - } - if (Tmp4.getNode() != Node) { - assert(Tmp3.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); - ReplacedNode(Node); - } - return; - } - - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. - (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = Result; - Tmp2 = Ch; - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. - assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. 
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp2 = Ch; - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Tmp1 = SDValue(Node, 0); - Tmp2 = SDValue(Node, 1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp3.getNode()) { - Tmp1 = Tmp3; - Tmp2 = Tmp3.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Tmp1, Tmp2); - } - } - } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? 
- ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp2 = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = ValRes; - Tmp2 = Result.getValue(1); - break; - } - } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. - if (Tmp2.getNode() != Node) { - assert(Tmp1.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); - ReplacedNode(Node); - } - break; + return LegalizeLoadOps(Node); } case ISD::STORE: { - StoreSDNode *ST = cast<StoreSDNode>(Node); - Tmp1 = ST->getChain(); - Tmp2 = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - break; - } - - { - Tmp3 = ST->getValue(); - EVT VT = Tmp3.getValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), - DAG, TLI, this); - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - ReplaceNode(SDValue(Node, 0), Tmp1); - break; - case TargetLowering::Promote: { - assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, - TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - break; - } - } else { - Tmp3 = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. 
For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - RoundVT, - isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } - - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); - } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - ReplaceNode(SDValue(Node, 0), Tmp1); - break; - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); - Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - } - break; + return LegalizeStoreOps(Node); } } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 75b31f79ad..e8e968aaef 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { unsigned NumElts = InVT.getVectorNumElements(); assert(NumElts == NVT.getVectorNumElements() && "Dst and Src must have the same number of elements"); - EVT EltVT = InVT.getScalarType(); assert(isPowerOf2_32(NumElts) && "Promoted vector type must be a power of two"); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), NumElts/2); - - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, - DAG.getIntPtrConstant(0)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, - DAG.getIntPtrConstant(NumElts/2)); EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); @@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETNE); + RHS, DAG.getConstant(0, VT), ISD::SETNE); SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); SDValue Overflow; Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); @@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. 
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, PtrVT), Temp, - MachinePointerInfo(), false, false, 0); + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2321,15 +2317,15 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); TargetLowering:: CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 106b086184..94fc9761ec 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -516,6 +516,7 @@ private: SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a8ff7c65ab..06f6bd63b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue OldVec = N->getOperand(0); unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + EVT OldEltVT = OldVec.getValueType().getVectorElementType(); DebugLoc dl = N->getDebugLoc(); // Convert to a vector of the expanded element type, for example @@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT OldVT = N->getValueType(0); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + if (OldVT != OldEltVT) { + // The result of EXTRACT_VECTOR_ELT may be larger than the element type of + // the input vector. If so, extend the elements of the input vector to the + // same bitwidth as the result before expanding. 
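// Illustrative sketch, not part of the patch: the case described above and
// implemented just below. When the extract result (say i64, expanded into two
// i32 halves) is wider than the vector's element type (say i16), the element is
// any-extended to the result width first and only then split into lo/hi parts.
#include <cstdint>

static void extractExpandedElt(const uint16_t *Vec, unsigned Idx,
                               uint32_t &LoPart, uint32_t &HiPart) {
  uint64_t Wide = uint64_t(Vec[Idx]);  // ANY_EXTEND the i16 element to the i64 result
  LoPart = uint32_t(Wide);             // low half of the expanded result
  HiPart = uint32_t(Wide >> 32);       // high half of the expanded result (little endian)
}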
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!"); + EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts); + OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0)); + } + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d09411c42f..4709202965 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; - case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; @@ -152,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { NewVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); @@ -447,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; - + // See if the target wants to custom expand this node. 
if (CustomLowerNode(N, N->getValueType(ResNo), true)) return; @@ -459,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split the result of this operator!"); + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::VSELECT: @@ -1007,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split this operator's operand!"); + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; @@ -1233,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { DebugLoc DL = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); - + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); - + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} +} @@ -1785,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -1846,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, DAG.getIntPtrConstant(0)); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } } @@ -1862,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getIntPtrConstant(i)); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -1966,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) - Cond1 = ModifyToType(Cond1, CondWidenVT); + Cond1 = ModifyToType(Cond1, CondWidenVT); } SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -2232,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getIntPtrConstant(0)); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2401,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), - isVolatile, - isNonTemporal, isInvariant, - MinAlign(Align, Increment)); + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, 
MinAlign(Align, Increment)); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2593,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); - } while (StWidth != 0 && StWidth >= NewVTWidth); + } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 01622cb295..c3794d5f78 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->Props.IssueWidth) { + if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { ResourcesModel->clearResources(); Packet.clear(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 67f42b18ee..bf0a43785b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -853,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } /// After backtracking, the hazard checker needs to be restored to a state -/// corresponding the the current cycle. +/// corresponding the current cycle. void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { HazardRec->Reset(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5384576a0c..84e41fc4a1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a8dce2176f..f4fe8927f6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -219,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) { return true; } +/// allOperandsUndef - Return true if the node has at least one operand +/// and all operands of the specified node are ISD::UNDEF. +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. + // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + + return true; +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). 
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -387,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -422,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast<StoreSDNode>(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -451,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -467,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. 
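// Illustrative sketch, not part of the patch: why the address space is added to
// the FoldingSet profile here and in the hunks below. Two memory nodes that are
// identical except for address space must not be CSE'd into one node. The struct
// and field names are placeholders for the real profile data.
#include <tuple>

struct MemNodeKey {
  unsigned Opcode;
  unsigned RawSubclassData;
  unsigned AddrSpace;  // newly included in the uniquing key
  bool operator==(const MemNodeKey &O) const {
    return std::tie(Opcode, RawSubclassData, AddrSpace) ==
           std::tie(O.Opcode, O.RawSubclassData, O.AddrSpace);
  }
};
// With AddrSpace in the key, a load from addrspace(0) and an otherwise identical
// load from addrspace(1) compare unequal and stay distinct in the CSE map.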
+ if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -1084,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1183,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -1949,6 +2004,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero |= (~InMask); + KnownOne &= (~KnownZero); return; } case ISD::FGETSIGN: @@ -2427,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2674,6 +2748,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1 == N2) return N1; break; case ISD::CONCAT_VECTORS: + // Concat of UNDEFs is UNDEF. + if (N1.getOpcode() == ISD::UNDEF && + N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. 
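// Illustrative sketch, not part of the patch: the constant folding added above
// for FCEIL/FTRUNC/FFLOOR maps each opcode to an APFloat::roundToIntegral
// rounding direction, which corresponds to the usual libm rounding functions.
#include <cmath>

enum UnaryFPOp { CeilOp, TruncOp, FloorOp };

static double foldRoundingOp(UnaryFPOp Op, double V) {
  switch (Op) {
  case CeilOp:  return std::ceil(V);   // FCEIL  ~ rmTowardPositive
  case TruncOp: return std::trunc(V);  // FTRUNC ~ rmTowardZero
  case FloorOp: return std::floor(V);  // FFLOOR ~ rmTowardNegative
  }
  return V;
}
// e.g. foldRoundingOp(FloorOp, -1.5) == -2.0 while foldRoundingOp(TruncOp, -1.5) == -1.0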
if (N1.getOpcode() == ISD::BUILD_VECTOR && @@ -3879,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -3951,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4007,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4084,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -4203,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4292,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4359,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4383,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4659,13 +4746,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) continue; - bool NoMatch = false; - for (unsigned i = 2; i != NumVTs; ++i) - if (VTs[i] != I->VTs[i]) { - NoMatch = true; - break; - } - if (!NoMatch) + if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) return *I; } diff --git 
a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e5d24e9d18..e0a061d8be 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -942,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; + case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/Instruction.def" } @@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1833,7 +1836,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { assert(Fn->getIntrinsicID() == Intrinsic::donothing); - return; // ignore invokes to @llvm.donothing + // Ignore invokes to @llvm.donothing: jump directly to the next BB. } else LowerCallTo(&I, getValue(Callee), false, LandingPad); @@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5239,9 +5247,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -5538,6 +5546,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. 
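// Illustrative sketch, not part of the patch: the conditions the sanity check
// just below requires before a libm call may be lowered to a plain FP node --
// exactly one argument, a floating-point argument type, a result type equal to
// the argument type, and a call that only reads memory (so dropping it is safe).
// The struct is a placeholder standing in for the real CallInst queries.
struct CallShape {
  unsigned NumArgs;
  bool ArgIsFloatingPoint;
  bool ResultMatchesArgType;
  bool OnlyReadsMemory;
};

static bool isUnaryFloatCall(const CallShape &C) {
  return C.NumArgs == 1 && C.ArgIsFloatingPoint &&
         C.ResultMatchesArgType && C.OnlyReadsMemory;
}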
+ if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5568,150 +5592,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } @@ -6008,11 +5979,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6805,7 +6776,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index. 
if (FrameIndexSDNode *FI = - dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d0fde6f01d..4090002314 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -520,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5233479701..1c357ef375 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -418,6 +419,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f02e895fcb..9f277fd8b4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -971,7 +971,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -2973,6 +2973,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getFunction()->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6212918d6c..6820175c1b 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? 
getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -2322,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC = NewC.lshr(ShiftBits); + if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } } if (isa<ConstantFPSDNode>(N0.getNode())) { @@ -2390,25 +2439,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (N0 == N1) { + // The sext(setcc()) => setcc() optimization relies on the appropriate + // constant being emitted. + uint64_t EqVal; + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond); + break; + case ZeroOrNegativeOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; + break; + } + // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - switch (getBooleanContents(N0.getValueType().isVector())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); - case ZeroOrNegativeOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); - } + return DAG.getConstant(EqVal, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + return DAG.getConstant(EqVal, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(UOF, VT); + return DAG.getConstant(EqVal, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? 
ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2977,10 +3034,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9a751c13b5..4a2b7ec1cf 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. @@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8c97..a04ac3fbc1 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,15 +28,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Triple.h" using namespace llvm; -// SSPBufferSize - The lower bound for a buffer to be considered for stack -// smashing protection. 
-static cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); - namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -46,7 +41,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. @@ -60,6 +55,11 @@ namespace { /// check fails. BasicBlock *CreateFailBB(); + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. bool RequiresStackProtector() const; @@ -70,8 +70,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); @@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, true)) + return true; + + return false; +} + /// RequiresStackProtector - Check whether or not this function needs a stack /// protector based upon the stack protector level. The heuristic we use is to /// add a guard variable to functions that call alloca, and functions with @@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const { if (!F->hasFnAttr(Attribute::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. 
- if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } @@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 1e940b1d07..20da36e8fb 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { - bool ColorWithRegs; LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; @@ -82,7 +81,7 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { + MachineFunctionPass(ID), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73824..5b06195046 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = LI->getInstructionIndex(PHI); VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 54be88a8bb..ddee6b2401 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on it's opcode. 
-unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { if (DefMI->mayLoad()) - return ItinData->Props.LoadLatency; + return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return ItinData->Props.HighLatency; + return SchedModel->HighLatency; return 1; } @@ -629,7 +629,7 @@ static int computeDefOperandLatency( if (FindMin) { // If MinLatency is valid, call getInstrLatency. This uses Stage latency if // it exists before defaulting to MinLatency. - if (ItinData->Props.MinLatency >= 0) + if (ItinData->SchedModel->MinLatency >= 0) return TII->getInstrLatency(ItinData, DefMI); // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. @@ -638,10 +638,10 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData, DefMI); + return TII->defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required -return -1; + return -1; } /// computeOperandLatency - Compute and return the latency of the given data @@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } @@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index babfacb782..75c9467a4f 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -109,8 +109,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are no the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. Given - // section(".eh_frame") gcc will produce - // .section .eh_frame,"a",@progbits + // section(".eh_frame") gcc will produce: + // + // .section .eh_frame,"a",@progbits if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names. 
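Aside: the TargetLowering::SimplifySetCC hunk above rewrites comparisons whose immediate is not a legal icmp immediate for the target, e.g. (X & -256) == 256 becomes (X >> 8) == 1, and X < 0x100000000 becomes (X >> 32) < 1. The snippet below is a standalone illustration of those arithmetic identities, not LLVM code; the variable name and the sample inputs are arbitrary, and it only exercises the unsigned cases the patch handles.

#include <cassert>
#include <initializer_list>

int main() {
  for (unsigned long long x : {0ULL, 1ULL, 255ULL, 256ULL, 257ULL, 511ULL, 512ULL,
                               0xFFFFFFFFULL, 0x100000000ULL, 0x1FFFFFFFFULL}) {
    // (X & -256) == 256  <=>  (X >> 8) == 1: the mask (-256, written ~0xFF here)
    // is a power-of-two complement and 256 lies inside the mask, so the low
    // 8 bits can never affect the result.
    assert(((x & ~0xFFULL) == 256) == ((x >> 8) == 1));
    // X u< 0x100000000  <=>  (X >> 32) u< 1: the constant has 32 trailing zeros.
    assert((x < 0x100000000ULL) == ((x >> 32) < 1));
    // X u<= 0xFFFFFFFF  <=>  (X >> 32) u< 1: the SETULE case adds one first.
    assert((x <= 0xFFFFFFFFULL) == ((x >> 32) < 1));
  }
  return 0;
}

The rewrites only fire when the original constant is not accepted by isLegalICmpImmediate, and the shift amount is taken from countTrailingZeros/countTrailingOnes of the constants involved, as the hunk shows.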
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 153f8711d8..aa601af21b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,16 +94,9 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink 
the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. 
It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// a extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -483,32 +407,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findLocalKill - Look for an instruction below MI in the MBB that kills the -/// specified register. Returns null if there are any other Reg use between the -/// instructions. -static -MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, - MachineInstr *MI, MachineRegisterInfo *MRI, - DenseMap<MachineInstr*, unsigned> &DistanceMap) { - MachineInstr *KillMI = 0; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI == MI || UseMI->getParent() != MBB) - continue; - if (DistanceMap.count(UseMI)) - continue; - if (!UI.getOperand().isKill()) - return 0; - if (KillMI) - return 0; // -O0 kill markers cannot be trusted? - KillMI = UseMI; - } - - return KillMI; -} - /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. static @@ -564,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, @@ -654,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -702,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // uses RegB, convertToThreeAddress must have created more @@ -811,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. 
-static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector<unsigned, 4> &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. -bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it. -bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. - SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, TII, Kills)) - return false; - - // If this instruction kills some virtual registers, we need to - // update the kill information. If it's not possible to do so, - // then bail out. - SmallVector<NewKill, 4> NewKills; - if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) - return false; - - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - return true; -} - /// RescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. @@ -905,14 +722,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { + // Bail immediately if we don't have LV available. We use it to find kills + // efficiently. + if (!LV) + return false; + MachineInstr *MI = &*mi; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) // Must be created from unfolded load. Don't waste time trying this. 
return false; - MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); - if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -999,6 +821,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) // Don't want to extend other live ranges and update kills. return false; + if (MOReg == Reg && !MO.isKill()) + // We can't schedule across a use of the register in question. + return false; + // Ensure that if this is register in question, its the kill we expect. + assert((MOReg != Reg || OtherMI == KillMI) && + "Found multiple kills of a register in a basic block"); } } } @@ -1012,20 +840,13 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, MBB->splice(KillPos, MBB, From, To); DistanceMap.erase(DI); - if (LV) { - // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - } else { - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = KillMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - MO.setIsKill(false); - } - MI->addRegisterKilled(Reg, 0); - } + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); + DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; } @@ -1061,14 +882,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { + // Bail immediately if we don't have LV available. We use it to find kills + // efficiently. + if (!LV) + return false; + MachineInstr *MI = &*mi; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); - if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -1094,6 +920,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, continue; if (isDefTooClose(MOReg, DI->second, MI, MBB)) return false; + if (MOReg == Reg && !MO.isKill()) + return false; Uses.insert(MOReg); if (MO.isKill() && MOReg != Reg) Kills.insert(MOReg); @@ -1135,6 +963,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. return false; + if (OtherMI != MI && MOReg == Reg && !MO.isKill()) + // We can't schedule across a use of the register in question. + return false; } else { OtherDefs.push_back(MOReg); } @@ -1165,19 +996,13 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. 
DistanceMap.erase(DI); - if (LV) { - // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - } else { - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = KillMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - MO.setIsKill(false); - } - MI->addRegisterKilled(Reg, 0); - } + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(KillMI); + + DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; } @@ -1202,15 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. bool regBKilled = isKilled(MI, regB, MRI, TII); - if (!regBKilled && MI.getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { - ++NumDeletes; - return true; // Done with this instruction. - } if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -1294,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (NewOpc != 0) { const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { - MachineFunction &MF = *mbbi->getParent(); - // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF)); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, Reg, + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -1380,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands where found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->isInlineAsm() ? + MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. 
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. 
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable<SlotIndexes>(); LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); OptLevel = TM.getOptLevel(); @@ -1396,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1428,199 +1400,48 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - - // Deal with <undef> uses immediately - simply rewrite the src operand. - if (mi->getOperand(SrcIdx).isUndef()) { - unsigned DstReg = mi->getOperand(DstIdx).getReg(); - // Constrain the DstReg register class if required. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) - if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, - TRI, MF)) - MRI->constrainRegClass(DstReg, RC); - mi->getOperand(SrcIdx).setReg(DstReg); - DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi); - continue; - } - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } - // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; - - // If the instruction has a single pair of tied operands, try some - // transformations that may either eliminate the tied operands or - // improve the opportunities for coalescing away the register copy. - if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + = TiedOperands.begin()->second; + if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - - // If the registers are already equal, nothing needs to be done. - if (mi->getOperand(SrcIdx).getReg() == - mi->getOperand(DstIdx).getReg()) - break; // Done with this instruction. - - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) - break; // The tied operands have been eliminated. - } - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. - AllUsesCopied = false; + unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + if (SrcReg != DstReg && + TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) { + // The tied operands have been eliminated or shifted further down the + // block to ease elimination. Continue processing with 'nmi'. + TiedOperands.clear(); + mi = nmi; continue; } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. - for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. 
- bool isCopy = false; - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getUniqueVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - isCopy = true; - } - - // Update DistanceMap. - MachineBasicBlock::iterator prevMI = prior(mi); - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - - // Make sure regA is a legal regclass for the SrcIdx operand. - if (TargetRegisterInfo::isVirtualRegister(regA) && - TargetRegisterInfo::isVirtualRegister(regB)) - MRI->constrainRegClass(regA, MRI->getRegClass(regB)); - - MO.setReg(regA); - - if (isCopy) - // Propagate SrcRegMap. - SrcRegMap[regA] = regB; - } - - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } } + } - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? - TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - - MadeChange = true; - + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } @@ -1645,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } - // Some remat'ed instructions are dead. - for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(i); - if (MRI->use_nodbg_empty(VReg)) { - MachineInstr *DefMI = MRI->getVRegDef(VReg); - DefMI->eraseFromParent(); - } - } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve // SSA form. It's now safe to de-SSA. 
  MadeChange |= EliminateRegSequences();
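
The refactoring above pulls the old monolithic runOnMachineFunction() loop apart into two helpers: collectTiedOperands() gathers, per tied source register, the (use index, def index) pairs, and processTiedPairs() then prepends the copies and repairs kill flags. The following standalone C++ sketch uses a deliberately simplified operand model (the struct layout, field names, and printed output are assumptions for illustration only, not the LLVM MachineInstr/MachineOperand API) to show the shape of that split:

// A minimal, self-contained sketch (illustrative names, simplified operand
// model -- not the real MachineInstr/MachineOperand API) of the
// collectTiedOperands / processTiedPairs split introduced above.
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

struct Operand {
  unsigned Reg;   // virtual register number
  bool IsUse;     // use vs. def
  bool IsKill;    // last use of Reg in this instruction
  int TiedDefIdx; // index of the tied def operand, or -1 if untied
};

struct Instr {
  const char *Name;
  std::vector<Operand> Ops;
};

// Map each tied source register to its (use index, def index) pairs,
// mirroring the TiedOperandMap that collectTiedOperands() fills in.
using TiedOperandMap =
    std::map<unsigned, std::vector<std::pair<unsigned, unsigned>>>;

static bool collectTiedOperands(const Instr &MI, TiedOperandMap &Tied) {
  bool AnyPairs = false;
  for (unsigned i = 0, e = MI.Ops.size(); i != e; ++i) {
    const Operand &MO = MI.Ops[i];
    if (!MO.IsUse || MO.TiedDefIdx < 0)
      continue;
    Tied[MO.Reg].push_back({i, (unsigned)MO.TiedDefIdx});
    AnyPairs = true;
  }
  return AnyPairs;
}

// For each tied pair, prepend a copy DstReg = SrcReg and rewrite the tied
// use to read the destination register; if the use carried a kill flag,
// the kill conceptually moves to the prepended copy.
static void processTiedPairs(Instr &MI,
    const std::vector<std::pair<unsigned, unsigned>> &Pairs) {
  bool RemovedKillFlag = false;
  unsigned RegB = 0;
  for (const auto &P : Pairs) {
    unsigned SrcIdx = P.first, DstIdx = P.second;
    RegB = MI.Ops[SrcIdx].Reg;
    unsigned RegA = MI.Ops[DstIdx].Reg;
    if (RegA == RegB)
      continue; // This use already matches its tied def; leave it alone.
    std::printf("  prepend: COPY %%vreg%u = %%vreg%u\n", RegA, RegB);
    if (MI.Ops[SrcIdx].IsKill) {
      MI.Ops[SrcIdx].IsKill = false;
      RemovedKillFlag = true;
    }
    MI.Ops[SrcIdx].Reg = RegA; // Tied use now reads the destination register.
  }
  if (RemovedKillFlag)
    std::printf("  kill of %%vreg%u now belongs to the prepended COPY\n", RegB);
}

int main() {
  // Pseudo two-address instruction: %vreg2 = ADD %vreg0<kill,tied>, %vreg1
  // Operand 0 is the def (%vreg2); operand 1 (%vreg0) is tied to it.
  Instr Add{"ADD", {{2, false, false, -1},
                    {0, true, true, 0},
                    {1, true, false, -1}}};
  TiedOperandMap TiedOperands;
  if (collectTiedOperands(Add, TiedOperands)) {
    std::printf("%s has %zu tied register(s)\n", Add.Name, TiedOperands.size());
    for (const auto &Entry : TiedOperands)
      processTiedPairs(Add, Entry.second);
  }
  return 0;
}

Keeping collection separate from rewriting is what lets the pass in this patch skip instructions with no tied operands early (collectTiedOperands() returning false) and handle the single-tied-pair TryInstructionTransform special case before any copies are inserted.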