Diffstat (limited to 'lib/CodeGen')
51 files changed, 1665 insertions, 271 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 447f3981b5..1f3f5a5f38 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -318,7 +318,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasZExtAttr() || CallerRetAttr.hasSExtAttr()) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -358,7 +358,7 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasZExtAttr() || CallerRetAttr.hasSExtAttr()) return false; // Check if the only use is a function return node. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 260871d33b..50f0fc30a0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -137,80 +137,113 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, report_fatal_error("Error parsing inline asm\n"); } +static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; -/// EmitInlineAsm - This method formats and emits the specified machine -/// instruction that is an inline asm. -void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { - assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - + const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - // Count the number of register definitions to find the asm string. - unsigned NumDefs = 0; - for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); - ++NumDefs) - assert(NumDefs != NumOperands-2 && "No asm string?"); - - assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; - // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. - const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; - // If this asmstr is empty, just print the #APP/#NOAPP markers. - // These are useful to see where empty asm's wound up. - if (AsmStr[0] == 0) { - // Don't emit the comments if writing to a .o file. - if (!OutStreamer.hasRawTextSupport()) return; + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. 
+ break; + } + if (Done) break; - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); - return; - } + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; - // Emit the #APP start marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; - // Get the !srcloc metadata node if we have it, and decode the loc cookie from - // it. - unsigned LocCookie = 0; - const MDNode *LocMD = 0; - for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata() && - (LocMD = MI->getOperand(i-1).getMetadata()) && - LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } - } - } + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); - // Emit the inline asm to a temporary string so we can emit it through - // EmitInlineAsm. - SmallString<256> StringData; - raw_svector_ostream OS(StringData); + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; - OS << '\t'; + bool Error = false; - // The variant of the current asmprinter. - int AsmPrinterVariant = MAI->getAssemblerDialect(); - int InlineAsmVariant = MI->getInlineAsmDialect(); + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } - // Switch to the inline assembly variant. - if (AsmPrinterVariant != InlineAsmVariant) { - if (InlineAsmVariant == 0) - OS << ".att_syntax\n\t"; - else - OS << ".intel_syntax\n\t"; + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + int AsmPrinterVariant, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. 
+ unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; while (*LastEmitted) { switch (*LastEmitted) { @@ -283,7 +316,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); - PrintSpecial(MI, OS, Val.c_str()); + AP->PrintSpecial(MI, OS, Val.c_str()); LastEmitted = StrEnd+1; break; } @@ -351,7 +384,6 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // FIXME: What if the operand isn't an MBB, report error? OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { - AsmPrinter *AP = const_cast<AsmPrinter*>(this); if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, @@ -373,15 +405,74 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } } - // Switch to the AsmPrinter variant. - if (AsmPrinterVariant != InlineAsmVariant) { - if (AsmPrinterVariant == 0) - OS << "\n\t.att_syntax"; - else - OS << "\n\t.intel_syntax"; + OS << '\n' << (char)0; // null terminate string. +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. + if (!OutStreamer.hasRawTextSupport()) return; + + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); + return; } - OS << '\n' << (char)0; // null terminate string. + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. 
+ int AsmPrinterVariant = MAI->getAssemblerDialect(); + InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (InlineAsmVariant == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, + AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 963b8cdf34..92d1bbe4f7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -237,8 +237,8 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT - void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5e5d9c83d5..ee8a6f36e7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -330,6 +330,9 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, SPCU->addType(Arg, ATy); if (ATy.isArtificial()) SPCU->addFlag(Arg, dwarf::DW_AT_artificial); + if (ATy.isObjectPointer()) + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, dwarf::DW_FORM_ref4, + Arg); SPDie->addChild(Arg); } DIE *SPDeclDie = SPDie; @@ -496,21 +499,26 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { return NULL; SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) + TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); + if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + } // Collect lexical scope children first. 
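The DwarfAccelTable.h hunk above (and the LiveIntervalUnion.h hunk later in this patch) replaces the hand-written "// DO NOT IMPLEMENT" convention with LLVM_DELETED_FUNCTION. A minimal sketch of the idiom; the macro name is real, but the expansion shown is an assumption about how the shim in Support/Compiler.h behaves:

    // Assumed expansion: use C++11 deleted functions where available,
    // otherwise fall back to a private, never-defined declaration.
    #if defined(__GXX_EXPERIMENTAL_CXX0X__)
    #define LLVM_DELETED_FUNCTION = delete
    #else
    #define LLVM_DELETED_FUNCTION
    #endif

    class NonCopyable {
      NonCopyable(const NonCopyable&) LLVM_DELETED_FUNCTION;
      void operator=(const NonCopyable&) LLVM_DELETED_FUNCTION;
    };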
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) + TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); + if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + } const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) @@ -544,6 +552,10 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); + if (DS.isSubprogram() && ObjectPointer != NULL) + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, ObjectPointer); + if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index df4aedb540..5508674bc9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -159,11 +159,19 @@ public: bool isArtificial() const { if (Var.isArtificial()) return true; - if (Var.getTag() == dwarf::DW_TAG_arg_variable - && getType().isArtificial()) + if (getType().isArtificial()) return true; return false; } + + bool isObjectPointer() const { + if (Var.isObjectPointer()) + return true; + if (getType().isObjectPointer()) + return true; + return false; + } + bool variableHasComplexAddress() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 386509b702..fa6d4e16cf 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_library(LLVMCodeGen MachineCopyPropagation.cpp MachineCSE.cpp MachineDominators.cpp + MachinePostDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -102,6 +103,7 @@ add_llvm_library(LLVMCodeGen TargetInstrInfoImpl.cpp TargetLoweringObjectFileImpl.cpp TargetOptionsImpl.cpp + TargetSchedule.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 65f0941287..a53f6f8d0f 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 622127cc74..37828a70b5 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); @@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. 
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index cac0c83bca..24daafaa62 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -172,7 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); } MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, @@ -191,7 +191,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // emission fails. MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -266,7 +266,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 3e9b485ca8..f4ebcd6fa4 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -69,21 +69,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } -/// killedInRange - Return true if the interval has kills in [Start,End). -bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { - Ranges::const_iterator r = - std::lower_bound(ranges.begin(), ranges.end(), End); - - // Now r points to the first interval with start >= End, or ranges.end(). - if (r == ranges.begin()) - return false; - - --r; - // Now r points to the last interval with end <= End. - // r->end is the kill point. - return r->end >= Start && r->end < End; -} - // overlaps - Return true if the intersection of the two live intervals is // not empty. 
// @@ -716,27 +701,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { return V2; } -void LiveInterval::Copy(const LiveInterval &RHS, - MachineRegisterInfo *MRI, - VNInfo::Allocator &VNInfoAllocator) { - ranges.clear(); - valnos.clear(); - std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg); - MRI->setRegAllocationHint(reg, Hint.first, Hint.second); - - weight = RHS.weight; - for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { - const VNInfo *VNI = RHS.getValNumInfo(i); - createValueCopy(VNI, VNInfoAllocator); - } - for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { - const LiveRange &LR = RHS.ranges[i]; - addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); - } - - verify(); -} - unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) @@ -748,7 +712,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveRange::dump() const { dbgs() << *this << "\n"; } @@ -785,7 +749,7 @@ void LiveInterval::print(raw_ostream &OS) const { } } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveInterval::dump() const { dbgs() << *this << "\n"; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 17f9d9e982..dca305849e 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -156,7 +156,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { MF->print(OS, Indexes); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } @@ -731,6 +731,70 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, return CanSeparate; } +void LiveIntervals::extendToIndices(LiveInterval *LI, + ArrayRef<SlotIndex> Indices) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + LRCalc->extend(LI, Indices[i]); +} + +void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, + SmallVectorImpl<SlotIndex> *EndPoints) { + LiveRangeQuery LRQ(*LI, Kill); + VNInfo *VNI = LRQ.valueOut(); + if (!VNI) + return; + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); + SlotIndex MBBStart, MBBEnd; + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + + // If VNI isn't live out from KillMBB, the value is trivially pruned. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(Kill, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + return; + } + + // VNI is live out of KillMBB. + LI->removeRange(Kill, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + + // Find all blocks that are reachable from MBB without leaving VNI's live + // range. + for (df_iterator<MachineBasicBlock*> + I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) { + MachineBasicBlock *MBB = *I; + // KillMBB itself was already handled. + if (MBB == KillMBB) { + ++I; + continue; + } + + // Check if VNI is live in to MBB. + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveRangeQuery LRQ(*LI, MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI live range. Prune the search. + I.skipChildren(); + continue; + } + + // Prune the search if VNI is killed in MBB. 
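A recurring change throughout this patch widens the dump() guards from #ifndef NDEBUG so that the dump bodies can also be compiled into release builds when LLVM_ENABLE_DUMP is defined. The pattern as it now appears around every affected dump():

    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void LiveInterval::dump() const {
      dbgs() << *this << "\n";
    }
    #endif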
+ if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } + + // VNI is live through MBB. + LI->removeRange(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; + } +} //===----------------------------------------------------------------------===// // Register allocator hooks. @@ -1196,14 +1260,35 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(Reg), - UE = MRI.use_nodbg_end(); - UI != UE; UI.skipInstruction()) { - const MachineInstr* MI = &*UI; - SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); - if (InstSlot > LastUse && InstSlot < OldIdx) - LastUse = InstSlot; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; + } + } else { + MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); + MachineBasicBlock::iterator MII(MI); + ++MII; + MachineBasicBlock* MBB = MI->getParent(); + for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){ + for (MachineInstr::mop_iterator MOI = MII->operands_begin(), + MOE = MII->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& mop = *MOI; + if (!mop.isReg() || mop.getReg() == 0 || + TargetRegisterInfo::isVirtualRegister(mop.getReg())) + continue; + + if (TRI.hasRegUnit(mop.getReg(), Reg)) + LastUse = LIS.getInstructionIndex(MII); + } + } } return LastUse; } diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index cd4e690c37..4d41fca85a 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -178,8 +178,8 @@ public: bool checkLoopInterference(MachineLoopRange*); private: - Query(const Query&); // DO NOT IMPLEMENT - void operator=(const Query&); // DO NOT IMPLEMENT + Query(const Query&) LLVM_DELETED_FUNCTION; + void operator=(const Query&) LLVM_DELETED_FUNCTION; }; // Array of LiveIntervalUnions. diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h index b3e2d7f4b4..8f22c24478 100644 --- a/lib/CodeGen/LiveRegMatrix.h +++ b/lib/CodeGen/LiveRegMatrix.h @@ -15,7 +15,7 @@ // Register units are defined in MCRegisterInfo.h, they represent the smallest // unit of interference when dealing with overlapping physical registers. The // LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When -// a virtual register is assigned to a physicval register, the live range for +// a virtual register is assigned to a physical register, the live range for // the virtual register is inserted into the LiveIntervalUnion for each regunit // in the physreg. 
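The new LiveIntervals::pruneValue() above walks the CFG with a depth-first iterator and calls skipChildren() to stop descending once the value is no longer live through a block. A minimal sketch of that traversal shape, where valueLiveInto() is a hypothetical stand-in for the LiveRangeQuery checks:

    #include "llvm/ADT/DepthFirstIterator.h"

    for (df_iterator<MachineBasicBlock*> I = df_begin(KillMBB),
                                         E = df_end(KillMBB); I != E;) {
      if (!valueLiveInto(*I)) {  // hypothetical: VNI not live into *I
        I.skipChildren();        // successors can't be reached inside VNI
        continue;
      }
      // Remove the covered part of the live range here, then descend.
      ++I;
    }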
// diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 939e795b4a..f0b522bd7d 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,7 +25,10 @@ using namespace llvm; char LiveStacks::ID = 0; -INITIALIZE_PASS(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveStacks, "livestacks", "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index f294124228..7359bb92a1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -65,7 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 0d44fd9938..22a64e5509 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -229,7 +229,7 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { return 0; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineBasicBlock::dump() const { print(dbgs()); } @@ -972,6 +972,80 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { return Weights.begin() + index; } +/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed +/// as of just before "MI". +/// +/// Search is localised to a neighborhood of +/// Neighborhood instructions before (searching for defs or kills) and N +/// instructions after (searching just for defs) MI. +MachineBasicBlock::LivenessQueryResult +MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, + unsigned Reg, MachineInstr *MI, + unsigned Neighborhood) { + + unsigned N = Neighborhood; + MachineBasicBlock *MBB = MI->getParent(); + + // Start by searching backwards from MI, looking for kills, reads or defs. + + MachineBasicBlock::iterator I(MI); + // If this is the first insn in the block, don't search backwards. + if (I != MBB->begin()) { + do { + --I; + + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.Kills) + // Register killed, so isn't live. + return LQR_Dead; + + else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap) + // Defined or read without a previous kill - live. + return (Analysis.Defines || Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + } while (I != MBB->begin() && --N > 0); + } + + // Did we get to the start of the block? + if (I == MBB->begin()) { + // If so, the register's state is definitely defined by the live-in state. + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); + RAI.isValid(); ++RAI) { + if (MBB->isLiveIn(*RAI)) + return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; + } + + return LQR_Dead; + } + + N = Neighborhood; + + // Try searching forwards from MI, looking for reads or defs. + I = MachineBasicBlock::iterator(MI); + // If this is the last insn in the block, don't search forwards. 
+ if (I != MBB->end()) { + for (++I; I != MBB->end() && N > 0; ++I, --N) { + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.ReadsOverlap) + // Used, therefore must have been live. + return (Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + else if (Analysis.DefinesOverlap) + // Defined (but not read) therefore cannot have been live. + return LQR_Dead; + } + } + + // At this point we have no idea of the liveness of the register. + return LQR_Unknown; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index c4dca2cd15..9a8cc48172 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain( assert(BB); assert(BlockToChain[BB] == &Chain); assert(*llvm::prior(Chain.end()) == BB); - MachineBasicBlock *BestSucc = 0; // Look for the best viable successor if there is one to place immediately // after this block. - BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index c282332037..304e39e159 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -60,8 +60,8 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( - Fn->getAttributes().getFnAttributes())); + FrameInfo->ensureMaxAlignment(Fn->getAttributes(). + getFnAttributes().getStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. 
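MachineBasicBlock::computeRegisterLiveness() above answers with one of four LivenessQueryResult states rather than a boolean, since its bounded local scan can be inconclusive. A hedged usage sketch; treating LQR_Unknown as live is a suggested conservative default, not something the patch prescribes:

    switch (MBB->computeRegisterLiveness(TRI, Reg, MI, /*Neighborhood=*/10)) {
    case MachineBasicBlock::LQR_Dead:
      // Reg was killed or never defined here; safe to clobber.
      break;
    case MachineBasicBlock::LQR_Live:
    case MachineBasicBlock::LQR_OverlappingLive:
      // Reg, or a register overlapping it, holds a live value.
      break;
    case MachineBasicBlock::LQR_Unknown:
      // The bounded local scan was inconclusive; assume live.
      break;
    }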
@@ -284,7 +284,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); } @@ -534,7 +534,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } @@ -633,7 +633,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineJumpTableInfo::dump() const { print(dbgs()); } #endif @@ -768,6 +768,6 @@ void MachineConstantPool::print(raw_ostream &OS) const { } } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineConstantPool::dump() const { print(dbgs()); } #endif diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 0508b9f612..b71b37ac6f 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -198,7 +198,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return !strcmp(getSymbolName(), Other.getSymbolName()) && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: - return getBlockAddress() == Other.getBlockAddress(); + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); case MO_RegisterMask: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: @@ -239,7 +240,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { MO.getOffset()); case MachineOperand::MO_BlockAddress: return hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getBlockAddress()); + MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: @@ -362,6 +363,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_BlockAddress: OS << '<'; WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; case MachineOperand::MO_RegisterMask: @@ -1496,7 +1498,7 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) { } void MachineInstr::dump() const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " " << *this; #endif } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index b7de7bfb49..1f7fbfc719 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -109,10 +109,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); - SmallVector<unsigned, 8> LocalDefs; - SmallSet<unsigned, 8> LocalDefSet; + SmallVector<unsigned, 32> LocalDefs; + SmallSet<unsigned, 32> LocalDefSet; SmallSet<unsigned, 8> DeadDefSet; - SmallSet<unsigned, 8> KilledDefSet; + SmallSet<unsigned, 16> KilledDefSet; SmallVector<unsigned, 8> ExternUses; SmallSet<unsigned, 8> ExternUseSet; SmallSet<unsigned, 8> KilledUseSet; @@ -181,7 +181,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, Defs.clear(); } - SmallSet<unsigned, 8> Added; + SmallSet<unsigned, 32> Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; if (Added.insert(Reg)) { @@ -248,10 +248,10 
@@ bool llvm::finalizeBundles(MachineFunction &MF) { // MachineOperand iterator //===----------------------------------------------------------------------===// -MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::VirtRegInfo MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { - RegInfo RI = { false, false, false }; + VirtRegInfo RI = { false, false, false }; for(; isValid(); ++*this) { MachineOperand &MO = deref(); if (!MO.isReg() || MO.getReg() != Reg) @@ -276,3 +276,53 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, } return RI; } + +MachineOperandIteratorBase::PhysRegInfo +MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, + const TargetRegisterInfo *TRI) { + bool AllDefsDead = true; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; + + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "analyzePhysReg not given a physical register!"); + for (; isValid(); ++*this) { + MachineOperand &MO = deref(); + + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + PRI.Clobbers = true; // Regmask clobbers Reg. + + if (!MO.isReg()) + continue; + + unsigned MOReg = MO.getReg(); + if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + continue; + + bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); + bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); + + if (IsRegOrSuperReg && MO.readsReg()) { + // Reg or a super-reg is read, and perhaps killed also. + PRI.Reads = true; + PRI.Kills = MO.isKill(); + } if (IsRegOrOverlapping && MO.readsReg()) { + PRI.ReadsOverlap = true;// Reg or an overlapping register is read. + } + + if (!MO.isDef()) + continue; + + if (IsRegOrSuperReg) { + PRI.Defines = true; // Reg or a super-register is defined. + if (!MO.isDead()) + AllDefsDead = false; + } + if (IsRegOrOverlapping) + PRI.Clobbers = true; // Reg or an overlapping reg is defined. + } + + if (AllDefsDead && PRI.Defines) + PRI.DefinesDead = true; // Reg or super-register was defined and was dead. + + return PRI; +} diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 05d2f2a885..27afeec1d9 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -74,7 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineLoop::dump() const { print(dbgs()); } diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp new file mode 100644 index 0000000000..c3f6e9249e --- /dev/null +++ b/lib/CodeGen/MachinePostDominators.cpp @@ -0,0 +1,55 @@ +//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// post dominators on machine functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePostDominators.h" + +using namespace llvm; + +char MachinePostDominatorTree::ID = 0; + +//declare initializeMachinePostDominatorTreePass +INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", + "MachinePostDominator Tree Construction", true, true) + +MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { + initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate + // postdominator +} + +FunctionPass * +MachinePostDominatorTree::createMachinePostDominatorTreePass() { + return new MachinePostDominatorTree(); +} + +bool +MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + return false; +} + +MachinePostDominatorTree::~MachinePostDominatorTree() { + delete DT; +} + +void +MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void +MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const { + DT->print(OS); +} diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index e36c7d4315..d7ecec4163 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -279,7 +279,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { // unimplemented } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) @@ -484,6 +484,8 @@ void ScheduleDAGMI::releaseRoots() { void ScheduleDAGMI::schedule() { buildDAGWithRegPressure(); + postprocessDAG(); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -522,6 +524,13 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { initRegPressure(); } +/// Apply each ScheduleDAGMutation step in order. +void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues() { // Initialize the strategy before modifying the DAG. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 6090752081..32c02bf0f0 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -240,7 +240,7 @@ void SchedulePostRATDList::exitRegion() { ScheduleDAGInstrs::exitRegion(); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dumpSchedule - dump the scheduled Sequence. 
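The new MachinePostDominators.cpp above adds a machine-level post-dominator analysis and registers it with the pass registry (see the CodeGen.cpp hunk earlier in this patch). A hedged sketch of how a machine function pass would request it; MyPass is illustrative and not part of the patch:

    #include "llvm/CodeGen/MachinePostDominators.h"

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachinePostDominatorTree>();
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool MyPass::runOnMachineFunction(MachineFunction &MF) {
      MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
      (void)PDT; // query post-dominance relations between blocks here
      return false;
    }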
void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index c021a937b6..06f69c1e0d 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -330,9 +330,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); + AU.addRequired<CalculateSpillWeights>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -508,7 +508,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. -/// @prarm IsHint True when PhysReg is VirtReg's preferred register. +/// @param IsHint True when PhysReg is VirtReg's preferred register. /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index d018835456..dd0f548867 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(NumInflated , "Number of register classes inflated"); +STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); +STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); static cl::opt<bool> EnableJoining("join-liveintervals", @@ -66,6 +68,11 @@ VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); +// Temporary option for testing new coalescer algo. +static cl::opt<bool> +NewCoalescer("new-coalescer", cl::Hidden, + cl::desc("Use new coalescer algorithm")); + namespace { class RegisterCoalescer : public MachineFunctionPass, private LiveRangeEdit::Delegate { @@ -123,6 +130,9 @@ namespace { /// can use this information below to update aliases. bool joinIntervals(CoalescerPair &CP); + /// Attempt joining two virtual registers. Return true on success. + bool joinVirtRegs(CoalescerPair &CP); + /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); @@ -583,7 +593,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) + if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -1102,6 +1112,728 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { return true; } +//===----------------------------------------------------------------------===// +// Interference checking and interval joining +//===----------------------------------------------------------------------===// +// +// In the easiest case, the two live ranges being joined are disjoint, and +// there is no interference to consider. 
It is quite common, though, to have +// overlapping live ranges, and we need to check if the interference can be +// resolved. +// +// The live range of a single SSA value forms a sub-tree of the dominator tree. +// This means that two SSA values overlap if and only if the def of one value +// is contained in the live range of the other value. As a special case, the +// overlapping values can be defined at the same index. +// +// The interference from an overlapping def can be resolved in these cases: +// +// 1. Coalescable copies. The value is defined by a copy that would become an +// identity copy after joining SrcReg and DstReg. The copy instruction will +// be removed, and the value will be merged with the source value. +// +// There can be several copies back and forth, causing many values to be +// merged into one. We compute a list of ultimate values in the joined live +// range as well as a mappings from the old value numbers. +// +// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI +// predecessors have a live out value. It doesn't cause real interference, +// and can be merged into the value it overlaps. Like a coalescable copy, it +// can be erased after joining. +// +// 3. Copy of external value. The overlapping def may be a copy of a value that +// is already in the other register. This is like a coalescable copy, but +// the live range of the source register must be trimmed after erasing the +// copy instruction: +// +// %src = COPY %ext +// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. +// +// 4. Clobbering undefined lanes. Vector registers are sometimes built by +// defining one lane at a time: +// +// %dst:ssub0<def,read-undef> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// The live range of %src overlaps the %dst value defined by FOO, but +// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane +// which was undef anyway. +// +// The value mapping is more complicated in this case. The final live range +// will have different value numbers for both FOO and BAR, but there is no +// simple mapping from old to new values. It may even be necessary to add +// new PHI values. +// +// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that +// is live, but never read. This can happen because we don't compute +// individual live ranges per lane. +// +// %dst<def> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// This kind of interference is only resolved locally. If the clobbered +// lane value escapes the block, the join is aborted. + +namespace { +/// Track information about values in a single virtual register about to be +/// joined. Objects of this class are always created in pairs - one for each +/// side of the CoalescerPair. +class JoinVals { + LiveInterval &LI; + + // Location of this register in the final joined register. + // Either CP.DstIdx or CP.SrcIdx. + unsigned SubIdx; + + // Values that will be present in the final live range. + SmallVectorImpl<VNInfo*> &NewVNInfo; + + const CoalescerPair &CP; + LiveIntervals *LIS; + SlotIndexes *Indexes; + const TargetRegisterInfo *TRI; + + // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. + // This is suitable for passing to LiveInterval::join(). + SmallVector<int, 8> Assignments; + + // Conflict resolution for overlapping values. + enum ConflictResolution { + // No overlap, simply keep this value. + CR_Keep, + + // Merge this value into OtherVNI and erase the defining instruction. 
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external + // values. + CR_Erase, + + // Merge this value into OtherVNI but keep the defining instruction. + // This is for the special case where OtherVNI is defined by the same + // instruction. + CR_Merge, + + // Keep this value, and have it replace OtherVNI where possible. This + // complicates value mapping since OtherVNI maps to two different values + // before and after this def. + // Used when clobbering undefined or dead lanes. + CR_Replace, + + // Unresolved conflict. Visit later when all values have been mapped. + CR_Unresolved, + + // Unresolvable conflict. Abort the join. + CR_Impossible + }; + + // Per-value info for LI. The lane bit masks are all relative to the final + // joined register, so they can be compared directly between SrcReg and + // DstReg. + struct Val { + ConflictResolution Resolution; + + // Lanes written by this def, 0 for unanalyzed values. + unsigned WriteLanes; + + // Lanes with defined values in this register. Other lanes are undef and + // safe to clobber. + unsigned ValidLanes; + + // Value in LI being redefined by this def. + VNInfo *RedefVNI; + + // Value in the other live range that overlaps this def, if any. + VNInfo *OtherVNI; + + // True when the live range of this value will be pruned because of an + // overlapping CR_Replace value in the other live range. + bool Pruned; + + // True once Pruned above has been computed. + bool PrunedComputed; + + Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {} + + bool isAnalyzed() const { return WriteLanes != 0; } + }; + + // One entry per value number in LI. + SmallVector<Val, 8> Vals; + + unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef); + VNInfo *stripCopies(VNInfo *VNI); + ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + void computeAssignment(unsigned ValNo, JoinVals &Other); + bool taintExtent(unsigned, unsigned, JoinVals&, + SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); + bool isPrunedValue(unsigned ValNo, JoinVals &Other); + +public: + JoinVals(LiveInterval &li, unsigned subIdx, + SmallVectorImpl<VNInfo*> &newVNInfo, + const CoalescerPair &cp, + LiveIntervals *lis, + const TargetRegisterInfo *tri) + : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis), + Indexes(LIS->getSlotIndexes()), TRI(tri), + Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums()) + {} + + /// Analyze defs in LI and compute a value mapping in NewVNInfo. + /// Returns false if any conflicts were impossible to resolve. + bool mapValues(JoinVals &Other); + + /// Try to resolve conflicts that require all values to be mapped. + /// Returns false if any conflicts were impossible to resolve. + bool resolveConflicts(JoinVals &Other); + + /// Prune the live range of values in Other.LI where they would conflict with + /// CR_Replace values in LI. Collect end points for restoring the live range + /// after joining. + void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints); + + /// Erase any machine instructions that have been coalesced away. + /// Add erased instructions to ErasedInstrs. + /// Add foreign virtual registers to ShrinkRegs if their live range ended at + /// the erased instrs. 
+ void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs); + + /// Get the value assignments suitable for passing to LiveInterval::join. + const int *getAssignments() const { return &Assignments[0]; } +}; +} // end anonymous namespace + +/// Compute the bitmask of lanes actually written by DefMI. +/// Set Redef if there are any partial register definitions that depend on the +/// previous value of the register. +unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { + unsigned L = 0; + for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) + continue; + L |= TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg())); + if (MO->readsReg()) + Redef = true; + } + return L; +} + +/// Find the ultimate value that VNI was copied from. +VNInfo *JoinVals::stripCopies(VNInfo *VNI) { + while (!VNI->isPHIDef()) { + MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); + assert(MI && "No defining instruction"); + if (!MI->isFullCopy()) + break; + unsigned Reg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + break; + LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + if (!LRQ.valueIn()) + break; + VNI = LRQ.valueIn(); + } + return VNI; +} + +/// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. +/// Return a conflict resolution when possible, but leave the hard cases as +/// CR_Unresolved. +/// Recursively calls computeAssignment() on this and Other, guaranteeing that +/// both OtherVNI and RedefVNI have been analyzed and mapped before returning. +/// The recursion always goes upwards in the dominator tree, making loops +/// impossible. +JoinVals::ConflictResolution +JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + assert(!V.isAnalyzed() && "Value has already been analyzed!"); + VNInfo *VNI = LI.getValNumInfo(ValNo); + if (VNI->isUnused()) { + V.WriteLanes = ~0u; + return CR_Keep; + } + + // Get the instruction defining this value, compute the lanes written. + const MachineInstr *DefMI = 0; + if (VNI->isPHIDef()) { + // Conservatively assume that all lanes in a PHI are valid. + V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); + } else { + DefMI = Indexes->getInstructionFromIndex(VNI->def); + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. + if (Redef) { + V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + assert(V.RedefVNI && "Instruction is reading nonexistent value"); + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } + + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) + V.ValidLanes &= ~V.WriteLanes; + } + + // Find the value in Other that overlaps VNI->def, if any. 
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def); + + // It is possible that both values are defined by the same instruction, or + // the values are PHIs defined in the same block. When that happens, the two + // values should be merged into one, but not into any preceding value. + // The first value defined or visited gets CR_Keep, the other gets CR_Merge. + if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { + assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); + + // One value stays, the other is merged. Keep the earlier one, or the first + // one we see. + if (OtherVNI->def < VNI->def) + Other.computeAssignment(OtherVNI->id, *this); + else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { + // This is an early-clobber def overlapping a live-in value in the other + // register. Not mergeable. + V.OtherVNI = OtherLRQ.valueIn(); + return CR_Impossible; + } + V.OtherVNI = OtherVNI; + Val &OtherV = Other.Vals[OtherVNI->id]; + // Keep this value, check for conflicts when analyzing OtherVNI. + if (!OtherV.isAnalyzed()) + return CR_Keep; + // Both sides have been analyzed now. + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Merge; + if (V.ValidLanes & OtherV.ValidLanes) + // Overlapping lanes can't be resolved. + return CR_Impossible; + else + return CR_Merge; + } + + // No simultaneous def. Is Other live at the def? + V.OtherVNI = OtherLRQ.valueIn(); + if (!V.OtherVNI) + // No overlap, no conflict. + return CR_Keep; + + assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); + + // We have overlapping values, or possibly a kill of Other. + // Recursively compute assignments up the dominator tree. + Other.computeAssignment(V.OtherVNI->id, *this); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Replace; + + // Check for simple erasable conflicts. + if (DefMI->isImplicitDef()) + return CR_Erase; + + // Include the non-conflict where DefMI is a coalescable copy that kills + // OtherVNI. We still want the copy erased and value numbers merged. + if (CP.isCoalescable(DefMI)) { + // Some of the lanes copied from OtherVNI may be undef, making them undef + // here too. + V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; + return CR_Erase; + } + + // This may not be a real conflict if DefMI simply kills Other and defines + // VNI. + if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) + return CR_Keep; + + // Handle the case where VNI and OtherVNI can be proven to be identical: + // + // %other = COPY %ext + // %this = COPY %ext <-- Erase this copy + // + if (DefMI->isFullCopy() && !CP.isPartial() && + stripCopies(VNI) == stripCopies(V.OtherVNI)) + return CR_Erase; + + // If the lanes written by this instruction were all undef in OtherVNI, it is + // still safe to join the live ranges. This can't be done with a simple value + // mapping, though - OtherVNI will map to multiple values: + // + // 1 %dst:ssub0 = FOO <-- OtherVNI + // 2 %src = BAR <-- VNI + // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. + // 4 BAZ %dst<kill> + // 5 QUUX %src<kill> + // + // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace + // handles this complex value mapping. 
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0) + return CR_Replace; + + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.LI wouldn't be live here. + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; +} + +/// Compute the value assignment for ValNo in LI. +/// This may be called recursively by analyzeValue(), but never for a ValNo on +/// the stack. +void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.isAnalyzed()) { + // Recursion should always move up the dominator tree, so ValNo is not + // supposed to reappear before it has been assigned. + assert(Assignments[ValNo] != -1 && "Bad recursion?"); + return; + } + switch ((V.Resolution = analyzeValue(ValNo, Other))) { + case CR_Erase: + case CR_Merge: + // Merge this ValNo into OtherVNI. + assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); + assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); + Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; + DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' + << LI.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); + break; + case CR_Replace: + case CR_Unresolved: + // The other value is going to be pruned if this join is successful. + assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); + Other.Vals[V.OtherVNI->id].Pruned = true; + // Fall through. + default: + // This value number needs to go in the final joined live range. + Assignments[ValNo] = NewVNInfo.size(); + NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + break; + } +} + +bool JoinVals::mapValues(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + computeAssignment(i, Other); + if (Vals[i].Resolution == CR_Impossible) { + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + return false; + } + } + return true; +} + +/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// the extent of the tainted lanes in the block. +/// +/// Multiple values in Other.LI can be affected since partial redefinitions can +/// preserve previously tainted lanes. 
+/// +/// 1 %dst = VLOAD <-- Define all lanes in %dst +/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 +/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 +/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read +/// +/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) +/// entry to TaintedVals. +/// +/// Returns false if the tainted lanes extend beyond the basic block. +bool JoinVals:: +taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { + VNInfo *VNI = LI.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LI from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LI.find(VNI->def); + assert(OtherI != Other.LI.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. + if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. + const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} + +/// Return true if MI uses any of the given Lanes from Reg. +/// This does not include partial redefinitions of Reg. +bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) { + if (MI->isDebugValue()) + return false; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) + continue; + if (!MO->readsReg()) + continue; + if (Lanes & + TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg()))) + return true; + } + return false; +} + +bool JoinVals::resolveConflicts(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) + continue; + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LI.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; + + assert(!TaintExtent.empty() && "There should be at least one conflict."); + + // Now look at the instructions from VNI->def to TaintExtent (inclusive). 
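+    // (That is, the loop below walks forward from the def and rejects the
+    //  join as soon as any instruction reads a still-tainted lane; the
+    //  TaintedLanes mask is refreshed at each TaintExtent boundary.)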
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; + } + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); + return false; + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; + } + + // The tainted lanes are unused. + V.Resolution = CR_Replace; + ++NumLaneResolves; + } + return true; +} + +// Determine if ValNo is a copy of a value number in LI or Other.LI that will +// be pruned: +// +// %dst = COPY %src +// %src = COPY %dst <-- This value to be pruned. +// %dst = COPY %src <-- This value is a copy of a pruned value. +// +bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.Pruned || V.PrunedComputed) + return V.Pruned; + + if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) + return V.Pruned; + + // Follow copies up the dominator tree and check if any intermediate value + // has been pruned. + V.PrunedComputed = true; + V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); + return V.Pruned; +} + +void JoinVals::pruneValues(JoinVals &Other, + SmallVectorImpl<SlotIndex> &EndPoints) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + SlotIndex Def = LI.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + break; + case CR_Replace: + // This value takes precedence over the value in Other.LI. + LIS->pruneValue(&Other.LI, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def + << ": " << Other.LI << '\n'); + break; + case CR_Erase: + case CR_Merge: + if (isPrunedValue(i, Other)) { + // This value is ultimately a copy of a pruned value in LI or Other.LI. + // We can no longer trust the value mapping computed by + // computeAssignment(), the value that was originally copied could have + // been replaced. 
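+        // (The liveness removed by the pruneValue() call below is not lost:
+        //  its end points are collected in EndPoints, and joinVirtRegs()
+        //  later restores the joined range with LIS->extendToIndices().)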
+ LIS->pruneValue(&LI, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at " + << Def << ": " << LI << '\n'); + } + break; + case CR_Unresolved: + case CR_Impossible: + llvm_unreachable("Unresolved conflicts"); + } + } +} + +void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + if (Vals[i].Resolution != CR_Erase) + continue; + SlotIndex Def = LI.getValNumInfo(i)->def; + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No instruction to erase"); + if (MI->isCopy()) { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + ShrinkRegs.push_back(Reg); + } + ErasedInstrs.insert(MI); + DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } +} + +bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); + JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); + JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); + + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + << '\n'); + + // First compute NewVNInfo and the simple value mappings. + // Detect impossible conflicts early. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + return false; + + // Some conflicts can only be resolved after all values have been mapped. + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + return false; + + // All clear, the live ranges can be merged. + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints); + RHSVals.pruneValues(LHSVals, EndPoints); + + // Erase COPY and IMPLICIT_DEF instructions. This may cause some external + // registers to require trimming. + SmallVector<unsigned, 8> ShrinkRegs; + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + while (!ShrinkRegs.empty()) + LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + + // Join RHS into LHS. + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, + MRI); + + // Kill flags are going to be wrong if the live ranges were overlapping. + // Eventually, we should simply clear all kill flags when computing live + // ranges. They are reinserted after register allocation. + MRI->clearKillFlags(LHS.reg); + MRI->clearKillFlags(RHS.reg); + + if (EndPoints.empty()) + return true; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices(&LHS, EndPoints); + return true; +} + /// ComputeUltimateVN - Assuming we are going to join two live intervals, /// compute what the resultant value numbers for each value in the input two /// ranges will be. 
This is complicated by copies between the two which can
@@ -1229,6 +1961,9 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
   if (CP.isPhys())
     return joinReservedPhysReg(CP);
 
+  if (NewCoalescer)
+    return joinVirtRegs(CP);
+
   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
   DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
                << '\n');
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 6cdfe7cd72..b267aea816 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -63,7 +63,7 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC,
   decreaseSetPressure(MaxSetPressure, RC, TRI);
 }
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
   dbgs() << "Live In: ";
   for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index af8cd8f347..9a65071001 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -279,7 +279,7 @@ void SUnit::ComputeHeight() {
   } while (!WorkList.empty());
 }
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
 /// a group of nodes flagged together.
 void SUnit::dump(const ScheduleDAG *G) const {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2d8f235c66..a1a4efd108 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -52,6 +52,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
          "Virtual registers must be removed prior to PostRA scheduling");
+
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
 }
 
 /// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -274,15 +277,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
         // perform its own adjustments.
         SDep dep(SU, SDep::Data, LDataLatency, *Alias);
         if (!UnitLatencies) {
-          unsigned Latency =
-            TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
-                                       (UseOp < 0 ? 0 : UseMI), UseOp);
-          dep.setLatency(Latency);
-          unsigned MinLatency =
-            TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx,
-                                       (UseOp < 0 ? 0 : UseMI), UseOp,
-                                       /*FindMin=*/true);
-          dep.setMinLatency(MinLatency);
+          MachineInstr *RegUse = UseOp < 0 ? 0 : UseMI;
+          dep.setLatency(
+            SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                             RegUse, UseOp, /*FindMin=*/false));
+          dep.setMinLatency(
+            SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                             RegUse, UseOp, /*FindMin=*/true));
 
           ST.adjustSchedDependency(SU, UseSU, dep);
         }
@@ -477,13 +478,10 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
       // Adjust the dependence latency using operand def/use information, then
      // allow the target to perform its own adjustments.
int DefOp = Def->findRegisterDefOperandIdx(Reg); - unsigned Latency = - TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx); - dep.setLatency(Latency); - unsigned MinLatency = - TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx, - /*FindMin=*/true); - dep.setMinLatency(MinLatency); + dep.setLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -1012,7 +1010,7 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); #endif } diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 5ca22b23fe..2cd84d670a 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -89,7 +89,7 @@ void ScoreboardHazardRecognizer::Reset() { ReservedScoreboard.reset(); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 32c97c59b3..0107ded953 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1644,7 +1644,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); + SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), + VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, N1.getOperand(0)); } @@ -2346,16 +2347,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. - if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeTypes) { + if ((N0.getOpcode() == ISD::BITCAST || + N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - // If both incoming values are integers, and the original types are the same. + DebugLoc DL = N->getDebugLoc(); + // If both incoming values are integers, and the original types are the + // same. 
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); - SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); AddToWorkList(Op.getNode()); return BC; } @@ -4057,7 +4061,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) && + TLI.getBooleanContents(false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -5441,7 +5446,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && - N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + N0.getNode()->hasOneUse() && VT.isInteger() && + !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); @@ -5675,12 +5681,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); @@ -7710,9 +7716,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because - // we may introduce new vector instructions which are not backed by TD patterns. - // For example on AVX, extracting elements from a wide vector without using - // extract_subvector. + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo && !LegalOperations) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); @@ -8096,7 +8102,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); - // Check that we were able to transform all incoming values to the same type. + // Check that we were able to transform all incoming values to the same + // type. 
if (VecIn2.getValueType() != VecIn1.getValueType() || VecIn1.getValueType() != VT) return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 6d2cdeabd1..5e631c2d8e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -314,8 +314,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && - "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -412,6 +410,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, ES->getTargetFlags())); } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), + BA->getOffset(), BA->getTargetFlags())); } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7b341700b8..8b291e9a3c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -869,25 +869,24 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, RVal, RChain); - } - } - break; + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + } + } + break; case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { - RVal = Res; - RChain = Res.getValue(1); - } - break; + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; } case TargetLowering::Promote: { // Only promote a load of vector type to another. 
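The ExpandIntRes_XMULO hunk in the next file repairs the divide-based unsigned multiply-overflow check: the isZero compare now really tests RHS == 0 (SETEQ rather than SETNE), so the select substitutes 1 as the divisor in that case, and a second select forces the overflow bit to 0, since multiplying by zero can never overflow. A scalar sketch of the resulting logic (illustrative C++; the variable names are invented):

    // overflow iff RHS != 0 and (LHS * RHS) / RHS != LHS
    unsigned Mul = LHS * RHS;              // computed in the wide type; wraps
    bool IsZero = (RHS == 0);
    unsigned NotZero = IsZero ? 1u : RHS;  // never divide by zero
    bool Overflow = (Mul / NotZero) != LHS;
    if (IsZero)
      Overflow = false;                    // 0 * x cannot overflow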
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e8e968aaef..1cccf1a057 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2253,32 +2253,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
 void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
-  Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
-  EVT PtrVT = TLI.getPointerTy();
-  Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
   DebugLoc dl = N->getDebugLoc();
 
   // A divide for UMULO should be faster than a function call.
   if (N->getOpcode() == ISD::UMULO) {
     SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
-    DebugLoc DL = N->getDebugLoc();
-    SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+    SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
     SplitInteger(MUL, Lo, Hi);
 
     // A divide for UMULO will be faster than a function call. Select to
     // make sure we aren't using 0.
     SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
-                                  RHS, DAG.getConstant(0, VT), ISD::SETNE);
+                                  RHS, DAG.getConstant(0, VT), ISD::SETEQ);
     SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
                                   DAG.getConstant(1, VT), RHS);
-    SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
-    SDValue Overflow;
-    Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+    SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+    SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+                                    ISD::SETNE);
+    Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+                           DAG.getConstant(0, N->getValueType(1)),
+                           Overflow);
     ReplaceValueWith(SDValue(N, 1), Overflow);
     return;
   }
 
+  Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy();
+  Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
   // Replace this with a libcall that will check overflow.
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   if (VT == MVT::i32)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 06f6bd63b6..7b8f138a34 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -94,14 +94,44 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
   if (InVT.isVector() && OutVT.isInteger()) {
     // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
     // is legal but the result is not.
-    EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+    unsigned NumElems = 2;
+    EVT ElemVT = NOutVT;
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+    // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+    while (!isTypeLegal(NVT)) {
+      unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+      // If the element size is smaller than a byte, bail.
+ if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(1)); + + SmallVector<SDValue, 8> Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getIntPtrConstant(i))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; if (TLI.isBigEndian()) std::swap(Lo, Hi); diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index f88b26d5c4..d2269f8acc 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -28,8 +28,8 @@ class SDNode; class SDNodeOrdering { DenseMap<const SDNode*, unsigned> OrderMap; - void operator=(const SDNodeOrdering&); // Do not implement. - SDNodeOrdering(const SDNodeOrdering&); // Do not implement. + void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; + SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; public: SDNodeOrdering() {} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 2b86e36991..68be9ecc03 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1758,7 +1758,7 @@ public: return V; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. 
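   // (Dumping operates on a copy of Queue, so the real queue and the
   //  NodeQueueIds are left untouched.)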
std::vector<SUnit*> DumpQueue = Queue; @@ -1897,7 +1897,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 222dc559a2..660223a505 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -643,7 +643,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -663,7 +663,7 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { #endif } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d85d41bd86..bd33479b94 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -494,8 +494,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::TargetBlockAddress: case ISD::BlockAddress: { - ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress()); - ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags()); + const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); break; } } // end switch (N->getOpcode()) @@ -1470,6 +1472,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1477,12 +1480,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddPointer(BA); + ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -2454,9 +2459,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(Val.bitsToFloat(), VT); + return getConstantFP(APFloat(Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(Val.bitsToDouble(), VT); + return getConstantFP(APFloat(Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bf338990ce..a870ee2ac8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -482,11 +482,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << *M->getMemOperand() << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); OS << "<"; WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); OS << ", "; WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 0337492049..213b2b6c80 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1996,7 +1996,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, return Res; } -/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +/// CheckSame - Implements OP_CheckSame. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index dcaa9ba923..56f3a45c9a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -996,9 +996,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (attr & Attribute::SExt) + if (attr.hasSExtAttr()) ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) + else if (attr.hasZExtAttr()) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to @@ -1016,18 +1016,17 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) + if (attr.hasInRegAttr()) Flags.setInReg(); // Propagate extension type if any - if (attr & Attribute::SExt) + if (attr.hasSExtAttr()) Flags.setSExt(); - else if (attr & Attribute::ZExt) + else if (attr.hasZExtAttr()) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) { + for (unsigned i = 0; i < NumParts; ++i) Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); - } } } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index c98efb480c..95faafab45 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -143,7 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SlotIndexes::dump() const { for (IndexList::const_iterator itr = indexList.begin(); itr != indexList.end(); ++itr) { @@ -170,7 +170,7 @@ void SlotIndex::print(raw_ostream &os) const { os << "invalid"; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // Dump a SlotIndex to stderr. void SlotIndex::dump() const { print(dbgs()); diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 96151b6363..dca15ee758 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -356,7 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit->anyRematerializable(0); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SplitEditor::dump() const { if (RegAssign.empty()) { dbgs() << " empty\n"; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 6df932c1ae..54d8c8cde7 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -59,12 +59,23 @@ using namespace llvm; static cl::opt<bool> DisableColoring("no-stack-coloring", - cl::init(true), cl::Hidden, - cl::desc("Suppress stack coloring")); + cl::init(false), cl::Hidden, + cl::desc("Disable stack coloring")); -STATISTIC(NumMarkerSeen, "Number of life markers found."); +/// The user may write code that uses allocas outside of the declared lifetime +/// zone. This can happen when the user returns a reference to a local +/// data-structure. We can detect these cases and decide not to optimize the +/// code. If this flag is enabled, we try to save the user. 
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+                          cl::init(false), cl::Hidden,
+                          cl::desc("Do not optimize lifetime zones that are broken"));
+
+STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
 STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
 STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas,
+          "Number of allocas that escaped the lifetime region");
 
 //===----------------------------------------------------------------------===//
 //                           StackColoring Pass
 //===----------------------------------------------------------------------===//
@@ -104,7 +115,7 @@ class StackColoring : public MachineFunctionPass {
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
-  SlotIndexes* Indexes;
+  SlotIndexes *Indexes;
 
   /// The list of lifetime markers found. These markers are to be removed
   /// once the coloring is done.
@@ -158,6 +169,14 @@ private:
   /// slots to use the joint slots.
   void remapInstructions(DenseMap<int, int> &SlotRemap);
 
+  /// The input program may contain instructions which are not inside lifetime
+  /// markers. This can happen due to a bug in the compiler or due to a bug in
+  /// user code (for example, returning a reference to a local variable).
+  /// This procedure checks all of the instructions in the function and
+  /// invalidates lifetime ranges which do not contain all of the instructions
+  /// which access that frame slot.
+  void removeInvalidSlotRanges();
+
   /// Map entries which point to other entries to their destination.
   /// A->B->C becomes A->C.
   void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
@@ -243,8 +262,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
       const Value *Allocation = MFI->getObjectAllocation(Slot);
       if (Allocation) {
-        DEBUG(dbgs()<<"Found lifetime marker for allocation: "<<
-              Allocation->getName()<<"\n");
+        DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+              " with allocation: "<< Allocation->getName()<<"\n");
       }
 
       if (IsStart) {
@@ -521,11 +540,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
       // In a debug build, check that the instruction that we are modifying is
       // inside the expected live range. If the instruction is not inside
       // the calculated range then it means that the alloca usage moved
-      // outside of the lifetime markers.
+      // outside of the lifetime markers, or that the user has a bug.
+      // NOTE: Alloca address calculations which happen outside the lifetime
+      // zone are okay, despite the fact that we don't have a good way
+      // for validating all of the usages of the calculation.
 #ifndef NDEBUG
-      if (!I->isDebugValue()) {
+      bool TouchesMemory = I->mayLoad() || I->mayStore();
+      // If we *don't* protect the user from escaped allocas, don't bother
+      // validating the instructions.
+      if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
         SlotIndex Index = Indexes->getInstructionIndex(I);
-        LiveInterval* Interval = Intervals[FromSlot];
+        LiveInterval *Interval = Intervals[FromSlot];
         assert(Interval->find(Index) != Interval->end() &&
                "Found instruction usage outside of live range.");
       }
@@ -543,6 +568,53 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
   DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
 }
 
+void StackColoring::removeInvalidSlotRanges() {
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+        continue;
+
+      // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but no actual memory
+      // reads or writes. Memory accesses outside of the lifetime zone are a
+      // clear violation, but address calculations are okay. This can happen
+      // when GEPs are hoisted outside of the lifetime zone.
+      // So here we only check instructions which can read or write memory.
+      if (!I->mayLoad() && !I->mayStore())
+        continue;
+
+      // Check all of the machine operands.
+      for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+
+        int Slot = MO.getIndex();
+
+        if (Slot<0)
+          continue;
+
+        if (Intervals[Slot]->empty())
+          continue;
+
+        // Check that the used slot is inside the calculated lifetime range.
+        // If it is not, warn about it and invalidate the range.
+        LiveInterval *Interval = Intervals[Slot];
+        SlotIndex Index = Indexes->getInstructionIndex(I);
+        if (Interval->find(Index) == Interval->end()) {
+          Intervals[Slot]->clear();
+          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+          EscapedAllocas++;
+        }
+      }
+    }
+}
+
 void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
                                    unsigned NumSlots) {
   // Expunge slot remap map.
@@ -598,7 +670,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
   DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
 
   // Don't continue because there are not enough lifetime markers, or the
-  // stack or too small, or we are told not to optimize the slots.
+  // stack is too small, or we are told not to optimize the slots.
   if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
     DEBUG(dbgs()<<"Will not try to merge slots.\n");
     return removeAllMarkers();
   }
@@ -617,6 +689,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
   // Propagate the liveness information.
   calculateLiveIntervals(NumSlots);
 
+  // Search for allocas which are used outside of the declared lifetime
+  // markers.
+  if (ProtectFromEscapedAllocas)
+    removeInvalidSlotRanges();
+
   // Maps old slots to new slots.
DenseMap<int, int> SlotRemap; unsigned RemovedSlots = 0; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 9d0fd0aa20..d349abc357 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackcoloring" +#define DEBUG_TYPE "stackslotcoloring" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 7e7f835040..8ed66f7044 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -606,13 +606,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// If we can determine the operand latency from the def only, without itinerary /// lookup, do so. Otherwise return -1. -static int computeDefOperandLatency( - const TargetInstrInfo *TII, const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) { +int TargetInstrInfo::computeDefOperandLatency( + const InstrItineraryData *ItinData, + const MachineInstr *DefMI, bool FindMin) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // Return a latency based on the itinerary properties and defining instruction // if possible. Some common subtargets don't require per-operand latency, @@ -621,7 +621,7 @@ static int computeDefOperandLatency( // If MinLatency is valid, call getInstrLatency. This uses Stage latency if // it exists before defaulting to MinLatency. if (ItinData->SchedModel->MinLatency >= 0) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. // For empty itineraries, short-cirtuit the check and default to one cycle. @@ -629,7 +629,7 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData->SchedModel, DefMI); + return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; @@ -652,7 +652,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *UseMI, unsigned UseIdx, bool FindMin) const { - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); if (DefLatency >= 0) return DefLatency; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp new file mode 100644 index 0000000000..353c9cf8aa --- /dev/null +++ b/lib/CodeGen/TargetSchedule.cpp @@ -0,0 +1,182 @@ +//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a wrapper around MCSchedModel that allows the interface +// to benefit from information currently only available in TargetInstrInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(false), + cl::desc("Use TargetSchedModel for latency lookup")); + +static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), + cl::desc("Use InstrItineraryData for latency lookup")); + +void TargetSchedModel::init(const MCSchedModel &sm, + const TargetSubtargetInfo *sti, + const TargetInstrInfo *tii) { + SchedModel = sm; + STI = sti; + TII = tii; + STI->initInstrItins(InstrItins); +} + +/// If we can determine the operand latency from the def only, without machine +/// model or itinerary lookup, do so. Otherwise return -1. +int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, + bool FindMin) const { + + // Return a latency based on the itinerary properties and defining instruction + // if possible. Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is invalid, then use the itinerary for MinLatency. If no + // itinerary exists either, then use single cycle latency. + if (SchedModel.MinLatency < 0 + && !(EnableSchedItins && hasInstrItineraries())) { + return 1; + } + return SchedModel.MinLatency; + } + else if (!(EnableSchedModel && hasInstrSchedModel()) + && !(EnableSchedItins && hasInstrItineraries())) { + return TII->defaultDefLatency(&SchedModel, DefMI); + } + // ...operand lookup required + return -1; +} + +/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require +/// evaluation of predicates that depend on instruction operands or flags. +const MCSchedClassDesc *TargetSchedModel:: +resolveSchedClass(const MachineInstr *MI) const { + + // Get the definition's scheduling class descriptor from this machine model. + unsigned SchedClass = MI->getDesc().getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + +#ifndef NDEBUG + unsigned NIter = 0; +#endif + while (SCDesc->isVariant()) { + assert(++NIter < 6 && "Variants are nested deeper than the magic number"); + + SchedClass = STI->resolveSchedClass(SchedClass, MI, this); + SCDesc = SchedModel.getSchedClassDesc(SchedClass); + } + return SCDesc; +} + +/// Find the def index of this operand. This index maps to the machine model and +/// is independent of use operands. Def operands may be reordered with uses or +/// merged with uses without affecting the def index (e.g. before/after +/// regalloc). However, an instruction's def operands must never be reordered +/// with respect to each other. +static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { + unsigned DefIdx = 0; + for (unsigned i = 0; i != DefOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + return DefIdx; +} + +/// Find the use index of this operand. This is independent of the instruction's +/// def operands. +/// +/// Note that uses are not determined by the operand's isUse property, which +/// is simply the inverse of isDef. Here we consider any readsReg operand to be +/// a "use". The machine model allows an operand to be both a Def and Use. 
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { + unsigned UseIdx = 0; + for (unsigned i = 0; i != UseOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.readsReg()) + ++UseIdx; + } + return UseIdx; +} + +// Top-level API for clients that know the operand indices. +unsigned TargetSchedModel::computeOperandLatency( + const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *UseMI, unsigned UseOperIdx, + bool FindMin) const { + + int DefLatency = getDefLatency(DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + if (!FindMin && EnableSchedModel && hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = WLEntry->Cycles; + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. + const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); + return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + } + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + std::string Err; + raw_string_ostream ss(Err); + ss << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + report_fatal_error(ss.str()); + } +#endif + return 1; + } + assert(EnableSchedItins && hasInstrItineraries() && + "operand latency requires itinerary"); + + int OperLatency = 0; + if (UseMI) { + OperLatency = + TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + } + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); + return InstrLatency; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index bd12f92132..df33a94ca7 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1758,10 +1758,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) MO.setIsUndef(); } - // Make sure there is a full non-subreg imp-def operand on the - // instruction. This shouldn't be necessary, but it seems that at least - // RAFast requires it. 
- Def->addRegisterDefined(DstReg, TRI); DEBUG(dbgs() << "First def: " << *Def); } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index bd10a4b8d0..a69a8169d3 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -20,6 +20,7 @@ #include "VirtRegMap.h" #include "LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -126,7 +127,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << '\n'; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VirtRegMap::dump() const { print(dbgs()); } @@ -171,6 +172,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) @@ -183,6 +185,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); AU.addRequired<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index c3209854a4..7974dda66a 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -63,8 +63,8 @@ namespace llvm { /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); - VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT - void operator=(const VirtRegMap&); // DO NOT IMPLEMENT + VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION; + void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION; public: static char ID; |
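A recurring cleanup in this commit (SDNodeOrdering.h and VirtRegMap.h above) replaces the old "// DO NOT IMPLEMENT" convention with LLVM_DELETED_FUNCTION. A minimal sketch of the idiom (the macro comes from llvm/Support/Compiler.h; the class here is invented for illustration):

    #include "llvm/Support/Compiler.h"

    class Uncopyable {
      // With C++11 support the macro expands to "= delete", making a copy a
      // compile-time error; on older compilers it expands to nothing, and the
      // private, never-defined members turn misuse into a link-time error.
      Uncopyable(const Uncopyable&) LLVM_DELETED_FUNCTION;
      void operator=(const Uncopyable&) LLVM_DELETED_FUNCTION;
    public:
      Uncopyable() {}
    };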