author     Eli Bendersky <eliben@chromium.org>   2013-07-15 16:09:15 -0700
committer  Eli Bendersky <eliben@chromium.org>   2013-07-15 16:09:15 -0700
commit     c6cf05cb5108f356dde97c01ee4188b0671d4542
tree       436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/CodeGen
parent     c75199c649c739aade160289d93f257edc798cde
parent     7dfcb84fc16b3bf6b2379713b53090757f0a45f9
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
docs/LangRef.rst
include/llvm/CodeGen/CallingConvLower.h
include/llvm/IRReader/IRReader.h
include/llvm/Target/TargetMachine.h
lib/CodeGen/CallingConvLower.cpp
lib/IRReader/IRReader.cpp
lib/IRReader/LLVMBuild.txt
lib/IRReader/Makefile
lib/LLVMBuild.txt
lib/Makefile
lib/Support/MemoryBuffer.cpp
lib/Support/Unix/PathV2.inc
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/ARMTargetMachine.cpp
lib/Target/Mips/CMakeLists.txt
lib/Target/Mips/MipsDelaySlotFiller.cpp
lib/Target/Mips/MipsISelLowering.cpp
lib/Target/Mips/MipsInstrInfo.td
lib/Target/Mips/MipsSubtarget.cpp
lib/Target/Mips/MipsSubtarget.h
lib/Target/X86/X86FastISel.cpp
lib/Target/X86/X86ISelDAGToDAG.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrControl.td
lib/Target/X86/X86InstrFormats.td
lib/Transforms/IPO/ExtractGV.cpp
lib/Transforms/InstCombine/InstCombineCompares.cpp
lib/Transforms/Utils/SimplifyLibCalls.cpp
test/CodeGen/X86/fast-isel-divrem.ll
test/MC/ARM/data-in-code.ll
tools/Makefile
tools/llvm-extract/llvm-extract.cpp
tools/llvm-link/CMakeLists.txt
tools/opt/CMakeLists.txt
tools/opt/LLVMBuild.txt
tools/opt/Makefile
tools/opt/opt.cpp
Diffstat (limited to 'lib/CodeGen')
66 files changed, 2754 insertions, 1564 deletions
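The largest functional change in this diff replaces getNoopInput() in lib/CodeGen/Analysis.cpp with sameNoopInput(), which can additionally look through aggregate insertvalue/extractvalue chains and through calls whose argument carries the 'returned' attribute when deciding whether a call is in tail-call position. As a hedged sketch of what the new matching accepts (hypothetical function names, typed-pointer IR of the period, not taken from this commit's tests):

; Sketch only: @passthrough is a hypothetical callee that promises to
; return its first argument via the 'returned' parameter attribute.
declare i8* @passthrough(i8* returned)

define i32* @caller(i32* %x) {
entry:
  %c = bitcast i32* %x to i8*              ; no-op pointer bitcast
  %r = tail call i8* @passthrough(i8* %c)  ; result is a no-op of %c
  ret i32* %x                              ; matches %r through the 'returned'
                                           ; argument and the bitcast
}

The old getNoopInput() only compared the return operand against the call result itself, so returning %x here would have blocked the tail call; sameNoopInput() walks both values down to a common input.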
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index c7abf7a0c4..4731af5089 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -202,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
   }
 }
 
+static bool isNoopBitcast(Type *T1, Type *T2,
+                          const TargetLowering& TLI) {
+  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
+         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
+          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
+}
 
-/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
-/// through it (and any transitive noop operands to it) and return its input
-/// value.  This is used to determine if a tail call can be formed.
-///
-static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
-  // If V is not an instruction, it can't be looked through.
-  const Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
-
-  Value *Op = I->getOperand(0);
+/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop
+/// (i.e., lowers to no machine code), look through it (and any transitive noop
+/// operands to it) and check if it has the same noop input value.  This is
+/// used to determine if a tail call can be formed.
+static bool sameNoopInput(const Value *V1, const Value *V2,
+                          SmallVectorImpl<unsigned> &Els1,
+                          SmallVectorImpl<unsigned> &Els2,
+                          const TargetLowering &TLI) {
+  using std::swap;
+  bool swapParity = false;
+  bool equalEls = Els1 == Els2;
+  while (true) {
+    if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
+      if (swapParity)
+        // Revert to original Els1 and Els2 to avoid confusing recursive calls
+        swap(Els1, Els2);
+      return true;
+    }
 
-  // Look through truly no-op truncates.
-  if (isa<TruncInst>(I) &&
-      TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
-    return getNoopInput(I->getOperand(0), TLI);
-
-  // Look through truly no-op bitcasts.
-  if (isa<BitCastInst>(I)) {
-    // No type change at all.
-    if (Op->getType() == I->getType())
-      return getNoopInput(Op, TLI);
+    // Try to look through V1; if V1 is not an instruction, it can't be looked
+    // through.
+    const Instruction *I = dyn_cast<Instruction>(V1);
+    const Value *NoopInput = 0;
+    if (I != 0 && I->getNumOperands() > 0) {
+      Value *Op = I->getOperand(0);
+      if (isa<TruncInst>(I)) {
+        // Look through truly no-op truncates.
+        if (TLI.isTruncateFree(Op->getType(), I->getType()))
+          NoopInput = Op;
+      } else if (isa<BitCastInst>(I)) {
+        // Look through truly no-op bitcasts.
+        if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+          NoopInput = Op;
+      } else if (isa<GetElementPtrInst>(I)) {
+        // Look through getelementptr
+        if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+          NoopInput = Op;
+      } else if (isa<IntToPtrInst>(I)) {
+        // Look through inttoptr.
+        // Make sure this isn't a truncating or extending cast.  We could
+        // support this eventually, but don't bother for now.
+        if (!isa<VectorType>(I->getType()) &&
+            TLI.getPointerTy().getSizeInBits() ==
+              cast<IntegerType>(Op->getType())->getBitWidth())
+          NoopInput = Op;
+      } else if (isa<PtrToIntInst>(I)) {
+        // Look through ptrtoint.
+        // Make sure this isn't a truncating or extending cast.  We could
+        // support this eventually, but don't bother for now.
+        if (!isa<VectorType>(I->getType()) &&
+            TLI.getPointerTy().getSizeInBits() ==
+              cast<IntegerType>(I->getType())->getBitWidth())
+          NoopInput = Op;
+      } else if (isa<CallInst>(I)) {
+        // Look through call
+        for (User::const_op_iterator i = I->op_begin(),
+                                     // Skip Callee
+                                     e = I->op_end() - 1;
+             i != e; ++i) {
+          unsigned attrInd = i - I->op_begin() + 1;
+          if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+            NoopInput = *i;
+            break;
+          }
+        }
+      } else if (isa<InvokeInst>(I)) {
+        // Look through invoke
+        for (User::const_op_iterator i = I->op_begin(),
+                                     // Skip BB, BB, Callee
+                                     e = I->op_end() - 3;
+             i != e; ++i) {
+          unsigned attrInd = i - I->op_begin() + 1;
+          if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+            NoopInput = *i;
+            break;
+          }
+        }
+      }
+    }
 
-    // Pointer to pointer cast.
-    if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
-      return getNoopInput(Op, TLI);
-
-    if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
-        TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
-        TLI.isTypeLegal(EVT::getEVT(I->getType())))
-      return getNoopInput(Op, TLI);
-  }
-
-  // Look through inttoptr.
-  if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
-    // Make sure this isn't a truncating or extending cast.  We could support
-    // this eventually, but don't bother for now.
-    if (TLI.getPointerTy().getSizeInBits() ==
-        cast<IntegerType>(Op->getType())->getBitWidth())
-      return getNoopInput(Op, TLI);
-  }
+    if (NoopInput) {
+      V1 = NoopInput;
+      continue;
+    }
 
-  // Look through ptrtoint.
-  if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
-    // Make sure this isn't a truncating or extending cast.  We could support
-    // this eventually, but don't bother for now.
-    if (TLI.getPointerTy().getSizeInBits() ==
-        cast<IntegerType>(I->getType())->getBitWidth())
-      return getNoopInput(Op, TLI);
+    // If we already swapped, avoid infinite loop
+    if (swapParity)
+      break;
+
+    // Otherwise, swap V1<->V2, Els1<->Els2
+    swap(V1, V2);
+    swap(Els1, Els2);
+    swapParity = !swapParity;
   }
 
+  for (unsigned n = 0; n < 2; ++n) {
+    if (isa<InsertValueInst>(V1)) {
+      if (isa<StructType>(V1->getType())) {
+        // Look through insertvalue
+        unsigned i, e;
+        for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
+             i != e; ++i) {
+          const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
+          if (InScalar == 0)
+            break;
+          Els1.push_back(i);
+          if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
+            Els1.pop_back();
+            break;
+          }
+          Els1.pop_back();
+        }
+        if (i == e) {
+          if (swapParity)
+            swap(Els1, Els2);
+          return true;
+        }
+      }
+    } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
+      const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
+      unsigned i = Els1.back();
+      // If the scalar value being inserted is an extractvalue of the right
+      // index from the call, then everything is good.
+      if (isa<StructType>(EVI->getOperand(0)->getType()) &&
+          EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
+        // Look through extractvalue
+        Els1.pop_back();
+        if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
+          Els1.push_back(i);
+          if (swapParity)
+            swap(Els1, Els2);
+          return true;
+        }
+        Els1.push_back(i);
+      }
+    }
 
-  // Otherwise it's not something we can look through.
-  return V;
-}
+    swap(V1, V2);
+    swap(Els1, Els2);
+    swapParity = !swapParity;
+  }
+
+  if (swapParity)
+    swap(Els1, Els2);
+  return false;
+}
 
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
@@ -265,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+                                const TargetLowering &TLI) {
   const Instruction *I = CS.getInstruction();
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
@@ -323,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
     return false;
 
-  // Otherwise, make sure the unmodified return value of I is the return value.
-  // We handle two cases: multiple return values + scalars.
-  Value *RetVal = Ret->getOperand(0);
-  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
-    // Handle scalars first.
-    return getNoopInput(Ret->getOperand(0), TLI) == I;
-
-  // If this is an aggregate return, look through the insert/extract values and
-  // see if each is transparent.
-  for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
-       i != e; ++i) {
-    const Value *InScalar = FindInsertedValue(RetVal, i);
-    if (InScalar == 0) return false;
-    InScalar = getNoopInput(InScalar, TLI);
-
-    // If the scalar value being inserted is an extractvalue of the right index
-    // from the call, then everything is good.
-    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
-    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
-        EVI->getIndices()[0] != i)
-      return false;
-  }
-
-  return true;
+  // Otherwise, make sure the return value and I have the same value
+  SmallVector<unsigned, 4> Els1, Els2;
+  return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0f381c6d0c..c73071e12b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const {
   return *TM.getDataLayout();
 }
 
+StringRef AsmPrinter::getTargetTriple() const {
+  return TM.getTargetTriple();
+}
+
 /// getCurrentSection() - Return the current section we are emitting to.
 const MCSection *AsmPrinter::getCurrentSection() const {
-  return OutStreamer.getCurrentSection();
+  return OutStreamer.getCurrentSection().first;
 }
 
 
@@ -834,7 +838,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
   // caller might be in the middle of an dwarf expression. We should
   // probably assert that Reg >= 0 once debug info generation is more mature.
-  if (int Offset = MLoc.getOffset()) {
+  if (MLoc.isIndirect()) {
     if (Reg < 32) {
       OutStreamer.AddComment(
         dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
@@ -845,7 +849,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
       OutStreamer.AddComment(Twine(Reg));
       EmitULEB128(Reg);
     }
-    EmitSLEB128(Offset);
+    EmitSLEB128(MLoc.getOffset());
   } else {
     if (Reg < 32) {
       OutStreamer.AddComment(
@@ -1256,7 +1260,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
 bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
   if (GV->getName() == "llvm.used") {
     if (MAI->hasNoDeadStrip())    // No need to emit this at all.
-      EmitLLVMUsedList(GV->getInitializer());
+      EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
     return true;
   }
 
@@ -1299,11 +1303,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
 /// global in the specified llvm.used list for which emitUsedDirectiveFor
 /// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
   // Should be an array of 'i8*'.
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
-  if (InitList == 0) return;
-
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 156acace55..31e42d47cf 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
 void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
                                    const MCSymbol *SectionLabel) const {
   // On COFF targets, we have to emit the special .secrel32 directive.
-  if (MAI->getDwarfSectionOffsetDirective()) {
+  if (MAI->needsDwarfSectionOffsetDirective()) {
     OutStreamer.EmitCOFFSecRel32(Label);
     return;
   }
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 58fe2ed9d3..8d15c069c6 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
   DwarfCompileUnit.cpp
   DwarfDebug.cpp
   DwarfException.cpp
+  ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
   Win64Exception.cpp
   )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4ded2818ed..673867ada1 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,9 +112,20 @@ DIE::~DIE() {
     delete Children[i];
 }
 
+/// Climb up the parent chain to get the compile unit DIE to which this DIE
+/// belongs.
+DIE *DIE::getCompileUnit() const {
+  DIE *p = getParent();
+  while (p) {
+    if (p->getTag() == dwarf::DW_TAG_compile_unit)
+      return p;
+    p = p->getParent();
+  }
+  llvm_unreachable("We should not have orphaned DIEs.");
+}
+
 #ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IncIndent) {
-  IndentCount += IncIndent;
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   const std::string Indent(IndentCount, ' ');
   bool isBlock = Abbrev.getTag() == 0;
 
@@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
     O << "Size: " << Size << "\n";
   }
 
-  const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+  const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
 
   IndentCount += 2;
   for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -153,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
   IndentCount -= 2;
 
   for (unsigned j = 0, M = Children.size(); j < M; ++j) {
-    Children[j]->print(O, 4);
+    Children[j]->print(O, IndentCount+4);
   }
 
   if (!isBlock) O << "\n";
-  IndentCount -= IncIndent;
 }
 
 void DIE::dump() {
@@ -313,7 +323,7 @@ void DIEEntry::print(raw_ostream &O) {
 ///
 unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
   if (!Size) {
-    const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+    const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
    for (unsigned i = 0, N = Values.size(); i < N; ++i)
      Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
  }
@@ -332,7 +342,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
   }
 
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
     Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
 }
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 35d7959ac1..3c06001686 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -66,7 +66,7 @@ namespace llvm {
 
     /// Data - Raw data bytes for abbreviation.
     ///
-    SmallVector<DIEAbbrevData, 8> Data;
+    SmallVector<DIEAbbrevData, 12> Data;
 
   public:
     DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
 
@@ -75,7 +75,7 @@ namespace llvm {
     uint16_t getTag() const { return Tag; }
     unsigned getNumber() const { return Number; }
     uint16_t getChildrenFlag() const { return ChildrenFlag; }
-    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
     void setTag(uint16_t T) { Tag = T; }
     void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
     void setNumber(unsigned N) { Number = N; }
@@ -108,7 +108,7 @@ namespace llvm {
 
   //===--------------------------------------------------------------------===//
   /// DIE - A structured debug information entry.  Has an abbreviation which
-  /// describes it's organization.
+  /// describes its organization.
   class DIEValue;
 
   class DIE {
@@ -133,14 +133,13 @@ namespace llvm {
 
     /// Attribute values.
     ///
-    SmallVector<DIEValue*, 32> Values;
+    SmallVector<DIEValue*, 12> Values;
 
     // Private data for print()
     mutable unsigned IndentCount;
 
   public:
     explicit DIE(unsigned Tag)
-      : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
-        IndentCount(0) {}
+      : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
     virtual ~DIE();
 
     // Accessors.
@@ -150,8 +149,11 @@ namespace llvm {
     unsigned getOffset() const { return Offset; }
     unsigned getSize() const { return Size; }
     const std::vector<DIE *> &getChildren() const { return Children; }
-    const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+    const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
     DIE *getParent() const { return Parent; }
+    /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+    /// to.
+    DIE *getCompileUnit() const;
     void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
     void setOffset(unsigned O) { Offset = O; }
     void setSize(unsigned S) { Size = S; }
@@ -176,7 +178,7 @@ namespace llvm {
     }
 
 #ifndef NDEBUG
-    void print(raw_ostream &O, unsigned IncIndent = 0);
+    void print(raw_ostream &O, unsigned IndentCount = 0) const;
     void dump();
 #endif
   };
@@ -232,9 +234,10 @@ namespace llvm {
     ///
     static unsigned BestForm(bool IsSigned, uint64_t Int) {
       if (IsSigned) {
-        if ((char)Int == (signed)Int)   return dwarf::DW_FORM_data1;
-        if ((short)Int == (signed)Int)  return dwarf::DW_FORM_data2;
-        if ((int)Int == (signed)Int)    return dwarf::DW_FORM_data4;
+        const int64_t SignedInt = Int;
+        if ((char)Int == SignedInt)     return dwarf::DW_FORM_data1;
+        if ((short)Int == SignedInt)    return dwarf::DW_FORM_data2;
+        if ((int)Int == SignedInt)      return dwarf::DW_FORM_data4;
       } else {
         if ((unsigned char)Int == Int)  return dwarf::DW_FORM_data1;
         if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d9f6b5eb0a..89abcffd93 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
 CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
                          DwarfDebug *DW, DwarfUnits *DWU)
   : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
-    IndexTyDie(0) {
+    IndexTyDie(0), DebugInfoOffset(0) {
   DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
 }
 
@@ -241,7 +241,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
   if (Line == 0)
     return;
   unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
-                                            V.getContext().getDirectory());
+                                            V.getContext().getDirectory(),
+                                            getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -257,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
   unsigned Line = G.getLineNumber();
   if (Line == 0)
     return;
-  unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory());
+  unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+                                            getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -276,7 +278,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
     return;
 
   unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
-                                            SP.getDirectory());
+                                            SP.getDirectory(), getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -293,7 +295,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
   if (Line == 0)
     return;
   unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
-                                            Ty.getDirectory());
+                                            Ty.getDirectory(), getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -311,7 +313,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
     return;
   DIFile File = Ty.getFile();
   unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
-                                            File.getDirectory());
+                                            File.getDirectory(), getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -329,7 +331,8 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
     return;
 
   StringRef FN = NS.getFilename();
-  unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory());
+  unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+                                            getUniqueID());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -682,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
   return true;
 }
 
-/// addTemplateParams - Add template parameters in buffer.
+/// addTemplateParams - Add template parameters into buffer.
 void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
   // Add template parameters.
   for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
@@ -704,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
     return getOrCreateNameSpace(DINameSpace(Context));
   else if (Context.isSubprogram())
     return getOrCreateSubprogramDIE(DISubprogram(Context));
-  else 
+  else
     return getDIE(Context);
 }
 
@@ -1363,7 +1366,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
     }
   } else if (const ConstantInt *CI =
              dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
-    // AT_const_value was added when the static memeber was created. To avoid
+    // AT_const_value was added when the static member was created. To avoid
     // emitting AT_const_value multiple times, we only add AT_const_value when
     // it is not a static member.
     if (!IsStaticMember)
@@ -1669,33 +1672,6 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
   if (DT.isArtificial())
     addFlag(MemberDie, dwarf::DW_AT_artificial);
 
-  // This is only for backward compatibility.
-  StringRef PropertyName = DT.getObjCPropertyName();
-  if (!PropertyName.empty()) {
-    addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
-    StringRef GetterName = DT.getObjCPropertyGetterName();
-    if (!GetterName.empty())
-      addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
-    StringRef SetterName = DT.getObjCPropertySetterName();
-    if (!SetterName.empty())
-      addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
-    unsigned PropertyAttributes = 0;
-    if (DT.isReadOnlyObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
-    if (DT.isReadWriteObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
-    if (DT.isAssignObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
-    if (DT.isRetainObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
-    if (DT.isCopyObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
-    if (DT.isNonAtomicObjCProperty())
-      PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
-    if (PropertyAttributes)
-      addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
-              PropertyAttributes);
-  }
 
   return MemberDie;
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77bf6a9e50..8f08c63e64 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -87,13 +87,13 @@ class CompileUnit {
   /// corresponds to the MDNode mapped with the subprogram DIE.
   DenseMap<DIE *, const MDNode *> ContainingTypeMap;
 
+  /// Offset of the CUDie from beginning of debug info section.
+  unsigned DebugInfoOffset;
+
   /// getLowerBoundDefault - Return the default lower bound for an array. If the
   /// DWARF version doesn't handle the language, return -1.
   int64_t getDefaultLowerBound() const;
 
-  /// getOrCreateContextDIE - Get context owner's DIE.
-  DIE *getOrCreateContextDIE(DIDescriptor Context);
-
 public:
   CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
               DwarfUnits *);
@@ -103,6 +103,7 @@ public:
   unsigned getUniqueID() const { return UniqueID; }
   unsigned getLanguage() const { return Language; }
   DIE* getCUDie() const { return CUDie.get(); }
+  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
   const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
   const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
 
@@ -120,6 +121,7 @@ public:
     return AccelTypes;
   }
 
+  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
   /// hasContent - Return true if this compile unit has something to write out.
   ///
   bool hasContent() const { return !CUDie->getChildren().empty(); }
@@ -367,6 +369,9 @@ public:
   /// createStaticMemberDIE - Create new static data member DIE.
   DIE *createStaticMemberDIE(DIDerivedType DT);
 
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIDescriptor Context);
+
 private:
 
   // DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 87659ef667..73bba6989f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -55,7 +55,7 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
                                       cl::init(false));
 
 static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
-     cl::Hidden, cl::ZeroOrMore, cl::init(false),
+     cl::Hidden, cl::init(false),
      cl::desc("Generate DWARF pubnames section"));
 
 namespace {
@@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
   const char *DbgTimerName = "DWARF Debug Writer";
+
+  struct CompareFirst {
+    template <typename T> bool operator()(const T &lhs, const T &rhs) const {
+      return lhs.first < rhs.first;
+    }
+  };
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+  DwarfAddrSectionSym = 0;
   DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
 
   // Turn on accelerator tables and older gdb compatibility
   // for Darwin.
-  bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+  bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
   if (DarwinGDBCompat == Default) {
     if (IsDarwin)
       IsDarwinGDBCompat = true;
@@ -352,11 +359,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
   // If we're updating an abstract DIE, then we will be adding the children and
   // object pointer later on. But what we don't want to do is process the
   // concrete DIE twice.
-  if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+  DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+  if (AbsSPDIE) {
+    bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
     // Pick up abstract subprogram DIE.
     SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+    // DW_FORM_ref4.
     SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
-                      dwarf::DW_FORM_ref4, AbsSPDIE);
+                      InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+                      AbsSPDIE);
     SPCU->addDie(SPDie);
   } else {
     DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -528,7 +540,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
 
   DILocation DL(Scope->getInlinedAt());
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
-                 getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+                 getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+                                     TheCU->getUniqueID()));
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
 
   // Add name to the name table, we do this here because we're guaranteed
@@ -590,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   } else {
     // There is no need to emit empty lexical block DIE.
-    if (Children.empty())
+    std::pair<ImportedEntityMap::const_iterator,
+              ImportedEntityMap::const_iterator> Range = std::equal_range(
+        ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+        std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
+        CompareFirst());
+    if (Children.empty() && Range.first == Range.second)
       return NULL;
     ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+    for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
+         ++i)
+      constructImportedModuleDIE(TheCU, i->second, ScopeDIE);
   }
 
   if (!ScopeDIE) return NULL;
@@ -617,19 +637,28 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
 // SourceIds map. This can update DirectoryNames and SourceFileNames maps
 // as well.
 unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
-                                         StringRef DirName) {
+                                         StringRef DirName, unsigned CUID) {
+  // If we use .loc in assembly, we can't separate .file entries according to
+  // compile units. Thus all files will belong to the default compile unit.
+  if (Asm->TM.hasMCUseLoc() &&
+      Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+    CUID = 0;
+
   // If FE did not provide a file name, then assume stdin.
   if (FileName.empty())
-    return getOrCreateSourceID("<stdin>", StringRef());
+    return getOrCreateSourceID("<stdin>", StringRef(), CUID);
 
   // TODO: this might not belong here. See if we can factor this better.
   if (DirName == CompilationDir)
     DirName = "";
 
-  unsigned SrcId = SourceIdMap.size()+1;
+  // FileIDCUMap stores the current ID for the given compile unit.
+  unsigned SrcId = FileIDCUMap[CUID] + 1;
 
-  // We look up the file/dir pair by concatenating them with a zero byte.
+  // We look up the CUID/file/dir by concatenating them with a zero byte.
   SmallString<128> NamePair;
+  NamePair += utostr(CUID);
+  NamePair += '\0';
   NamePair += DirName;
   NamePair += '\0'; // Zero bytes are not allowed in paths.
   NamePair += FileName;
@@ -638,8 +667,9 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
   if (Ent.getValue() != SrcId)
     return Ent.getValue();
 
+  FileIDCUMap[CUID] = SrcId;
   // Print out a .file directive to specify files for .loc directives.
-  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
 
   return SrcId;
 }
@@ -650,21 +680,27 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
-  // Call this to emit a .file directive if it wasn't emitted for the source
-  // file this CU comes from yet.
-  getOrCreateSourceID(FN, CompilationDir);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
   CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
                                        DIUnit.getLanguage(), Die, Asm,
                                        this, &InfoHolder);
+
+  FileIDCUMap[NewCU->getUniqueID()] = 0;
+  // Call this to emit a .file directive if it wasn't emitted for the source
+  // file this CU comes from yet.
+  getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
   NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
   NewCU->addString(Die, dwarf::DW_AT_name, FN);
+
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0 (or a NULL label) for this.
-  NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+  // into an entity. We're using 0 (or a NULL label) for this.  For
+  // split dwarf it's in the skeleton CU so omit it here.
+  if (!useSplitDwarf())
+    NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
 
   // Define start line table label for each Compile Unit.
   MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
@@ -672,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
                                                      NewCU->getUniqueID());
 
+  // Use a single line table if we are using .loc and generating assembly.
+  bool UseTheFirstCU =
+    (Asm->TM.hasMCUseLoc() &&
+     Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) ||
+    (NewCU->getUniqueID() == 0);
+
   // DW_AT_stmt_list is a offset of line number information for this
-  // compile unit in debug_line section.
+  // compile unit in debug_line section. For split dwarf this is
+  // left in the skeleton CU and so not included.
   // The line table entries are not always emitted in assembly, so it
   // is not okay to use line_table_start here.
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    NewCU->getUniqueID() == 0 ?
-                    Asm->GetTempSymbol("section_line") : LineTableStartSym);
-  else if (NewCU->getUniqueID() == 0)
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
-  else
-    NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    LineTableStartSym, DwarfLineSectionSym);
+  if (!useSplitDwarf()) {
+    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+      NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      UseTheFirstCU ?
+                      Asm->GetTempSymbol("section_line") : LineTableStartSym);
+    else if (UseTheFirstCU)
+      NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+    else
+      NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      LineTableStartSym, DwarfLineSectionSym);
+  }
 
-  if (!CompilationDir.empty())
+  // If we're using split dwarf the compilation dir is going to be in the
+  // skeleton CU and so we don't need to duplicate it here.
+  if (!useSplitDwarf() && !CompilationDir.empty())
     NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
   if (DIUnit.isOptimized())
     NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -702,13 +749,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   if (!FirstCU)
     FirstCU = NewCU;
 
-  if (useSplitDwarf()) {
-    // This should be a unique identifier when we want to build .dwp files.
-    NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
-    // Now construct the skeleton CU associated.
-    constructSkeletonCU(N);
-  }
-
   InfoHolder.addUnit(NewCU);
 
   CUMap.insert(std::make_pair(N, NewCU));
@@ -742,80 +782,39 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
   TheCU->addGlobalName(SP.getName(), SubprogramDie);
 }
 
-// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
-void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
-  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      const MDNode *N = NMD->getOperand(i);
-      if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
-        constructSubprogramDIE(CU, N);
-    }
-
-  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      const MDNode *N = NMD->getOperand(i);
-      if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
-        CU->createGlobalVariableDIE(N);
-    }
-
-  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      DIType Ty(NMD->getOperand(i));
-      if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
-        CU->getOrCreateTypeDIE(Ty);
-    }
-
-  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      DIType Ty(NMD->getOperand(i));
-      if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
-        CU->getOrCreateTypeDIE(Ty);
-    }
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+                                            const MDNode *N) {
+  DIImportedModule Module(N);
+  if (!Module.Verify())
+    return;
+  if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
+    constructImportedModuleDIE(TheCU, Module, D);
 }
 
-// Collect debug info using DebugInfoFinder.
-// FIXME - Remove this when dragonegg switches to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
-  DebugInfoFinder DbgFinder;
-  DbgFinder.processModule(*M);
-
-  bool HasDebugInfo = false;
-  // Scan all the compile-units to see if there are any marked as the main
-  // unit. If not, we do not generate debug info.
-  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
-         E = DbgFinder.compile_unit_end(); I != E; ++I) {
-    if (DICompileUnit(*I).isMain()) {
-      HasDebugInfo = true;
-      break;
-    }
-  }
-  if (!HasDebugInfo) return false;
-
-  // Emit initial sections so we can refer to them later.
-  emitSectionLabels();
-
-  // Create all the compile unit DIEs.
-  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
-         E = DbgFinder.compile_unit_end(); I != E; ++I)
-    constructCompileUnit(*I);
-
-  // Create DIEs for each global variable.
-  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
-         E = DbgFinder.global_variable_end(); I != E; ++I) {
-    const MDNode *N = *I;
-    if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
-      CU->createGlobalVariableDIE(N);
-  }
-
-  // Create DIEs for each subprogram.
-  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
-         E = DbgFinder.subprogram_end(); I != E; ++I) {
-    const MDNode *N = *I;
-    if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
-      constructSubprogramDIE(CU, N);
-  }
-
-  return HasDebugInfo;
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+                                            DIE *Context) {
+  DIImportedModule Module(N);
+  if (!Module.Verify())
+    return;
+  return constructImportedModuleDIE(TheCU, Module, Context);
+}
+
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+                                            const DIImportedModule &Module,
+                                            DIE *Context) {
+  assert(Module.Verify() &&
+         "Use one of the MDNode * overloads to handle invalid metadata");
+  assert(Context && "Should always have a context for an imported_module");
+  DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module);
+  TheCU->insertDIE(Module, IMDie);
+  DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace());
+  unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(),
+                                        Module.getContext().getDirectory(),
+                                        TheCU->getUniqueID());
+  TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID);
+  TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber());
+  TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie);
+  Context->addChild(IMDie);
 }
 
 // Emit all Dwarf sections that should come prior to the content. Create
@@ -830,30 +829,48 @@ void DwarfDebug::beginModule() {
 
   // If module has named metadata anchors then use them, otherwise scan the
   // module using debug info finder to collect debug info.
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
-  if (CU_Nodes) {
-    // Emit initial sections so we can reference labels later.
-    emitSectionLabels();
-
-    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
-      DICompileUnit CUNode(CU_Nodes->getOperand(i));
-      CompileUnit *CU = constructCompileUnit(CUNode);
-      DIArray GVs = CUNode.getGlobalVariables();
-      for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
-        CU->createGlobalVariableDIE(GVs.getElement(i));
-      DIArray SPs = CUNode.getSubprograms();
-      for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
-        constructSubprogramDIE(CU, SPs.getElement(i));
-      DIArray EnumTypes = CUNode.getEnumTypes();
-      for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
-        CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
-      DIArray RetainedTypes = CUNode.getRetainedTypes();
-      for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
-        CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
-    }
-  } else if (!collectLegacyDebugInfo(M))
+  if (!CU_Nodes)
     return;
 
-  collectInfoFromNamedMDNodes(M);
+  // Emit initial sections so we can reference labels later.
+  emitSectionLabels();
+
+  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+    DICompileUnit CUNode(CU_Nodes->getOperand(i));
+    CompileUnit *CU = constructCompileUnit(CUNode);
+    DIArray ImportedModules = CUNode.getImportedModules();
+    for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+      ScopesWithImportedEntities.push_back(std::make_pair(
+          DIImportedModule(ImportedModules.getElement(i)).getContext(),
+          ImportedModules.getElement(i)));
+    std::sort(ScopesWithImportedEntities.begin(),
+              ScopesWithImportedEntities.end(), CompareFirst());
+    DIArray GVs = CUNode.getGlobalVariables();
+    for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+      CU->createGlobalVariableDIE(GVs.getElement(i));
+    DIArray SPs = CUNode.getSubprograms();
+    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+      constructSubprogramDIE(CU, SPs.getElement(i));
+    DIArray EnumTypes = CUNode.getEnumTypes();
+    for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+      CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+    DIArray RetainedTypes = CUNode.getRetainedTypes();
+    for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+      CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    // Emit imported_modules last so that the relevant context is already
+    // available.
+    for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+      constructImportedModuleDIE(CU, ImportedModules.getElement(i));
+    // If we're splitting the dwarf out now that we've got the entire
+    // CU then construct a skeleton CU based upon it.
+    if (useSplitDwarf()) {
+      // This should be a unique identifier when we want to build .dwp files.
+      CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+                  dwarf::DW_FORM_data8, 0);
+      // Now construct the skeleton CU associated.
+      constructSkeletonCU(CUNode);
+    }
+  }
 
   // Tell MMI that we have debug info.
   MMI->setDebugInfoAvailability(true);
@@ -1157,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
   }
   if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
     MachineLocation MLoc;
-    MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    // TODO: Currently an offset of 0 in a DBG_VALUE means
+    // we need to generate a direct register value.
+    // There is no way to specify an indirect value with offset 0.
+    if (MI->getOperand(1).getImm() == 0)
+      MLoc.set(MI->getOperand(0).getReg());
+    else
+      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
     return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
   }
   if (MI->getOperand(0).isImm())
@@ -1197,16 +1220,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
     if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
         DISubprogram(DV.getContext()).describes(MF->getFunction()))
       Scope = LScopes.getCurrentFunctionScope();
-    else {
-      if (DV.getVersion() <= LLVMDebugVersion9)
-        Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
-      else {
-        if (MDNode *IA = DV.getInlinedAt())
-          Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
-        else
-          Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
-      }
-    }
+    else if (MDNode *IA = DV.getInlinedAt())
+      Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+    else
+      Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
 
     // If variable scope is not found then skip this variable.
     if (!Scope)
       continue;
@@ -1430,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
   CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
-  Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+  if (Asm->TM.hasMCUseLoc() &&
+      Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+    // Use a single line table if we are using .loc and generating assembly.
+    Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+  else
+    Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
 
   FunctionBeginSym = Asm->GetTempSymbol("func_begin",
                                         Asm->getFunctionNumber());
@@ -1707,7 +1729,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
     } else
       llvm_unreachable("Unexpected scope info");
 
-    Src = getOrCreateSourceID(Fn, Dir);
+    Src = getOrCreateSourceID(Fn, Dir,
+            Asm->OutStreamer.getContext().getDwarfCompileUnitID());
   }
   Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
 }
@@ -1735,8 +1758,8 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   // Start the size with the size of abbreviation code.
   Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
 
-  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
 
   // Size the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1761,15 +1784,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
 
 // Compute the size and offset of all the DIEs.
 void DwarfUnits::computeSizeAndOffsets() {
-  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+  // Offset from the beginning of debug info section.
+  unsigned AccuOffset = 0;
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
          E = CUs.end(); I != E; ++I) {
+    (*I)->setDebugInfoOffset(AccuOffset);
     unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info
                       sizeof(int16_t) + // DWARF version number
                       sizeof(int32_t) + // Offset Into Abbrev. Section
                       sizeof(int8_t);   // Pointer Size (in bytes)
 
-    computeSizeAndOffset((*I)->getCUDie(), Offset);
+    unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+    AccuOffset += EndOffset;
   }
 }
 
@@ -1799,9 +1826,12 @@ void DwarfDebug::emitSectionLabels() {
     emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
   DwarfStrSectionSym =
     emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
-  if (useSplitDwarf())
+  if (useSplitDwarf()) {
     DwarfStrDWOSectionSym =
       emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+    DwarfAddrSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+  }
   DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
                                              "debug_range");
 
@@ -1826,8 +1856,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
                                 dwarf::TagString(Abbrev->getTag()));
   Asm->EmitULEB128(AbbrevNumber);
 
-  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
-  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
 
   // Emit the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -1843,6 +1873,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
       DIEEntry *E = cast<DIEEntry>(Values[i]);
       DIE *Origin = E->getEntry();
       unsigned Addr = Origin->getOffset();
+      if (Form == dwarf::DW_FORM_ref_addr) {
+        // For DW_FORM_ref_addr, output the offset from beginning of debug info
+        // section. Origin->getOffset() returns the offset from start of the
+        // compile unit.
+        DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+        Addr += Holder.getCUOffset(Origin->getCompileUnit());
+      }
       Asm->EmitInt32(Addr);
       break;
     }
@@ -1908,7 +1945,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
                            const MCSection *ASection,
                            const MCSymbol *ASectionSym) {
   Asm->OutStreamer.SwitchSection(USection);
-  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
          E = CUs.end(); I != E; ++I) {
     CompileUnit *TheCU = *I;
     DIE *Die = TheCU->getCUDie();
@@ -1940,6 +1977,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
   }
 }
 
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+  assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+         "Input DIE should be compile unit in getCUOffset.");
+  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+       E = CUs.end(); I != E; ++I) {
+    CompileUnit *TheCU = *I;
+    if (TheCU->getCUDie() == Die)
+      return TheCU->getDebugInfoOffset();
+  }
+  llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
 // Emit the debug info section.
 void DwarfDebug::emitDebugInfo() {
   DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2324,7 +2374,7 @@ void DwarfDebug::emitDebugLoc() {
   if (DotDebugLocEntries.empty())
     return;
 
-  for (SmallVector<DotDebugLocEntry, 4>::iterator
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
          I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I) {
     DotDebugLocEntry &Entry = *I;
@@ -2338,7 +2388,7 @@ void DwarfDebug::emitDebugLoc() {
   unsigned char Size = Asm->getDataLayout().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
-  for (SmallVector<DotDebugLocEntry, 4>::iterator
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
          I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I, ++index) {
     DotDebugLocEntry &Entry = *I;
@@ -2431,7 +2481,7 @@ void DwarfDebug::emitDebugRanges() {
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfRangesSection());
   unsigned char Size = Asm->getDataLayout().getPointerSize();
-  for (SmallVector<const MCSymbol *, 8>::iterator
+  for (SmallVectorImpl<const MCSymbol *>::iterator
          I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
     if (*I)
@@ -2489,13 +2539,13 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 
-  for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+  for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
 
     const MDNode *Node = *I;
     DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
       = InlineInfo.find(Node);
-    SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+    SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
     DISubprogram SP(Node);
     StringRef LName = SP.getLinkageName();
     StringRef Name = SP.getName();
@@ -2514,7 +2564,7 @@ void DwarfDebug::emitDebugInlineInfo() {
                                      DwarfStrSectionSym);
 
     Asm->EmitULEB128(Labels.size(), "Inline count");
 
-    for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+    for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
            LE = Labels.end(); LI != LE; ++LI) {
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(LI->second->getOffset());
@@ -2549,9 +2599,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
 
   // This should be a unique identifier when we want to build .dwp files.
   NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
 
-  // FIXME: The addr base should be relative for each compile unit, however,
-  // this one is going to be 0 anyhow.
-  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+  // Relocate to the beginning of the addr_base section, else 0 for the
+  // beginning of the one for this compile unit.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
+                    DwarfAddrSectionSym);
+  else
+    NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
+                   dwarf::DW_FORM_sec_offset, 0);
 
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
   // into an entity. We're using 0, or a NULL label for this.
@@ -2559,6 +2614,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
 
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
+  // FIXME: Should handle multiple compile units.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
                     DwarfLineSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 7b56815040..24f758dda9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -274,6 +274,10 @@ public:
 
   /// \brief Returns the address pool.
   AddrPool *getAddrPool() { return &AddressPool; }
+
+  /// \brief for a given compile unit DIE, returns offset from beginning of
+  /// debug info.
+  unsigned getCUOffset(DIE *Die);
 };
 
 /// \brief Collects and handles dwarf debug information.
@@ -305,7 +309,9 @@ class DwarfDebug {
   // A list of all the unique abbreviations in use.
   std::vector<DIEAbbrev *> Abbreviations;
 
-  // Source id map, i.e. pair of source filename and directory,
+  // Stores the current file ID for a given compile unit.
+  DenseMap <unsigned, unsigned> FileIDCUMap;
+  // Source id map, i.e. CUID, source filename and directory,
   // separated by a zero byte, mapped to a unique id.
   StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
 
@@ -386,7 +392,7 @@ class DwarfDebug {
   // section offsets and are created by EmitSectionLabels.
   MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
-  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
   MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
 
@@ -427,6 +433,10 @@ class DwarfDebug {
   // Holder for the skeleton information.
   DwarfUnits SkeletonHolder;
 
+  typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
+    ImportedEntityMap;
+  ImportedEntityMap ScopesWithImportedEntities;
+
 private:
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -549,6 +559,18 @@ private:
 
   /// \brief Construct subprogram DIE.
   void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
 
+  /// \brief Construct import_module DIE.
+  void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N);
+
+  /// \brief Construct import_module DIE.
+  void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+                                  DIE *Context);
+
+  /// \brief Construct import_module DIE.
+  void constructImportedModuleDIE(CompileUnit *TheCU,
+                                  const DIImportedModule &Module,
+                                  DIE *Context);
+
   /// \brief Register a source line with debug info. Returns the unique
   /// label that was emitted and which provides correspondence to the
   /// source line list.
@@ -596,14 +618,6 @@ public:
   DwarfDebug(AsmPrinter *A, Module *M);
   ~DwarfDebug();
 
-  /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum
-  /// and llvm.dbg.ty
-  void collectInfoFromNamedMDNodes(const Module *M);
-
-  /// \brief Collect debug info using DebugInfoFinder.
-  /// FIXME - Remove this when DragonEgg switches to DIBuilder.
-  bool collectLegacyDebugInfo(const Module *M);
-
   /// \brief Emit all Dwarf sections that should come prior to the
   /// content.
   void beginModule();
@@ -626,7 +640,8 @@ public:
   /// \brief Look up the source id with the given directory and source file
   /// names. If none currently exists, create a new id and insert it in the
   /// SourceIds map.
-  unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName);
+  unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+                               unsigned CUID);
 
   /// \brief Recursively Emits a debug information entry.
   void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 0000000000..a8fb66dcf1
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGCPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGCPrinter() { } + +void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } + +void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { + MCStreamer &OS = AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + + // Put this in a custom .note section. + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() + .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, + SectionKind::getDataRel())); + + // For each function... + for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + GCFunctionInfo &MD = **FI; + + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_<FUNCTIONNAME>; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.EmitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. + GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? + MD.getFunction().arg_size() - RegisteredArgs : 0; + OS.AddComment("stack arity"); + AP.EmitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.EmitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. 
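The __gcmap_<FUNCTIONNAME> record documented in the comment block above is variable-length, so a consumer walks it field by field rather than overlaying a fixed struct. A minimal reader sketch, assuming native-endian data and the fixed 4-byte safe-point entries that EmitLabelPlusOffset is given above; GCMapView, readGCMap and rd16 are invented names for illustration, not LLVM or Erlang/OTP API.

#include <cstdint>
#include <cstring>

struct GCMapView {
  uint16_t PointCount;
  uint16_t StackFrameSizeWords;
  uint16_t StackArity;
  uint16_t LiveCount;
};

static uint16_t rd16(const uint8_t *&P) {
  uint16_t V;
  std::memcpy(&V, P, 2); // memcpy avoids unaligned-access traps
  P += 2;
  return V;
}

GCMapView readGCMap(const uint8_t *P) {
  GCMapView M;
  M.PointCount = rd16(P);
  P += 4u * M.PointCount;          // SafePointAddress[], emitted 4 bytes each
  M.StackFrameSizeWords = rd16(P); // frame size, in words
  M.StackArity = rd16(P);          // stacked (non-register) arguments
  M.LiveCount = rd16(P);
  P += 2u * M.LiveCount;           // LiveOffsets[], word offsets
  return M;
}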
+ OS.AddComment("stack index (offset / wordsize)"); + AP.EmitInt16(LI->StackOffset / IntPtrSize); + } + } +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index e8b5b4fe8d..4a99184f5e 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -85,7 +85,9 @@ public: virtual unsigned getNumberOfRegisters(bool Vector) const; virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, @@ -193,27 +195,34 @@ unsigned BasicTTI::getMaximumUnrollFactor() const { return 1; } -unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { +unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const { // Check if any of the operands are vector operands. int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + // Assume that floating point arithmetic operations cost twice as much as + // integer operations. + unsigned OpCost = (IsFloat ? 2 : 1); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + return LT.first * 2 * OpCost; + return LT.first * 1 * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { // If the operation is custom lowered then assume // thare the code is twice as expensive. - return LT.first * 2; + return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. @@ -226,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { } // We don't know anything about this scalar instruction. - return 1; + return OpCost; } unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, @@ -379,22 +388,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys) const { - // assume that we need to scalarize this intrinsic. - unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + unsigned ISD = 0; + switch (IID) { + default: { + // Assume that we need to scalarize this intrinsic. 
+ unsigned ScalarizationCost = 0; + unsigned ScalarCalls = 1; + if (RetTy->isVectorTy()) { + ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); } for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); } } + + return ScalarCalls + ScalarizationCost; + } + // Look for intrinsics that can be lowered directly or turned into a scalar + // intrinsic call. + case Intrinsic::sqrt: ISD = ISD::FSQRT; break; + case Intrinsic::sin: ISD = ISD::FSIN; break; + case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::exp: ISD = ISD::FEXP; break; + case Intrinsic::exp2: ISD = ISD::FEXP2; break; + case Intrinsic::log: ISD = ISD::FLOG; break; + case Intrinsic::log10: ISD = ISD::FLOG10; break; + case Intrinsic::log2: ISD = ISD::FLOG2; break; + case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::floor: ISD = ISD::FFLOOR; break; + case Intrinsic::ceil: ISD = ISD::FCEIL; break; + case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::pow: ISD = ISD::FPOW; break; + case Intrinsic::fma: ISD = ISD::FMA; break; + case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + } + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that there is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; + } + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // that the code is twice as expensive. + return LT.first * 2; } - return ScalarCalls + ScalarizationCost; + + // Else, assume that we need to scalarize this intrinsic. For math builtins + // this will emit a costly libcall, adding call overhead and spills. Make it + // very expensive. + if (RetTy->isVectorTy()) { + unsigned Num = RetTy->getVectorNumElements(); + unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), + Tys); + return 10 * Cost * Num; + } + + // This is going to be turned into a library call, make it expensive. + return 10; } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index ddc7adab49..56aa3309d3 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -7,13 +7,13 @@ add_llvm_library(LLVMCodeGen CalcSpillWeights.cpp CallingConvLower.cpp CodeGen.cpp - CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DFAPacketizer.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp + ErlangGC.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index dee339a458..38ae17d231 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; - // Find the best physreg hist and the best virtreg hint. + // Find the best physreg hint and the best virtreg hint.
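The intrinsic path added above reuses the three-tier heuristic of getArithmeticInstrCost: LT.first for a legal or promoted operation (doubled when the type splits across registers), twice that for custom lowering, and a large scalarization or libcall penalty otherwise. A condensed model of those tiers; LegalizeAction and the parameters here are stand-ins for the TLI queries, not LLVM API.

enum LegalizeAction { Legal, Promote, Custom, Expand };

// NumParts plays the role of LT.first; ExpandCost stands for the
// scalarization or libcall estimate computed elsewhere.
unsigned modelCost(unsigned NumParts, LegalizeAction Action, bool IsFloat,
                   unsigned ExpandCost) {
  unsigned OpCost = IsFloat ? 2 : 1;  // FP assumed twice as costly as int
  switch (Action) {
  case Legal:
  case Promote:
    // One op per register-sized part, doubled when the value splits.
    return NumParts * (NumParts > 1 ? 2 : 1) * OpCost;
  case Custom:
    return NumParts * 2 * OpCost;     // custom lowering: assume 2x
  case Expand:
  default:
    return ExpandCost;                // scalarize or emit a library call
  }
}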
float bestPhys = 0, bestVirt = 0; unsigned hintPhys = 0, hintVirt = 0; diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index c897f3e391..d4cc1a8654 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, // No stack is used. StackOffset = 0; - clearFirstByValReg(); + clearByValRegsInfo(); clearHasByValInRegPosition(); // @LOCALMOD. UsedRegs.resize((TRI.getNumRegs()+31)/32); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a33b672044..c641991d40 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; @@ -22,7 +23,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); - initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp deleted file mode 100644 index 24518443a7..0000000000 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ /dev/null @@ -1,423 +0,0 @@ -//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass that optimizes code placement and aligns loop -// headers to target-specific alignment boundaries. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "code-placement" -#include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -STATISTIC(NumLoopsAligned, "Number of loops aligned"); -STATISTIC(NumIntraElim, "Number of intra loop branches eliminated"); -STATISTIC(NumIntraMoved, "Number of intra loop branches moved"); - -namespace { - class CodePlacementOpt : public MachineFunctionPass { - const MachineLoopInfo *MLI; - const TargetInstrInfo *TII; - const TargetLowering *TLI; - - public: - static char ID; - CodePlacementOpt() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineLoopInfo>(); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - bool HasFallthrough(MachineBasicBlock *MBB); - bool HasAnalyzableTerminator(MachineBasicBlock *MBB); - void Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End); - bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L); - bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L); - bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); - bool OptimizeIntraLoopEdges(MachineFunction &MF); - bool AlignLoops(MachineFunction &MF); - bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); - }; - - char CodePlacementOpt::ID = 0; -} // end anonymous namespace - -char &llvm::CodePlacementOptID = CodePlacementOpt::ID; -INITIALIZE_PASS(CodePlacementOpt, "code-placement", - "Code Placement Optimizer", false, false) - -/// HasFallthrough - Test whether the given branch has a fallthrough, either as -/// a plain fallthrough or as a fallthrough case of a conditional branch. -/// -bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // This conditional branch has no fallthrough. - if (FBB) - return false; - // An unconditional branch has no fallthrough. - if (Cond.empty() && TBB) - return false; - // It has a fallthrough. - return true; -} - -/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. -/// This is called before major changes are begun to test whether it will be -/// possible to complete the changes. -/// -/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed -/// whenever possible. -/// -bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { - // Conservatively ignore EH landing pads. - if (MBB->isLandingPad()) return false; - - // Aggressively handle return blocks and similar constructs. - if (MBB->succ_empty()) return true; - - // Ask the target's AnalyzeBranch if it can handle this block. - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - // Make sure the terminator is understood. - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // Ignore blocks which look like they might have EH-related control flow. 
- // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't - // recognize the possibility of a control transfer through an unwind. - // Such blocks contain EH_LABEL instructions, however they may be in the - // middle of the block. Instead of searching for them, just check to see - // if the CFG disagrees with AnalyzeBranch. - if (1u + !Cond.empty() != MBB->succ_size()) - return false; - // Make sure we have the option of reversing the condition. - if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) - return false; - return true; -} - -/// Splice - Move the sequence of instructions [Begin,End) to just before -/// InsertPt. Update branch instructions as needed to account for broken -/// fallthrough edges and to take advantage of newly exposed fallthrough -/// opportunities. -/// -void CodePlacementOpt::Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End) { - assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && - "Splice can't change the entry block!"); - MachineFunction::iterator OldBeginPrior = prior(Begin); - MachineFunction::iterator OldEndPrior = prior(End); - - MF.splice(InsertPt, Begin, End); - - prior(Begin)->updateTerminator(); - OldBeginPrior->updateTerminator(); - OldEndPrior->updateTerminator(); -} - -/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump -/// to the loop top to the top of the loop so that they have a fall through. -/// This can introduce a branch on entry to the loop, but it can eliminate a -/// branch within the loop. See the @simple case in -/// test/CodeGen/X86/loop_blocks.ll for an example of this. -bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - - bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); - - if (TopMBB == MF.begin() || - HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { - new_top: - for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), - PE = TopMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - if (Pred == TopMBB) continue; - if (HasFallthrough(Pred)) continue; - if (!L->contains(Pred)) continue; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (Pred == MF.begin()) - continue; - if (!HasAnalyzableTerminator(Pred)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() - << " to top of loop.\n"); - Changed = true; - - // Move it and all the blocks that can reach it via fallthrough edges - // exclusively, to keep existing fallthrough edges intact. - MachineFunction::iterator Begin = Pred; - MachineFunction::iterator End = llvm::next(Begin); - while (Begin != MF.begin()) { - MachineFunction::iterator Prior = prior(Begin); - if (Prior == MF.begin()) - break; - // Stop when a non-fallthrough edge is found. - if (!HasFallthrough(Prior)) - break; - // Stop if a block which could fall-through out of the loop is found. - if (Prior->isSuccessor(End)) - break; - // If we've reached the top, stop scanning. 
- if (Prior == MachineFunction::iterator(TopMBB)) { - // We know top currently has a fall through (because we just checked - // it) which would be lost if we do the transformation, so it isn't - // worthwhile to do the transformation unless it would expose a new - // fallthrough edge. - if (!Prior->isSuccessor(End)) - goto next_pred; - // Otherwise we can stop scanning and proceed to move the blocks. - break; - } - // If we hit a switch or something complicated, don't move anything - // for this predecessor. - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior)))) - break; - // Ok, the block prior to Begin will be moved along with the rest. - // Extend the range to include it. - Begin = Prior; - ++NumIntraMoved; - } - - // Move the blocks. - Splice(MF, TopMBB, Begin, End); - - // Update TopMBB. - TopMBB = L->getTopBlock(); - - // We have a new loop top. Iterate on it. We shouldn't have to do this - // too many times if BranchFolding has done a reasonable job. - goto new_top; - next_pred:; - } - } - - // If the loop previously didn't exit with a fall-through and it now does, - // we eliminated a branch. - if (Changed && - !BotHasFallthrough && - HasFallthrough(L->getBottomBlock())) { - ++NumIntraElim; - } - - return Changed; -} - -/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the -/// portion of the loop contiguous with the header. This usually makes the loop -/// contiguous, provided that AnalyzeBranch can handle all the relevant -/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll -/// for an example of this. -bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock *BotMBB = L->getBottomBlock(); - - // Determine a position to move orphaned loop blocks to. If TopMBB is not - // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid losing that fallthrough. Otherwise append - // them to the bottom, even if it previously had a fallthrough, on the theory - // that it's worth an extra branch to keep the loop contiguous. - MachineFunction::iterator InsertPt = - llvm::next(MachineFunction::iterator(BotMBB)); - bool InsertAtTop = false; - if (TopMBB != MF.begin() && - !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && - HasFallthrough(BotMBB)) { - InsertPt = TopMBB; - InsertAtTop = true; - } - - // Keep a record of which blocks are in the portion of the loop contiguous - // with the loop header. - SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks; - for (MachineFunction::iterator I = TopMBB, - E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I) - ContiguousBlocks.insert(I); - - // Find non-contigous blocks and fix them. - if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt))) - for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - MachineBasicBlock *BB = *BI; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (!HasAnalyzableTerminator(BB)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB)))) - continue; - - // If the layout predecessor is part of the loop, this block will be - // processed along with it. This keeps them in their relative order. 
- if (BB != MF.begin() && - L->contains(prior(MachineFunction::iterator(BB)))) - continue; - - // Check to see if this block is already contiguous with the main - // portion of the loop. - if (!ContiguousBlocks.insert(BB)) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() - << " to be contiguous with loop.\n"); - Changed = true; - - // Process this block and all loop blocks contiguous with it, to keep - // them in their relative order. - MachineFunction::iterator Begin = BB; - MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); - for (; End != MF.end(); ++End) { - if (!L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - ContiguousBlocks.insert(End); - ++NumIntraMoved; - } - - // If we're inserting at the bottom of the loop, and the code we're - // moving originally had fall-through successors, bring the sucessors - // up with the loop blocks to preserve the fall-through edges. - if (!InsertAtTop) - for (; End != MF.end(); ++End) { - if (L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - if (!HasFallthrough(prior(End))) break; - } - - // Move the blocks. This may invalidate TopMBB and/or BotMBB, but - // we don't need them anymore at this point. - Splice(MF, InsertPt, Begin, End); - } - - return Changed; -} - -/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -/// This code takes the approach of making minor changes to the existing -/// layout to fix specific loop-oriented problems. Also, it depends on -/// AnalyzeBranch, which can't understand complex control instructions. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - - // Do optimization for nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - // Do optimization for this loop. - Changed |= EliminateUnconditionalJumpsToTop(MF, L); - Changed |= MoveDiscontiguousLoopBlocks(MF, L); - - return Changed; -} - -/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { - bool Changed = false; - - if (!TLI->shouldOptimizeCodePlacement()) - return Changed; - - // Do optimization for each loop in the function. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - if (!(*I)->getParentLoop()) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - return Changed; -} - -/// AlignLoops - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) - return false; - - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return false; // Don't care about loop alignment. - - bool Changed = false; - - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - return Changed; -} - -/// AlignLoop - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, - unsigned Align) { - bool Changed = false; - - // Do alignment for nested loops. 
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - L->getTopBlock()->setAlignment(Align); - Changed = true; - ++NumLoopsAligned; - - return Changed; -} - -bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { - MLI = &getAnalysis<MachineLoopInfo>(); - if (MLI->empty()) - return false; // No loops. - - TLI = MF.getTarget().getTargetLowering(); - TII = MF.getTarget().getInstrInfo(); - - bool Changed = OptimizeIntraLoopEdges(MF); - - Changed |= AlignLoops(MF); - - return Changed; -} diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index fac207e3b3..5447df09cb 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; DEBUG(dbgs() << "If-converting " << *PI.PHI); - assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); @@ -593,6 +592,7 @@ public: EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const; bool runOnMachineFunction(MachineFunction &MF); + const char *getPassName() const { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp new file mode 100644 index 0000000000..8a1e2d9c99 --- /dev/null +++ b/lib/CodeGen/ErlangGC.cpp @@ -0,0 +1,81 @@ +//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Erlang/OTP runtime-compatible garbage collector +// (e.g. defines safe points, root initialization etc.) +// +// The frametable emitter is in ErlangGCPrinter.cpp. 
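Clients opt into this collector per function rather than per module. A usage sketch for a statically linked tool of this era; useErlangGC is an invented helper name.

#include "llvm/CodeGen/GCs.h"   // declares linkErlangGC()/linkErlangGCPrinter()
#include "llvm/IR/Function.h"

// Marks F so that this strategy (and the frametable printer) handle it.
void useErlangGC(llvm::Function &F) {
  llvm::linkErlangGC();         // pull the registry entries into the binary
  llvm::linkErlangGCPrinter();
  F.setGC("erlang");            // matches the name registered below
}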
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGC : public GCStrategy { + MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + public: + ErlangGC(); + bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF); + }; + +} + +static GCRegistry::Add<ErlangGC> +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGC() { } + +ErlangGC::ErlangGC() { + InitRoots = false; + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + CustomRoots = false; + CustomSafePoints = true; +} + +MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + + if (MI->getDesc().isCall()) { + + // Do not treat tail call sites as safe points. + if (MI->getDesc().isTerminator()) + continue; + + /* Code copied from VisitCallPoint(...) */ + MachineBasicBlock::iterator RAI = MI; ++RAI; + MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); + FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); + } + + return false; +} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 9958d7daad..8264d6dbab 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); @@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. 
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && + !NextBBI->BB->hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { @@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } + if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken()) + // Conservatively abort if-conversion if either BB has its address taken. + return false; + // Put the predicated instructions from the 'true' block before the // instructions from the 'false' block, unless the true block would clobber // the predicate, in which case, do the opposite. @@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; - bool CanMergeTail = !TailBBI.HasFallThrough; + bool CanMergeTail = !TailBBI.HasFallThrough && + !TailBBI.BB->hasAddressTaken(); // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. unsigned NumPreds = TailBB->pred_size(); @@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, /// i.e., when FromBBI's branch is being moved, add those successor edges to /// ToBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + assert(!FromBBI.BB->hasAddressTaken() && + "Removing a BB whose address is taken!"); + ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index c6d1a18dbd..35295fe858 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() { Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. 
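The hasAddressTaken() bail-outs above all guard the same hazard: a block whose address escapes (a blockaddress constant at the IR level) may still be targeted by an indirectbr or a jump table, so it cannot be merged away or deleted. A hedged source-level illustration of how such blocks typically arise, using the GNU &&label extension accepted by Clang and GCC:

// 'dispatch' stores block addresses, so the labeled blocks become
// address-taken and the new guards above keep if-conversion away from them.
int run(int op, int x) {
  static void *const dispatch[] = { &&add1, &&neg }; // GNU extension
  goto *dispatch[op & 1];
add1:
  return x + 1;
neg:
  return -x;
}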
- for (unsigned i = RegsToSpill.size(); i != 0; --i) { - unsigned Reg = RegsToSpill[i-1]; - if (!LIS.hasInterval(Reg)) { - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + unsigned ResultPos = 0; + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + if (!LIS.hasInterval(Reg)) continue; - } + LiveInterval &LI = LIS.getInterval(Reg); - if (!LI.empty()) + if (LI.empty()) { + Edit->eraseVirtReg(Reg); continue; - Edit->eraseVirtReg(Reg); - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + } + + RegsToSpill[ResultPos++] = Reg; } + RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 07f0ccf52f..d894f664dc 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Just drop the annotation, but forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + case Intrinsic::var_annotation: break; // Strip out annotate intrinsic diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 22b35d5271..f1b8394811 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -972,9 +972,9 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(unsigned Reg) { - SlotIndex LastUse = NewIdx; if (TargetRegisterInfo::isVirtualRegister(Reg)) { + SlotIndex LastUse = NewIdx; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg), UE = MRI.use_nodbg_end(); @@ -984,30 +984,42 @@ private: if (InstSlot > LastUse && InstSlot < OldIdx) LastUse = InstSlot; } - } else { - MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); - MachineBasicBlock::iterator MII(MI); - ++MII; - MachineBasicBlock* MBB = MI->getParent(); - for (; MII != MBB->end(); ++MII){ - if (MII->isDebugValue()) - continue; - if (LIS.getInstructionIndex(MII) < OldIdx) - break; - for (MachineInstr::mop_iterator MOI = MII->operands_begin(), - MOE = MII->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& mop = *MOI; - if (!mop.isReg() || mop.getReg() == 0 || - TargetRegisterInfo::isVirtualRegister(mop.getReg())) - continue; - - if (TRI.hasRegUnit(mop.getReg(), Reg)) - LastUse = LIS.getInstructionIndex(MII); - } - } + return LastUse; + } + + // This is a regunit interval, so scanning the use list could be very + // expensive. Scan upwards from OldIdx instead. + assert(NewIdx < OldIdx && "Expected upwards move"); + SlotIndexes *Indexes = LIS.getSlotIndexes(); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + + // OldIdx may not correspond to an instruction any longer, so set MII to + // point to the next instruction after OldIdx, or MBB->end(). + MachineBasicBlock::iterator MII = MBB->end(); + if (MachineInstr *MI = Indexes->getInstructionFromIndex( + Indexes->getNextNonNullIndex(OldIdx))) + if (MI->getParent() == MBB) + MII = MI; + + MachineBasicBlock::iterator Begin = MBB->begin(); + while (MII != Begin) { + if ((--MII)->isDebugValue()) + continue; + SlotIndex Idx = Indexes->getInstructionIndex(MII); + + // Stop searching when NewIdx is reached. 
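The reMaterializeAll() rewrite above replaces delete-while-iterating, where every erase shifts the vector tail and the loop degrades to quadratic time, with a stable in-place compaction and a single erase. The same idiom in isolation:

#include <vector>

// Keeps elements satisfying 'keep', preserving their relative order, then
// chops the dead tail once; O(n) moves instead of O(n^2).
void compact(std::vector<unsigned> &Regs, bool (*keep)(unsigned)) {
  unsigned ResultPos = 0;
  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
    if (keep(Regs[i]))
      Regs[ResultPos++] = Regs[i];
  Regs.erase(Regs.begin() + ResultPos, Regs.end());
}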
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) + return NewIdx; + + // Check if MII uses Reg. + for (MIBundleOperands MO(MII); MO.isValid(); ++MO) + if (MO->isReg() && + TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + TRI.hasRegUnit(MO->getReg(), Reg)) + return Idx; } - return LastUse; + // Didn't reach NewIdx. It must be the first instruction in the block. + return NewIdx; } }; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3b28e6afb6..7793e96c35 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx) { + SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 352ef94259..26a117652b 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -46,13 +46,16 @@ namespace { class FrameRef { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced + int FrameIdx; // The frame index public: - FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : - MI(I), LocalOffset(Offset) {} + FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : + MI(I), LocalOffset(Offset), FrameIdx(Idx) {} bool operator<(const FrameRef &RHS) const { return LocalOffset < RHS.LocalOffset; } - MachineBasicBlock::iterator getMachineInstr() { return MI; } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } + int64_t getLocalOffset() const { return LocalOffset; } + int getFrameIndex() const { return FrameIdx; } }; class LocalStackSlotPass: public MachineFunctionPass { @@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs, - std::pair<unsigned, int64_t> &RegOffset, +lookupCandidateBaseReg(int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, const TargetRegisterInfo *TRI) { - unsigned e = Regs.size(); - for (unsigned i = 0; i < e; ++i) { - RegOffset = Regs[i]; - // Check if the relative offset from the where the base register references - // to the target address is in range for the instruction. - int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second; - if (TRI->isFrameOffsetLegal(MI, Offset)) - return true; - } - return false; + // Check if the relative offset from the where the base register references + // to the target address is in range for the instruction. + int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; + return TRI->isFrameOffsetLegal(MI, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - // A base register definition is a register + offset pair. 
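The simplified lookupCandidateBaseReg above reduces base-register reuse to one subtraction: a base covering BaseOffset can serve a slot at LocalFrameOffset exactly when the residual displacement fits the instruction's addressing mode. A toy numeric model, where the ±255 window is an invented stand-in for TRI->isFrameOffsetLegal():

#include <cstdint>

bool canReuseBase(int64_t BaseOffset, int64_t FrameSizeAdjust,
                  int64_t LocalFrameOffset) {
  int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
  return Offset >= -255 && Offset <= 255; // pretend addressing-mode range
}
// With FrameSizeAdjust = 1024 (downward-growing local block) and a base
// register covering local offset 0 (BaseOffset = 1024): a slot at local
// offset 200 is reachable (displacement 200), but one at offset 600 is not,
// which forces a new base register.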
- SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; @@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Don't try this with values not in the local block. if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) break; + int Idx = MI->getOperand(i).getIndex(); + int64_t LocalOffset = LocalOffsets[Idx]; + if (!TRI->needsFrameBaseReg(MI, LocalOffset)) + break; FrameReferenceInsns. - push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + push_back(FrameRef(MI, LocalOffset, Idx)); break; } } @@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineBasicBlock *Entry = Fn.begin(); + unsigned BaseReg = 0; + int64_t BaseOffset = 0; + // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { - MachineBasicBlock::iterator I = - FrameReferenceInsns[ref].getMachineInstr(); + FrameRef &FR = FrameReferenceInsns[ref]; + MachineBasicBlock::iterator I = FR.getMachineInstr(); MachineInstr *MI = I; - for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { - // Consider replacing all frame index operands that reference - // an object allocated in the local block. - if (MI->getOperand(idx).isFI()) { - int FrameIdx = MI->getOperand(idx).getIndex(); - - assert(MFI->isObjectPreAllocated(FrameIdx) && - "Only pre-allocated locals expected!"); - - DEBUG(dbgs() << "Considering: " << *MI); - if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { - unsigned BaseReg = 0; - int64_t Offset = 0; - int64_t FrameSizeAdjust = - StackGrowsDown ? MFI->getLocalFrameSize() : 0; - - DEBUG(dbgs() << " Replacing FI in: " << *MI); - - // If we have a suitable base register available, use it; otherwise - // create a new one. Note that any offset encoded in the - // instruction itself will be taken into account by the target, - // so we don't have to adjust for it here when reusing a base - // register. - std::pair<unsigned, int64_t> RegOffset; - if (lookupCandidateBaseReg(BaseRegisters, RegOffset, - FrameSizeAdjust, - LocalOffsets[FrameIdx], - MI, TRI)) { - DEBUG(dbgs() << " Reusing base register " << - RegOffset.first << "\n"); - // We found a register to reuse. - BaseReg = RegOffset.first; - Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - - RegOffset.second; - } else { - // No previously defined register was in range, so create a - // new one. - int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); - const MachineFunction *MF = MI->getParent()->getParent(); - const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); - BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - - DEBUG(dbgs() << " Materializing base register " << BaseReg << - " at frame local offset " << - LocalOffsets[FrameIdx] + InstrOffset << "\n"); - - // Tell the target to insert the instruction to initialize - // the base register. - // MachineBasicBlock::iterator InsertionPt = Entry->begin(); - TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, - InstrOffset); - - // The base register already includes any offset specified - // by the instruction, so account for that so it doesn't get - // applied twice. 
- Offset = -InstrOffset; - - int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + - InstrOffset; - BaseRegisters.push_back( - std::pair<unsigned, int64_t>(BaseReg, BaseOffset)); - ++NumBaseRegisters; - UsedBaseReg = true; - } - assert(BaseReg != 0 && "Unable to allocate virtual base register!"); - - // Modify the instruction to use the new base register rather - // than the frame index operand. - TRI->resolveFrameIndex(I, BaseReg, Offset); - DEBUG(dbgs() << "Resolved: " << *MI); - - ++NumReplacements; - } + int64_t LocalOffset = FR.getLocalOffset(); + int FrameIdx = FR.getFrameIndex(); + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + + unsigned idx = 0; + for (unsigned f = MI->getNumOperands(); idx != f; ++idx) { + if (!MI->getOperand(idx).isFI()) + continue; + + if (FrameIdx == I->getOperand(idx).getIndex()) + break; + } + + assert(idx < MI->getNumOperands() && "Cannot find FI operand"); + + int64_t Offset = 0; + int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + LocalOffset, MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + // We found a register to reuse. + Offset = FrameSizeAdjust + LocalOffset - BaseOffset; + } else { + // No previously defined register was in range, so create a // new one. + + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + + int64_t PrevBaseOffset = BaseOffset; + BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; + + // We'd like to avoid creating single-use virtual base registers. + // Because the FrameRefs are in sorted order, and we've already + // processed all FrameRefs before this one, just check whether or not + // the next FrameRef will be able to reuse this new register. If not, + // then don't bother creating it. + bool CanReuse = false; + for (int refn = ref + 1; refn < e; ++refn) { + FrameRef &FRN = FrameReferenceInsns[refn]; + MachineBasicBlock::iterator J = FRN.getMachineInstr(); + MachineInstr *MIN = J; + + CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + FRN.getLocalOffset(), MIN, TRI); + break; } + + if (!CanReuse) { + BaseOffset = PrevBaseOffset; + continue; + } + + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << LocalOffset + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. 
+ Offset = -InstrOffset; + + ++NumBaseRegisters; + UsedBaseReg = true; } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. + TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; } + return UsedBaseReg; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index fecd1adf2b..71a377df09 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -37,7 +37,7 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false) { + AddressTaken(false), CachedMCSymbol(NULL) { Insts.Parent = this; } @@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() { /// getSymbol - Return the MCSymbol for this basic block. /// MCSymbol *MachineBasicBlock::getSymbol() const { - const MachineFunction *MF = getParent(); - MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); - return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + "_" + - Twine(getNumber())); + if (!CachedMCSymbol) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + + return CachedMCSymbol; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 3b09c6b779..bfba503b35 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); +static cl::opt<unsigned> AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -1061,7 +1067,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // Align this block if the layout predecessor's edge into this block is - // cold relative to the block. When this is true, othe predecessors make up + // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); @@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BlockToChain.clear(); ChainAllocator.DestroyAll(); + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + FI->setAlignment(AlignAllBlock); + // We always return true as we have no way to track whether the final order // differs from the original order. 
return true; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5e04f2d8a3..04321f3292 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 0ea9ae0fcc..8af9d053b1 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) { if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
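The new estimateStackSize() rounds up in two equivalent ways: a divide-multiply form per object and a mask form for the final frame size, the latter valid because stack alignments are powers of two. A self-contained sketch of the equivalence:

#include <cassert>
#include <cstdint>

uint64_t roundUpDiv(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;   // any Align > 0
}

uint64_t roundUpMask(uint64_t Offset, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "power-of-two alignments only");
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}

// Example: roundUpDiv(37, 16) == roundUpMask(37, 16) == 48.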
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (InitList == 0) return; + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (const Function *F = diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index a777f52cb2..68372f6c90 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -15,6 +15,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/raw_os_ostream.h" + using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) @@ -37,6 +39,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } @@ -105,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (!VRegInfo[Reg].second) + continue; + verifyUseList(Reg); + llvm_unreachable("Remaining virtual register operands"); + } #endif VRegInfo.clear(); } +void MachineRegisterInfo::verifyUseList(unsigned Reg) const { +#ifndef NDEBUG + bool Valid = true; + for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { + MachineOperand *MO = &I.getOperand(); + MachineInstr *MI = MO->getParent(); + if (!MI) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " has no parent instruction.\n"; + Valid = false; + } + MachineOperand *MO0 = &MI->getOperand(0); + unsigned NumOps = MI->getNumOperands(); + if (!(MO >= MO0 && MO < MO0+NumOps)) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " doesn't belong to parent MI: " << *MI; + Valid = false; + } + if (!MO->isReg()) { + errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + << " is not a register\n"; + Valid = false; + } + if (MO->getReg() != Reg) { + errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + << *MO << " is the wrong register\n"; + Valid = false; + } + } + assert(Valid && "Invalid use list"); +#endif +} + +void MachineRegisterInfo::verifyUseLists() const { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + verifyUseList(i); +#endif +} + /// Add MO to the linked list of operands for its register. 
void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { assert(!MO->isOnRegUseList() && "Already on list"); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 589fa1fa02..fff6b2b4c0 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -49,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Experimental heuristics +// FIXME: remove this flag after initial testing. It should always be a good +// thing. +static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, + cl::desc("Constrain vreg copies."), cl::init(true)); + static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -57,6 +63,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); +static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; @@ -197,6 +206,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis<LiveIntervals>(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + if (VerifyScheduling) { + DEBUG(LIS->print(dbgs())); + MF->verify(this, "Before machine scheduling."); + } RegClassInfo->runOnMachineFunction(*MF); // Select the scheduler, or set the default. @@ -285,6 +298,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } Scheduler->finalizeSchedule(); DEBUG(LIS->print(dbgs())); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); return true; } @@ -294,7 +309,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << " " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -312,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() { delete SchedImpl; } +bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { + return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); +} + bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { if (SuccSU != &ExitSU) { // Do not use WillCreateCycle, it assumes SD scheduling. @@ -393,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { } } +/// This is normally called from the main scheduler loop but may also be invoked +/// by the scheduling strategy to perform additional code motion. void ScheduleDAGMI::moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { // Advance RegionBegin if the first instruction moves down. 
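The verifyUseList/verifyUseLists routines added in the MachineRegisterInfo hunk above compile away under NDEBUG, so a pass can bracket a suspect transformation with them at no release-build cost. A hedged usage sketch; mutateSomething is a hypothetical placeholder, not LLVM API:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

void checkedMutate(llvm::MachineFunction &MF) {
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.verifyUseLists();    // cheap no-op in release builds
  // mutateSomething(MF);  // hypothetical transformation under suspicion
  MRI.verifyUseLists();    // asserts if a use list was corrupted
}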
@@ -494,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { if ((int)NewMaxPressure[ID] > MaxUnits) MaxUnits = NewMaxPressure[ID]; } + DEBUG( + for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (NewMaxPressure[i] > Limit) { + dbgs() << " " << TRI->getRegPressureSetName(i) << ": " + << NewMaxPressure[i] << " > " << Limit << "\n"; + } + }); } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -894,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// +// CopyConstrain - DAG post-processing to encourage copy elimination. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// the one use that defines the copy's source vreg, most likely an induction +/// variable increment. +class CopyConstrain : public ScheduleDAGMutation { + // Transient state. + SlotIndex RegionBeginIdx; + // RegionEndIdx is the slot index of the last non-debug instruction in the + // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. + SlotIndex RegionEndIdx; +public: + CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} + + virtual void apply(ScheduleDAGMI *DAG); + +protected: + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); +}; +} // anonymous + +/// constrainLocalCopy handles two possibilities: +/// 1) Local src: +/// I0: = dst +/// I1: src = ... +/// I2: = dst +/// I3: dst = src (copy) +/// (create pred->succ edges I0->I1, I2->I1) +/// +/// 2) Local copy: +/// I0: dst = src (copy) +/// I1: = dst +/// I2: src = ... +/// I3: = dst +/// (create pred->succ edges I1->I2, I3->I2) +/// +/// Although the MachineScheduler is currently constrained to single blocks, +/// this algorithm should handle extended blocks. An EBB is a set of +/// contiguously numbered blocks such that the previous block in the EBB is +/// always the single predecessor. +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { + LiveIntervals *LIS = DAG->getLIS(); + MachineInstr *Copy = CopySU->getInstr(); + + // Check for pure vreg copies. + unsigned SrcReg = Copy->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return; + + unsigned DstReg = Copy->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return; + + // Check if either the dest or source is local. If it's live across a back + // edge, it's not local. Note that if both vregs are live across the back + // edge, we cannot successfully constrain the copy without cyclic scheduling. + unsigned LocalReg = DstReg; + unsigned GlobalReg = SrcReg; + LiveInterval *LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { + LocalReg = SrcReg; + GlobalReg = DstReg; + LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) + return; + } + LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); + + // Find the global segment after the start of the local LI. + LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); + // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a + // local live range.
We could create edges from other global uses to the local + // start, but the coalescer should have already eliminated these cases, so + // don't bother dealing with them. + if (GlobalSegment == GlobalLI->end()) + return; + + // If GlobalSegment is killed at the LocalLI->start, the call to find() + // returned the next global segment. But if GlobalSegment overlaps with + // LocalLI->start, then advance to the next segment. If a hole in GlobalLI + // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. + if (GlobalSegment->contains(LocalLI->beginIndex())) + ++GlobalSegment; + + if (GlobalSegment == GlobalLI->end()) + return; + + // Check if GlobalLI contains a hole in the vicinity of LocalLI. + if (GlobalSegment != GlobalLI->begin()) { + // Two address defs have no hole. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + GlobalSegment->start)) { + return; + } + // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise + // it would be a disconnected component in the live range. + assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + "Disconnected LRG within the scheduling region."); + } + MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); + if (!GlobalDef) + return; + + SUnit *GlobalSU = DAG->getSUnit(GlobalDef); + if (!GlobalSU) + return; + + // GlobalDef is the bottom of the GlobalLI hole. Open the hole by + // constraining the uses of the last local def to precede GlobalDef. + SmallVector<SUnit*,8> LocalUses; + const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); + MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); + SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); + for (SUnit::const_succ_iterator + I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + continue; + if (I->getSUnit() == GlobalSU) + continue; + if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + return; + LocalUses.push_back(I->getSUnit()); + } + // Open the top of the GlobalLI hole by constraining any earlier global uses + // to precede the start of LocalLI. + SmallVector<SUnit*,8> GlobalUses; + MachineInstr *FirstLocalDef = + LIS->getInstructionFromIndex(LocalLI->beginIndex()); + SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); + for (SUnit::const_pred_iterator + I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { + if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + continue; + if (I->getSUnit() == FirstLocalSU) + continue; + if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + return; + GlobalUses.push_back(I->getSUnit()); + } + DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); + // Add the weak edges. + for (SmallVectorImpl<SUnit*>::const_iterator + I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + << GlobalSU->NodeNum << ")\n"); + DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); + } + for (SmallVectorImpl<SUnit*>::const_iterator + I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + << FirstLocalSU->NodeNum << ")\n"); + DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); + } +} + +/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) { + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); + if (FirstPos == DAG->end()) + return; + RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); + RegionEndIdx = DAG->getLIS()->getInstructionIndex( + &*priorNonDebug(DAG->end(), DAG->begin())); + + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->isCopy()) + continue; + + constrainLocalCopy(SU, DAG); + } +} + +//===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// @@ -905,7 +1112,7 @@ public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, SingleExcess, SingleCritical, Cluster, + NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, NodeOrder}; @@ -1180,8 +1387,10 @@ protected: const RegPressureTracker &RPTracker, SchedCandidate &Candidate); + void reschedulePhysRegCopies(SUnit *SU, bool isTop); + #ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); + void traceCandidate(const SchedCandidate &Cand); #endif }; } // namespace @@ -1232,8 +1441,6 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); - DAG->computeDFSResult(); - // Initialize resource counts. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or @@ -1330,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); + DEBUG(dbgs() << " " << Available.getName() + << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); if (L > RemLatency) RemLatency = L; } @@ -1340,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { RemLatency = L; } unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + DEBUG(dbgs() << " " << Available.getName() + << " ExpectedLatency " << ExpectedLatency + << " CP Limit " << CriticalPathLimit << '\n'); if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); + DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); } } @@ -1392,8 +1604,8 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { CheckPending = true; IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - DEBUG(dbgs() << " *** " << Available.getName() << " cycle " - << CurrCycle << '\n'); + DEBUG(dbgs() << " " << Available.getName() + << " Cycle: " << CurrCycle << '\n'); } /// Add the given processor resource to this scheduled zone. 
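All of the DAG surgery in constrainLocalCopy above boils down to one pattern: a reachability check followed by a weak edge. Stripped to its core (A and B stand for any two SUnits; this is a sketch of the pattern, not code from the patch):

    // Order A before B only if the topology permits it; SDep::Weak records a
    // soft ordering that the heuristics may honor (see the Weak candidate
    // reason below) without creating a hard dependence.
    if (DAG->canAddEdge(B, A))
      DAG->addEdge(B, SDep(A, SDep::Weak));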
@@ -1560,7 +1772,8 @@ void ConvergingScheduler::balanceZones( if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " + DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() + << " reduce " << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name << '\n'); } @@ -1571,7 +1784,8 @@ void ConvergingScheduler::balanceZones( if ((int)(OppositeZone.ExpectedCount - OppositeCount) > (int)SchedModel->getLatencyFactor()) { OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " + DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() + << " demand " << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name << '\n'); } @@ -1595,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits( if (Top.CritResIdx != Rem.CritResIdx) { TopCand.Policy.ReduceResIdx = Top.CritResIdx; BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << "Reduce scheduled " + DEBUG(dbgs() << " Reduce scheduled " << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); } return; @@ -1612,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits( && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { TopCand.Policy.ReduceLatency = true; BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency + DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency << " + " << Bot.ExpectedLatency << '\n'); } return; @@ -1651,7 +1865,7 @@ initResourceDelta(const ScheduleDAGMI *DAG, } /// Return true if this heuristic determines order. -static bool tryLess(unsigned TryVal, unsigned CandVal, +static bool tryLess(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { @@ -1667,7 +1881,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal, return false; } -static bool tryGreater(unsigned TryVal, unsigned CandVal, +static bool tryGreater(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { @@ -1687,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } +/// Minimize physical register live ranges. Regalloc wants them adjacent to +/// their physreg def/use. +/// +/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf +/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled +/// with the operation that produces or consumes the physreg. We'll do this when +/// regalloc has support for parallel copies. +static int biasPhysRegCopy(const SUnit *SU, bool isTop) { + const MachineInstr *MI = SU->getInstr(); + if (!MI->isCopy()) + return 0; + + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg producer/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ?
!SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? -1 : 1; + return 0; +} + /// Apply a set of heuristics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -1714,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand.Reason = NodeOrder; return; } + + if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()), + biasPhysRegCopy(Cand.SU, Zone.isTop()), + TryCand, Cand, PhysRegCopy)) + return; + // Avoid exceeding the target's limit. if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) @@ -1740,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, TryCand, Cand, Cluster)) return; - // Currently, weak edges are for clustering, so we hard-code that reason. - // However, deferring the current TryCand will not change Cand's reason. + + // Weak edges are for clustering and other constraints. + // + // Deferring TryCand here does not change Cand's reason. This is good in the + // sense that a bad candidate shouldn't affect a previous candidate's + // goodness, but bad in that it is asymmetric and depends on queue order. CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), - TryCand, Cand, Cluster)) { + TryCand, Cand, Weak)) { Cand.Reason = OrigReason; return; } @@ -1816,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS, // Avoid increasing the max critical pressure in the scheduled region. if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << "RP excess top - bot: " + DEBUG(dbgs() << " RP excess top - bot: " << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; } // Avoid increasing the max critical pressure in the scheduled region. if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << "RP critical top - bot: " + DEBUG(dbgs() << " RP critical top - bot: " << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) << '\n'); return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; } // Avoid increasing the max pressure of the entire region.
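  // (The three UnitIncrease comparisons in this function run in priority
  // order: Excess first, then CriticalMax, then CurrentMax, returning as
  // soon as the two deltas differ.)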
if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << "RP current top - bot: " + DEBUG(dbgs() << " RP current top - bot: " << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) << '\n'); return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; @@ -1842,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr( ConvergingScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; case SingleExcess: return "REG-EXCESS"; case SingleCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; case SingleMax: return "REG-MAX "; case MultiPressure: return "REG-MULTI "; case ResourceReduce: return "RES-REDUCE"; @@ -1859,9 +2113,7 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, - const SchedBoundary &Zone) { - const char *Label = getReasonStr(Cand.Reason); +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { PressureElement P; unsigned ResIdx = 0; unsigned Latency = 0; @@ -1896,21 +2148,21 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, Latency = Cand.SU->getDepth(); break; } - dbgs() << Label << " " << Zone.Available.getName() << " "; + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease - << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) + << ":" << P.UnitIncrease << " "; else - dbgs() << " "; + dbgs() << " "; if (ResIdx) - dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; else - dbgs() << " "; + dbgs() << " "; if (Latency) - dbgs() << Latency << " cycles "; + dbgs() << " " << Latency << " cycles "; else - dbgs() << " "; - Cand.SU->dump(DAG); + dbgs() << " "; + dbgs() << '\n'; } #endif @@ -1939,15 +2191,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); Cand.setBest(TryCand); - DEBUG(traceCandidate(Cand, Zone)); + DEBUG(traceCandidate(Cand)); } } } static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot") - << " SU(" << Cand.SU->NodeNum << ") " + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } @@ -1957,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; @@ -2058,24 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") - << " Scheduling Instruction in cycle " - << (IsTopNode ?
Top.CurrCycle : Bot.CurrCycle) << '\n'; - SU->dump(DAG)); + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); return SU; } +void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { + + MachineBasicBlock::iterator InsertPos = SU->getInstr(); + if (!isTop) + ++InsertPos; + SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; + + // Find already scheduled copies with a single physreg dependence and move + // them just above the scheduled instruction. + for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + continue; + SUnit *DepSU = I->getSUnit(); + if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) + continue; + MachineInstr *Copy = DepSU->getInstr(); + if (!Copy->isCopy()) + continue; + DEBUG(dbgs() << " Rescheduling physreg copy "; + I->getSUnit()->dump(DAG)); + DAG->moveInstruction(Copy, InsertPos); + } +} + /// Update the scheduler's state after scheduling a node. This is the same node /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update /// its state based on the current cycle before MachineSchedStrategy does. +/// +/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling +/// them here. See comments in biasPhysRegCopy. void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = Top.CurrCycle; Top.bumpNode(SU); + if (SU->hasPhysRegUses) + reschedulePhysRegCopies(SU, true); } else { SU->BotReadyCycle = Bot.CurrCycle; Bot.bumpNode(SU); + if (SU->hasPhysRegDefs) + reschedulePhysRegCopies(SU, false); } } @@ -2086,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { "-misched-topdown incompatible with -misched-bottomup"); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); // Register DAG post-processors. + // + // FIXME: extend the mutation API to allow earlier mutations to instantiate + // data and pass it to later mutations. Have a single mutation that gathers + // the interesting nodes in one pass. + if (EnableCopyConstrain) + DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); if (EnableLoadCluster) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) @@ -2171,16 +2459,16 @@ public: /// Callback to select the highest priority node from the ready Q.
virtual SUnit *pickNode(bool &IsTopNode) { if (ReadyQ.empty()) return NULL; - pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; - DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " - << *SU->getInstr() + DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " << " ILP: " << DAG->getDFSResult()->getILP(SU) << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" << DAG->getDFSResult()->getSubtreeLevel( - DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); + DAG->getDFSResult()->getSubtreeID(SU)) << '\n' + << "Scheduling " << *SU->getInstr()); return SU; } diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f77a7b17ed..00f702c846 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF->getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); return false; } @@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { return FBI; // Compute resource usage in the block. - // FIXME: Compute per-functional unit counts. FBI->HasCalls = false; unsigned InstrCount = 0; + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector<unsigned, 32> PRCycles(PRKinds); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { const MachineInstr *MI = I; @@ -96,11 +103,43 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { ++InstrCount; if (MI->isCall()) FBI->HasCalls = true; + + // Count processor resources used. + if (!SchedModel.hasInstrSchedModel()) + continue; + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } } FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. 
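+  // For example (numbers purely illustrative): if getResourceFactor() is 1
+  // for an ALU and 2 for a lone divider, 8 ALU cycles scale to 8 units while
+  // 5 divider cycles scale to 10, so the divider dominates despite using
+  // fewer raw cycles.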
+ unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + return FBI; } +ArrayRef<unsigned> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); + return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds, + PRKinds); +} + + //===----------------------------------------------------------------------===// // Ensemble utility functions //===----------------------------------------------------------------------===// @@ -108,6 +147,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) : MTM(*ct) { BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); } // Virtual destructor serves as an anchor. @@ -123,21 +165,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { void MachineTraceMetrics::Ensemble:: computeDepthResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources from trace above. The top block is simple. if (!TBI->Pred) { TBI->InstrDepth = 0; TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); return; } // Compute from the block above. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; } // Update resource-related information in the TraceBlockInfo for MBB. @@ -145,22 +198,33 @@ computeDepthResources(const MachineBasicBlock *MBB) { void MachineTraceMetrics::Ensemble:: computeHeightResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources for the current block. TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber()); // The trace tail is done. if (!TBI->Succ) { TBI->Tail = MBB->getNumber(); + std::copy(PRCycles.begin(), PRCycles.end(), + ProcResourceHeights.begin() + PROffset); return; } // Compute from the block below. A post-order traversal ensures the // successor is always computed first.
- TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + unsigned SuccNum = TBI->Succ->getNumber(); + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum]; assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); TBI->InstrHeight += SuccTBI->InstrHeight; TBI->Tail = SuccTBI->Tail; + + // Compute per-resource heights. + ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; } // Check if depth resources for MBB are valid and return the TBI. @@ -181,6 +245,35 @@ getHeightResources(const MachineBasicBlock *MBB) const { return TBI->hasValidHeight() ? TBI : 0; } +/// Get an array of processor resource depths for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by all blocks preceding MBB in its trace. It does not include instructions +/// in MBB. +/// +/// Compare TraceBlockInfo::InstrDepth. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceDepths(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); + return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds, + PRKinds); +} + +/// Get an array of processor resource heights for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by this block and all blocks following it in its trace. +/// +/// Compare TraceBlockInfo::InstrHeight. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceHeights(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); + return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds, + PRKinds); +} + //===----------------------------------------------------------------------===// // Trace Selection Strategies //===----------------------------------------------------------------------===// @@ -677,7 +770,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.isEarlierInSameTrace(TBI)) + if (!DefTBI.isUsefulDominator(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -713,11 +806,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) { SmallVector<DataDep, 8> Deps; while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Also compute the critical path length through MBB when possible. 
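+ // (Per computeCrossBlockCriticalPath above, this is roughly the maximum
+ // over the block's live-in registers of Cycles[DefMI].Depth + LIR.Height.)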
if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); @@ -740,7 +846,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. - if (!DepTBI.isEarlierInSameTrace(TBI)) + if (!DepTBI.isUsefulDominator(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; @@ -928,6 +1034,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) { TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Get dependencies from PHIs in the trace successor. const MachineBasicBlock *Succ = TBI.Succ; // If MBB is the last block in the trace, and it has a back-edge to the @@ -1058,27 +1176,66 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { } unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { - // For now, we compute the resource depth from instruction count / issue - // width. Eventually, we should compute resource depth per functional unit - // and return the max. + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. + unsigned PRMax = 0; + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth; if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } + unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks, + ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const { + // Add up resources above and below the center block. 
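+  // (Like getResourceDepth above, this returns max(instruction count /
+  // issue width, scaled resource cycles): the trace is bounded by issue
+  // bandwidth or by its most contended resource, whichever is larger.)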
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { + const MCSchedClassDesc* SC = ExtraInstrs[I]; + if (!SC->isValid()) + continue; + for (TargetSchedModel::ProcResIter + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx != K) + continue; + PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + } + } + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 4b1230029a..037043f641 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() { if (MInfo.Succs.size() != I->succ_size()) report("MBB has duplicate entries in its successor list.", I); } + + // Check that the register use lists are sane. + MRI->verifyUseLists(); } // Does iterator point to a and b as the first two elements? diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index b79f9f9816..bfbc0623f9 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" @@ -40,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); static cl::opt<bool> DisableBlockPlacement("disable-block-placement", - cl::Hidden, cl::desc("Disable the probability-driven block placement, and " - "re-enable the old code placement pass")); + cl::Hidden, cl::desc("Disable probability-driven block placement")); static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); -static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, @@ -97,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. /// These should be converted to boolOrDefault in order to use applyOverride. 
-static AnalysisID applyDisable(AnalysisID PassID, bool Override) { +static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, + bool Override) { if (Override) - return 0; + return IdentifyingPassPtr(); return PassID; } @@ -107,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) { /// flags with ternary conditions. TargetID is passed through by default. The /// pass is suppressed when the option is false. When the option is true, the /// StandardID is selected if the target provides no default. -static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, - AnalysisID StandardID) { +static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, + cl::boolOrDefault Override, + AnalysisID StandardID) { switch (Override) { case cl::BOU_UNSET: return TargetID; case cl::BOU_TRUE: - if (TargetID) + if (TargetID.isValid()) return TargetID; if (StandardID == 0) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: - return 0; + return IdentifyingPassPtr(); } llvm_unreachable("Invalid command line option state"); } @@ -136,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, /// StandardID may be a pseudo ID. In that case TargetID is the name of the real /// pass to run. This allows multiple options to control a single pass depending /// on where in the pipeline that pass is added. -static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { +static IdentifyingPassPtr overridePass(AnalysisID StandardID, + IdentifyingPassPtr TargetID) { if (StandardID == &PostRASchedulerID) return applyDisable(TargetID, DisablePostRA); @@ -150,10 +149,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) - return applyDisable(TargetID, DisableCodePlace); - - if (StandardID == &CodePlacementOptID) - return applyDisable(TargetID, DisableCodePlace); + return applyDisable(TargetID, DisableBlockPlacement); if (StandardID == &StackSlotColoringID) return applyDisable(TargetID, DisableSSC); @@ -207,11 +203,11 @@ public: // user interface. For example, a target may disable a standard pass by // default by substituting a pass ID of zero, and the user may still enable // that standard pass with an explicit command line option. - DenseMap<AnalysisID,AnalysisID> TargetPasses; + DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses; /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses; + SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; }; } // namespace llvm @@ -246,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. 
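/// InsertedPassID may identify the inserted pass either by AnalysisID or as a
/// concrete Pass instance; the assert below rejects self-insertion in either
/// form.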
void TargetPassConfig::insertPass(AnalysisID TargetPassID, - AnalysisID InsertedPassID) { - assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); - std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID); + IdentifyingPassPtr InsertedPassID) { + assert(((!InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getID()) || + (InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getInstance()->getPassID())) && + "Insert a pass after itself!"); + std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); Impl->InsertedPasses.push_back(P); } @@ -272,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) { } void TargetPassConfig::substitutePass(AnalysisID StandardID, - AnalysisID TargetID) { + IdentifyingPassPtr TargetID) { Impl->TargetPasses[StandardID] = TargetID; } -AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { - DenseMap<AnalysisID, AnalysisID>::const_iterator +IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator I = Impl->TargetPasses.find(ID); if (I == Impl->TargetPasses.end()) return ID; @@ -310,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) { /// Add a CodeGen pass at this point in the pipeline after checking for target /// and command line overrides. +/// +/// addPass cannot return a pointer to the pass instance because it is internal +/// to the PassManager and the instance we create here may already be freed. AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { - AnalysisID TargetID = getPassSubstitution(PassID); - AnalysisID FinalID = overridePass(PassID, TargetID); - if (FinalID == 0) - return FinalID; - - Pass *P = Pass::createPass(FinalID); - if (!P) - llvm_unreachable("Pass ID not registered"); - addPass(P); + IdentifyingPassPtr TargetID = getPassSubstitution(PassID); + IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); + if (!FinalPtr.isValid()) + return 0; + + Pass *P; + if (FinalPtr.isInstance()) + P = FinalPtr.getInstance(); + else { + P = Pass::createPass(FinalPtr.getID()); + if (!P) + llvm_unreachable("Pass ID not registered"); + } + AnalysisID FinalID = P->getPassID(); + addPass(P); // Ends the lifetime of P. + // Add the passes after the pass P if there are any. - for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator + for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { - assert((*I).second && "Illegal Pass ID!"); - Pass *NP = Pass::createPass((*I).second); - assert(NP && "Pass ID not registered"); + assert((*I).second.isValid() && "Illegal Pass ID!"); + Pass *NP; + if ((*I).second.isInstance()) + NP = (*I).second.getInstance(); + else { + NP = Pass::createPass((*I).second.getID()); + assert(NP && "Pass ID not registered"); + } addPass(NP); } } @@ -694,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&VirtRegRewriterID); printAndVerify("After Virtual Register Rewriter"); - // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, - // but eventually, all users of it should probably be moved to addPostRA and - // it can go away. Currently, it's the intended place for targets to run - // FinalizeMachineBundles, because passes other than MachineScheduling an - // RegAlloc itself may not be aware of bundles.
- if (addFinalizeRegAlloc()) - printAndVerify("After RegAlloc finalization"); - // Perform stack slot coloring and post-ra machine LICM. // // FIXME: Re-enable coloring with register when it's capable of adding @@ -743,16 +750,7 @@ bool TargetPassConfig::addGCPasses() { /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - AnalysisID PassID = 0; - if (!DisableBlockPlacement) { - // MachineBlockPlacement is a new pass which subsumes the functionality of - // CodPlacementOpt. The old code placement pass can be restored by - // disabling block placement, but eventually it will be removed. - PassID = addPass(&MachineBlockPlacementID); - } else { - PassID = addPass(&CodePlacementOptID); - } - if (PassID) { + if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 7ae43ef57e..337b9790a5 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -39,7 +39,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <climits> @@ -57,7 +56,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog", "Prologue/Epilogue Insertion & Frame Finalization", false, false) -STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -103,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); @@ -552,9 +550,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } // FIXME: Once this is working, then enable flag will change to a target @@ -597,7 +597,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -619,7 +619,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -635,9 +635,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // stack pointer. 
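  // A target may reserve more than one scavenging slot; each frame index
  // returned by getScavengingFrameIndices() below receives the same
  // adjustment.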
if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || !RegInfo->useFPForScavengingIndex(Fn))) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -820,14 +822,28 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - unsigned VirtReg = 0; - unsigned ScratchReg = 0; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in BB. + if (I == MachineBasicBlock::iterator(NULL)) + I = BB->begin(); + MachineInstr *MI = I; + MachineBasicBlock::iterator J = llvm::next(I); + MachineBasicBlock::iterator P = I == BB->begin() ? + MachineBasicBlock::iterator(NULL) : llvm::prior(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -837,29 +853,47 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - ++NumVirtualFrameRegs; - - // Have we already allocated a scratch register for this virtual? - if (Reg != VirtReg) { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // Scavenge a new scratch register - VirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - ScratchReg = RS->scavengeRegister(RC, I, SPAdj); - ++NumScavengedRegs; - } + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(ScratchReg); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS->setUsed(ScratchReg); } } - RS->forward(I); - ++I; + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != llvm::prior(J)) { + BB->splice(J, BB, I); + + // Before we move I, we need to prepare the RS to visit I again. 
+ // Specifically, RS will assert if it sees uses of registers that + // it believes are undefined. Because we have already processed + // register kills in I, when it visits I again, it will believe that + // those registers are undefined. To avoid this situation, unprocess + // the instruction I. + assert(RS->getCurrentPosition() == I && + "The register scavenger has an unexpected position"); + I = P; + RS->unprocess(P); + } else + ++I; } } } diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 3053119f4d..7fcfe9e88b 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cstdlib> #include <queue> @@ -64,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase MachineFunction *MF; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6344a736ab..9eed1fc62a 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -41,7 +41,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" #include <queue> using namespace llvm; @@ -79,7 +78,7 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<std::pair<unsigned, unsigned> > Queue; unsigned NextCascade; @@ -167,8 +166,8 @@ class RAGreedy : public MachineFunctionPass, }; // splitting state. - std::auto_ptr<SplitAnalysis> SA; - std::auto_ptr<SplitEditor> SE; + OwningPtr<SplitAnalysis> SA; + OwningPtr<SplitEditor> SE; /// Cached per-block interference maps InterferenceCache IntfCache; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 607edac24b..15a88e224f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -89,8 +90,8 @@ public: static char ID; /// Construct a PBQP register allocator. 
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { + RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); @@ -121,7 +122,7 @@ private: typedef std::set<unsigned> RegSet; - std::auto_ptr<PBQPBuilder> builder; + OwningPtr<PBQPBuilder> builder; char *customPassID; @@ -132,7 +133,7 @@ private: const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - std::auto_ptr<Spiller> spiller; + OwningPtr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { return allowedSet[option - 1]; } -std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, - const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - const RegSet &vregs) { +PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + OwningPtr<PBQPRAProblem> p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); RegSet pregs; @@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - return p; + return p.take(); } void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, @@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts( } } -std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( - MachineFunction *mf, +PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( } } - return p; + return p.take(); } void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, @@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - std::auto_ptr<PBQPRAProblem> problem = - builder->build(mf, lis, loopInfo, vregsToAlloc); + OwningPtr<PBQPRAProblem> problem( + builder->build(mf, lis, loopInfo, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { @@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { } FunctionPass* llvm::createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder> builder, + OwningPtr<PBQPBuilder> &builder, char *customPassID) { return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - if (pbqpCoalescing) { - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing())); - } // else - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilder())); + OwningPtr<PBQPBuilder> Builder; + if (pbqpCoalescing) + Builder.reset(new PBQPBuilderWithCoalescing()); + else + 
Builder.reset(new PBQPBuilder()); + return createPBQPRegisterAllocator(Builder); } #undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index e2488adcdc..d85646dd3c 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 6da901f81d..f82ccbe84d 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -45,9 +45,11 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const { } void RegScavenger::initRegState() { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; + for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = NULL; + } // All registers started out unused. RegsAvailable.set(); @@ -108,27 +110,11 @@ void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(*SubRegs); } -void RegScavenger::forward() { - // Move ptr forward. - if (!Tracking) { - MBBI = MBB->begin(); - Tracking = true; - } else { - assert(MBBI != MBB->end() && "Already past the end of the basic block!"); - MBBI = llvm::next(MBBI); - } - assert(MBBI != MBB->end() && "Already at the end of the basic block!"); +void RegScavenger::determineKillsAndDefs() { + assert(Tracking && "Must be tracking to determine kills and defs"); MachineInstr *MI = MBBI; - - if (MI == ScavengeRestore) { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; - } - - if (MI->isDebugValue()) - return; + assert(!MI->isDebugValue() && "Debug values have no kills or defs"); // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. @@ -145,7 +131,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -162,6 +148,53 @@ void RegScavenger::forward() { addRegWithSubRegs(DefRegs, Reg); } } +} + +void RegScavenger::unprocess() { + assert(Tracking && "Cannot unprocess because we're not tracking"); + + MachineInstr *MI = MBBI; + if (!MI->isDebugValue()) { + determineKillsAndDefs(); + + // Commit the changes. + setUsed(KillRegs); + setUnused(DefRegs); + } + + if (MBBI == MBB->begin()) { + MBBI = MachineBasicBlock::iterator(NULL); + Tracking = false; + } else + --MBBI; +} + +void RegScavenger::forward() { + // Move ptr forward. + if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); + MBBI = llvm::next(MBBI); + } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + + MachineInstr *MI = MBBI; + + for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + if (I->Restore != MI) + continue; + + I->Reg = 0; + I->Restore = NULL; + } + + if (MI->isDebugValue()) + return; + + determineKillsAndDefs(); // Verify uses and defs. 
#ifndef NDEBUG @@ -170,7 +203,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -360,37 +393,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } - assert(ScavengedReg == 0 && - "Scavenger slot is live, unable to scavenge another register!"); + // Find an available scavenging slot. + unsigned SI; + for (SI = 0; SI < Scavenged.size(); ++SI) + if (Scavenged[SI].Reg == 0) + break; + + if (SI == Scavenged.size()) { + // We need to scavenge a register but have no spill slot, the target + // must know how to do it (if not, we'll assert below). + Scavenged.push_back(ScavengedInfo()); + } // Avoid infinite regress - ScavengedReg = SReg; + Scavenged[SI].Reg = SReg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { // Spill the scavenged register before I. - assert(ScavengingFrameIndex >= 0 && + assert(Scavenged[SI].FrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"); - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); + TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, + RC, TRI); MachineBasicBlock::iterator II = prior(I); unsigned FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, + RC, TRI); II = prior(UseMI); FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - ScavengeRestore = prior(UseMI); + Scavenged[SI].Restore = prior(UseMI); // Doing this here leads to infinite regress. 
- // ScavengedReg = SReg; - ScavengedRC = RC; + // Scavenged[SI].Reg = SReg; DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 45b4f68570..07e5b470fb 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -329,8 +329,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; - dbgs() << " Depth : " << Depth << "\n"; - dbgs() << " Height : " << Height << "\n"; + dbgs() << " Depth : " << getDepth() << "\n"; + dbgs() << " Height : " << getHeight() << "\n"; if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; @@ -367,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 71e7a21ef2..e4da6a41ee 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); else { + // Set the hasPhysRegDefs only for physreg defs that have a use within + // the scheduling region. + SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); Dep.setMinLatency( @@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } if (!MO.isDef()) { + SU->hasPhysRegUses = true; // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. 
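Aside: the visitVSELECT combine added in the DAGCombiner diff below canonicalizes vector integer abs into the branch-free sra/add/xor form (Y = sra(X, size(X)-1); xor(add(X, Y), Y)). A minimal scalar sketch of the identity it relies on — illustration only, not part of the patch, and assuming arithmetic right shift on signed int, which LLVM's host targets provide:

#include <cassert>
#include <cstdint>

// Y = X >> (bits-1); abs(X) = (X + Y) ^ Y.
// Y is all-ones when X is negative (add-then-xor then negates X) and
// zero otherwise (the value passes through unchanged).
static int32_t absViaSraAddXor(int32_t X) {
  int32_t Y = X >> 31;
  return (X + Y) ^ Y;
}

int main() {
  assert(absViaSraAddXor(5) == 5);
  assert(absViaSraAddXor(-5) == 5);
  assert(absViaSraAddXor(0) == 0);
  return 0;
}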
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ec52d7e906..2e09ec08fd 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -205,6 +205,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); @@ -243,7 +244,6 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); - SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); @@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); - case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + DebugLoc DL = N->getDebugLoc(); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. 
- if (VT.isVector() && !LegalOperations) { + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) @@ -4496,8 +4538,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, DAG.getSetCC(N->getDebugLoc(), TLI.getSetCCResultType(VT), @@ -5835,14 +5877,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // No FP constant should be created after legalization, as the Instruction + // Selection pass has a hard time dealing with FP constants. + // + // We don't need to test this condition for transformations like the + // following, as the DAG being transformed implies it is legal to take an + // FP constant as an operand. + // + // (fadd (fmul c, x), x) -> (fmul c+1, x) + // + bool AllowNewFpConst = (Level < AfterLegalizeDAG); + // If allowed, fold (fadd (fneg x), x) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { return DAG.getConstantFP(0.0, VT); } // If allowed, fold (fadd x, (fneg x)) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { return DAG.getConstantFP(0.0, VT); } @@ -5944,7 +5997,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N0.getOpcode() == ISD::FADD) { + if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul 3.0, x) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && @@ -5954,7 +6007,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N1.getOpcode() == ISD::FADD) { + if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul 3.0, x) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && @@ -5965,7 +6018,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) - if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + if (AllowNewFpConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -6709,7 +6763,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target.
if (N1.getOpcode() == ISD::SETCC && - TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + TLI.isOperationLegalOrCustom(ISD::BR_CC, + N1.getOperand(0).getValueType())) { return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); @@ -6810,9 +6865,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { MVT::Other, Chain, Tmp, N2); } - // visitXOR has changed XOR's operands. - Op0 = TheXor->getOperand(0); - Op1 = TheXor->getOperand(1); + // visitXOR has changed XOR's operands or replaced the XOR completely; + // bail out. + return SDValue(N, 0); } } @@ -7097,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && "Expected BasePtr operand"); - APInt OV = - cast<ConstantSDNode>(Offset)->getAPIntValue(); - if (AM == ISD::PRE_DEC) - OV = -OV; + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expression that needs to be rewritten. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1 ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); - APInt CNV = CN->getAPIntValue(); - if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) - CNV += OV; - else - CNV -= OV; + int X0, X1, Y0, Y1; + APInt Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); - SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); - SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); - if (OffsetIdx == 0) - std::swap(NewOp1, NewOp2); + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; - SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + + SDValue NewUse = DAG.getNode(Opcode, OtherUses[i]->getDebugLoc(), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); @@ -7698,16 +7768,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -/// Returns the base pointer and an integer offset from that object. -static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { - if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - SDValue Base = Ptr->getOperand(0); - return std::make_pair(Base, Offset); +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation.
+/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; } - return std::make_pair(Ptr, 0); -} + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // Just Base or possibly anything else. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Base + offset. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. @@ -7754,16 +7890,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; - // This holds the base pointer and the offset in bytes from the base pointer. - std::pair<SDValue, int64_t> BasePtr = - GetPointerBaseAndOffset(St->getBasePtr()); + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.first.getNode()) + if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.first.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.getOpcode() == ISD::UNDEF) return false; // Save the LoadSDNodes that we find in the chain. @@ -7785,11 +7921,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. 
- std::pair<SDValue, int64_t> Ptr = - GetPointerBaseAndOffset(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (Ptr.first.getNode() != BasePtr.first.getNode()) + if (!Ptr.equalBaseIndex(BasePtr)) break; // Check that the alignment is the same. @@ -7815,7 +7950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); // Find the next memory operand in the chain. If the next operand in the // chain is a store then move up and continue the scan with the next @@ -7902,6 +8037,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); if (TLI.isTypeLegal(StoreTy)) LastLegalType = i+1; + // Or check whether a truncstore is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) + LastLegalType = i+1; + } // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); @@ -8012,7 +8155,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. - SDValue LdBasePtr; + BaseIndexOffset LdBasePtr; for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); @@ -8038,21 +8181,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - std::pair<SDValue, int64_t> LdPtr = - GetPointerBaseAndOffset(Ld->getBasePtr()); - + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.getNode()) { + if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. - if (LdPtr.first != LdBasePtr) + if (!LdPtr.equalBaseIndex(LdBasePtr)) break; } else { // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr.first; + LdBasePtr = LdPtr; } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); } if (LoadNodes.size() < 2) @@ -8087,6 +8228,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); if (TLI.isTypeLegal(StoreTy)) LastLegalIntegerType = i + 1; + // Or check whether a truncstore and extload is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + LastLegalIntegerType = i+1; + } } // Only use vector types if the vector type is larger than the integer type. 
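Aside: the re-association used in CombineToPreIndexedLoadStore above, t0 = (x0*offset0 - x1*y0*y1*offset1) + (y0*y1)*t1, follows from solving the indexed-access equation for ptr0 (using y1*y1 == 1) and substituting into the other use. A brute-force sanity check of the identity — illustration only, not part of the patch:

#include <cassert>

int main() {
  const int Signs[2] = {-1, 1};
  for (int X0 : Signs) for (int Y0 : Signs)
  for (int X1 : Signs) for (int Y1 : Signs)
    for (int Ptr0 = -4; Ptr0 <= 4; ++Ptr0)
      for (int Off0 = -3; Off0 <= 3; ++Off0)
        for (int Off1 = -3; Off1 <= 3; ++Off1) {
          int T0 = X0 * Off0 + Y0 * Ptr0; // the other use of ptr0
          int T1 = X1 * Off1 + Y1 * Ptr0; // the indexed load/store
          assert(T0 == (X0 * Off0 - X1 * Y0 * Y1 * Off1) + (Y0 * Y1) * T1);
        }
  return 0;
}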
@@ -8970,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(N->getValueType(0)); + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR + // nodes often generate nop CONCAT_VECTOR nodes. + // Scan the CONCAT_VECTOR operands and look for CONCAT operations that + // place the incoming vectors at the exact same location. + SDValue SingleSource = SDValue(); + unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getOpcode() == ISD::UNDEF) + continue; + + // Check if this is the identity extract: + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // Find the single incoming vector for the extract_subvector. + if (SingleSource.getNode()) { + if (Op.getOperand(0) != SingleSource) + return SDValue(); + } else { + SingleSource = Op.getOperand(0); + + // Check the source type is the same as the type of the result. + // If not, this concat may extend the vector, so we cannot + // optimize it away. + if (SingleSource.getValueType() != N->getValueType(0)) + return SDValue(); + } + + unsigned IdentityIndex = i * PartNumElem; + ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // The extract index must be constant. + if (!CS) + return SDValue(); + + // Check that we are reading from the identity index. + if (CS->getZExtValue() != IdentityIndex) + return SDValue(); + } + + if (SingleSource.getNode()) + return SingleSource; + return SDValue(); } @@ -8977,12 +9174,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + DebugLoc dl = N->getDebugLoc(); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); EVT SmallVT = V->getOperand(1).getValueType(); - if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); // Only handle cases where both indexes are constants with the same type.
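Aside: the (extract_subvec (concat V1, V2, ...), i) -> Vi combine above is plain index arithmetic: each concat operand contributes NumElems elements, so a constant extract index that is a multiple of NumElems selects operand Idx / NumElems. A quick standalone illustration with hypothetical values, not part of the patch:

#include <cassert>

int main() {
  // Extracting v4i32 at constant index 8 from (concat v4i32 V1, V2, V3, V4):
  unsigned NumElems = 4;   // elements per concat operand
  unsigned Idx = 8;        // extract index, in elements of the big vector
  assert(Idx % NumElems == 0 && "not an identity extract");
  assert(Idx / NumElems == 2); // operand 2, i.e. V3
  return 0;
}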
@@ -8995,31 +9212,57 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: - // indices are equal => V1 + // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getNode(ISD::BITCAST, dl, + N->getOperand(0).getValueType(), + V->getOperand(0)), N->getOperand(1)); } } - if (V->getOpcode() == ISD::CONCAT_VECTORS) { - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. - if (V->getOperand(0).getValueType() != NVT) + return SDValue(); +} + +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + // Make sure we're dealing with a copy. + unsigned Begin = I * NumElemsPerConcat; + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); + + for (unsigned J = 1; J != NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + } + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); } - return SDValue(); + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + Ops.size()); } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { @@ -9123,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.getOpcode() == ISD::UNDEF || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + SDValue V = partitionShuffleOfConcats(N, DAG); + + if (V.getNode()) + return V; + } + // If this shuffle node is simply a swizzle of another shuffle node, // and it reverses the swizzle of the previous shuffle then we can // optimize shuffle(shuffle(x, undef), undef) -> x. 
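Aside: partitionShuffleOfConcats above succeeds only when every NumElemsPerConcat-sized chunk of the shuffle mask is a consecutive run starting at a multiple of NumElemsPerConcat, i.e. the shuffle merely reorders whole concat operands. A standalone sketch of that mask test — hypothetical helper, not part of the patch:

#include <cassert>
#include <vector>

// Returns true if Mask moves whole PerConcat-element sub-vectors around
// without mixing their elements. Undef (-1) entries fail the modulo test
// and are rejected, mirroring the patch.
static bool isConcatPartition(const std::vector<int> &Mask, int PerConcat) {
  for (size_t Begin = 0; Begin < Mask.size(); Begin += PerConcat) {
    // Each chunk must start at a sub-vector boundary...
    if (Mask[Begin] % PerConcat != 0)
      return false;
    // ...and be a consecutive run from there.
    for (int J = 1; J != PerConcat; ++J)
      if (Mask[Begin + J - 1] + 1 != Mask[Begin + J])
        return false;
  }
  return true;
}

int main() {
  // v8 shuffle of concat(A0, A1) and concat(B0, B1), with v4 parts:
  // mask <4,5,6,7,8,9,10,11> selects A1 then B0 -> concat(A1, B0).
  assert(isConcatPartition({4, 5, 6, 7, 8, 9, 10, 11}, 4));
  // A run that straddles a sub-vector boundary cannot be partitioned.
  assert(!isConcatPartition({1, 2, 3, 4, 8, 9, 10, 11}, 4));
  return 0;
}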
@@ -9159,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { - if (!TLI.getShouldFoldAtomicFences()) - return SDValue(); - - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)), atomic.getResNo()); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)), - atomic.getResNo()); - default: - return SDValue(); - } -} - /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 04f5b32e04..288499ac6f 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -696,6 +696,13 @@ bool FastISel::SelectCall(const User *I) { UpdateValueMap(Call, ResultReg); return true; } + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; + } } // Usually, it does not make sense to initialize a value, @@ -1176,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); + assert (MaterialReg != 0 && "Unable to materialize imm."); + if (MaterialReg == 0) return 0; } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1496,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return true; } + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. 
+ // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (LoadReg == 0) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + + diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f085e444b6..2a1d8c2819 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: - case ISD::MEMBARRIER: { + case ISD::ATOMIC_FENCE: { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. TargetLowering::ArgListTy Args; @@ -3632,8 +3631,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(VT.isVector())) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + Tmp3); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 1ee21921b4..de217d8571 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { return VT == MVT::f32 ? Call_F32 : VT == MVT::f64 ? Call_F64 : VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : VT == MVT::ppcf128 ? 
Call_PPCF128 : RTLIB::UNKNOWN_LIBCALL; } @@ -156,6 +158,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, + RTLIB::ADD_F128, RTLIB::ADD_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -167,6 +170,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, + RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -220,6 +224,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, + RTLIB::COS_F128, RTLIB::COS_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -232,6 +237,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, + RTLIB::DIV_F128, RTLIB::DIV_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -243,6 +249,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, + RTLIB::EXP_F128, RTLIB::EXP_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -254,6 +261,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, + RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -265,6 +273,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -276,6 +285,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, + RTLIB::LOG_F128, RTLIB::LOG_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -287,6 +297,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, + RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -298,6 +309,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, + RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -311,6 +323,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, + RTLIB::FMA_F128, RTLIB::FMA_PPCF128), NVT, Ops, 3, false, N->getDebugLoc()); } @@ -323,6 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, + RTLIB::MUL_F128, RTLIB::MUL_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -334,6 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -347,6 +362,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -384,6 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, + RTLIB::POW_F128, RTLIB::POW_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -397,6 +414,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, + RTLIB::POWI_F128, RTLIB::POWI_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -409,6 +427,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F32, 
RTLIB::REM_F64, RTLIB::REM_F80, + RTLIB::REM_F128, RTLIB::REM_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -420,6 +439,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, + RTLIB::RINT_F128, RTLIB::RINT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -431,6 +451,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, + RTLIB::SIN_F128, RTLIB::SIN_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -442,6 +463,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, + RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -454,6 +476,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -465,6 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -800,6 +824,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -839,7 +864,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -848,7 +874,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -859,6 +886,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, RTLIB::COPYSIGN_F32, RTLIB::COPYSIGN_F64, RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, RTLIB::COPYSIGN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -868,7 +896,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_PPCF128), + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -880,6 +909,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, + RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -890,7 +920,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -899,7 +930,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, - 
RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -907,8 +939,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -917,7 +950,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -926,7 +960,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -934,8 +969,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32,RTLIB::LOG10_F64, - RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -947,6 +983,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, + RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, N->getDebugLoc()); @@ -960,6 +997,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, + RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -972,6 +1010,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -997,7 +1036,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_PPCF128), + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1006,7 +1046,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1015,7 +1066,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, 
RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1024,7 +1076,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1033,7 +1086,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1045,6 +1099,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -1055,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 182b7f3e68..cd2f060ce0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -515,7 +515,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { // Only use the result of getSetCCResultType if it is legal, // otherwise just use the promoted result type (NVT). if (!TLI.isTypeLegal(SVT)) - SVT = NVT; + SVT = NVT; DebugLoc dl = N->getDebugLoc(); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && @@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), - GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); + SDValue Res = GetPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -549,22 +550,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { // The input value must be properly sign extended. SDValue Res = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), - Res.getValueType(), Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. 
- EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { @@ -775,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; case ISD::VSELECT: @@ -959,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { - SDValue NewOps[6]; - DebugLoc dl = N->getDebugLoc(); - NewOps[0] = N->getOperand(0); - for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { - SDValue Flag = GetPromotedInteger(N->getOperand(i)); - NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); - } - return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. @@ -2101,8 +2091,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // have an illegal type. Fix that first by casting the operand, otherwise // the new SHL_PARTS operation would need further legalization. SDValue ShiftOp = N->getOperand(1); - MVT ShiftTy = TLI.getShiftAmountTy(VT); - assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) && + EVT ShiftTy = TLI.getShiftAmountTy(VT); + assert(ShiftTy.getScalarType().getSizeInBits() >= + Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index e26d1656e8..b6436bf427 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), 
DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) + for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); + // Propagate node ordering + DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); + } return true; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 7de42ea012..1c4274a910 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -270,7 +270,6 @@ private: SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); @@ -465,6 +464,7 @@ private: void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -530,6 +530,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. 
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_EXTEND(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -580,6 +581,7 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 09a50d9263..04c6bfd0c2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + Res = ScalarizeVecOp_EXTEND(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } +/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs +/// to be scalarized, it must be <1 x ty>. Extend the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SmallVector<SDValue, 1> Ops(1); + Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + N->getValueType(0).getScalarType(), Elt); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], 1); +} + /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - /// use a BUILD_VECTOR instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { @@ -1026,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1042,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FTRUNC: - case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -1252,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - // The input operands all must have the same type, and we know the result the - // result type is valid. Convert this to a buildvector which extracts all the + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the // input elements.
// TODO: If the input elements are power-two vectors, we could convert this to // a new CONCAT_VECTORS node with elements that are half-wide. @@ -1273,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only works if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + DebugLoc DL = N->getDebugLoc(); + + // Extract the halves of the input via extract_subvector. + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); + SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(0)); + SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(NumElements/2)); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. + EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up.
+ return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index d2269f8acc..7e7b8974be 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -33,8 +33,10 @@ class SDNodeOrdering { public: SDNodeOrdering() {} - void add(const SDNode *Node, unsigned O) { - OrderMap[Node] = O; + void add(const SDNode *Node, unsigned NewOrder) { + unsigned &OldOrder = OrderMap[Node]; + if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) + OldOrder = NewOrder; } void remove(const SDNode *Node) { DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index addfccbd00..c009cfcc51 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -904,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { SUnit *OldSU = Sequence.back(); while (true) { Sequence.pop_back(); - if (SU->isSucc(OldSU)) - // Don't try to remove SU from AvailableQueue. - SU->isAvailable = false; // FIXME: use ready cycle instead of height CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); @@ -1363,8 +1360,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0]) - << " SU #" << CurSU->NodeNum << '\n'); + DEBUG(dbgs() << " Interfering reg " << + (LRegs[0] == TRI->getNumRegs() ? "CallResource" + : TRI->getName(LRegs[0])) + << " SU #" << CurSU->NodeNum << '\n'); std::pair<LRegsMapT::iterator, bool> LRegsPair = LRegsMap.insert(std::make_pair(CurSU, LRegs)); if (LRegsPair.second) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index db8ae6ea06..15235c8ac3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1518,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); - if (ISD::isZEXTLoad(Op.getNode())) { + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); @@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ break; } - // Handle LOADX separately here. EXTLOAD case will fallthrough. 
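The SDNodeOrdering::add change above turns the map from last-writer-wins into keep-the-earliest: 0 is treated as "no order assigned yet", and later calls can only move a node toward an earlier order. A self-contained model of the new behavior (standard C++, not the LLVM class):

    #include <cassert>
    #include <unordered_map>

    // Illustrative model of the revised add(): 0 is the "unassigned" sentinel,
    // and a node that already has an order only moves toward earlier orders.
    struct NodeOrdering {
      std::unordered_map<const void *, unsigned> OrderMap;

      void add(const void *Node, unsigned NewOrder) {
        unsigned &OldOrder = OrderMap[Node]; // value-initialized to 0 if absent
        if (OldOrder == 0 || NewOrder < OldOrder)
          OldOrder = NewOrder;
      }
    };

    int main() {
      NodeOrdering O;
      int N;
      O.add(&N, 7);
      O.add(&N, 9);  // ignored: later than the recorded order
      O.add(&N, 3);  // kept: earlier than the recorded order
      assert(O.OrderMap[&N] == 3);
    }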
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { - unsigned ExtType = LD->getExtensionType(); - switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp; + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } } } @@ -2781,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } // Handle the scalar case first. - if (Outputs.size() == 1) + if (Scalar1 && Scalar2) return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. @@ -2912,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + assert((!VT.isVector() || VT == N2.getValueType()) && + "Vector shift amounts must be the same as their first arg"); // Verify that the shift amount VT is big enough to hold valid shift // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses @@ -4702,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector<EVT> &ResultTys, + ArrayRef<EVT> ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); @@ -5246,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5261,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5269,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, Ops,
NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * @@ -5290,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5298,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5307,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5324,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5333,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, - const std::vector<EVT> &ResultTys, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<EVT> ResultTys, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = 0; + const SDValue *Ops = OpsArray.data(); + unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; 
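The getMachineNode overloads above trade (const SDValue *Ops, unsigned NumOps) pairs for ArrayRef<SDValue>, so fixed-size arrays no longer need array_lengthof and the empty case becomes a bare None. The sketch below (a hypothetical ArrayView, not llvm::ArrayRef) shows why the call sites shrink:

    #include <cstddef>
    #include <vector>

    // Minimal ArrayRef-like view (a sketch, not llvm::ArrayRef): it carries a
    // pointer and a length, and builds implicitly from the containers callers
    // already have, so "Ops, array_lengthof(Ops)" pairs collapse to just "Ops".
    template <typename T> class ArrayView {
      const T *Data = nullptr;
      size_t Length = 0;

    public:
      ArrayView() = default;                                 // the empty case
      template <size_t N>
      ArrayView(const T (&Arr)[N]) : Data(Arr), Length(N) {} // C array
      ArrayView(const std::vector<T> &V) : Data(V.data()), Length(V.size()) {}

      const T *data() const { return Data; }
      size_t size() const { return Length; }
    };

    size_t countOps(ArrayView<int> Ops) { return Ops.size(); }

    int main() {
      int Ops[] = {1, 2, 3};
      std::vector<int> V = {4, 5};
      return (countOps(Ops) == 3 && countOps(V) == 2 && countOps({}) == 0) ? 0 : 1;
    }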
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b7a7e2e133..9d02fc7323 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } else { Ctx.emitError(ErrMsg); } - report_fatal_error("Cannot handle scalar-to-vector conversion!"); + return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && @@ -1872,13 +1872,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { assert(Fn->getIntrinsicID() == Intrinsic::donothing); - // If donothing has a landingpad, we should clear CurrentCallSite. - if (LandingPad) { - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - unsigned CallSiteIndex = MMI.getCurrentCallSite(); - if (CallSiteIndex) - MMI.setCurrentCallSite(0); - } // Ignore invokes to @llvm.donothing: jump directly to the next BB. } else LowerCallTo(&I, getValue(Callee), false, LandingPad); @@ -2661,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -4921,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isOperationLegalOrCustom(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5042,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return 0; case Intrinsic::var_annotation: // Discard annotate attributes return 0; @@ -5262,6 +5259,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; + Entry.isReturned = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); @@ -5279,13 +5277,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + 
Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); + Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -6190,6 +6189,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" " don't know how to handle tied " "indirect register inputs"); + report_fatal_error("Cannot handle indirect register inputs!"); } RegsForValue MatchedRegs; @@ -6198,10 +6198,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) - MatchedRegs.Regs.push_back - (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); - + i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); + else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + " type register class is not natively supported!"); + report_fatal_error("inline asm error: This value type register " + "class is not natively supported!"); + } + } // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag, CS.getInstruction()); @@ -6421,6 +6428,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. + CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + // Handle all of the outgoing arguments. 
CLI.Outs.clear(); CLI.OutVals.clear(); @@ -6474,6 +6503,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; + // Conservatively only handle 'returned' on non-vectors for now + if (Args[i].isReturned && !Op.getValueType().isVector()) { + assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && + "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && + CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) + Flags.setReturned(); + } + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); @@ -6493,28 +6542,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } - // Handle the incoming return values from the call. - CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); - } - } - SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); @@ -6621,9 +6648,7 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } -void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { - // If this is the entry block, emit arguments. 
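The 'returned' handling above (the attribute marks an argument the callee passes through as its return value) only sets Flags.setReturned() when forwarding the argument register cannot change observable bits. A condensed restatement of that guard, with hypothetical names standing in for the LLVM types:

    // A sketch of the condition placed in front of Flags.setReturned() above:
    // the argument register may stand in for the return value only if no bits
    // are invented along the way.
    enum class Ext { None, Any, Sign, Zero };

    struct ArgInfo {
      unsigned PartBits;      // NumParts * PartVT.getSizeInBits()
      unsigned ValueBits;     // VT.getSizeInBits()
      Ext ExtendKind;         // how the argument is extended for the call
      bool RetSExt, RetZExt;  // how the callee's return is extended
      bool ArgSExt, ArgZExt;  // how the argument itself is marked
    };

    bool canForwardReturnedArg(const ArgInfo &A) {
      // Safe if the register parts are exactly as wide as the value...
      if (A.PartBits == A.ValueBits)
        return true;
      // ...or if argument and return value are extended the same known way.
      return A.ExtendKind != Ext::Any && A.RetSExt == A.ArgSExt &&
             A.RetZExt == A.ArgZExt;
    }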
- const Function &F = *LLVMBB->getParent(); +void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); const DataLayout *TD = TLI.getDataLayout(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 2ce34930a3..50b5bccf7c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1dcd6cdf2c..02b838234d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -58,18 +58,21 @@ #include <algorithm> using namespace llvm; +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); - -#ifndef NDEBUG STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); -STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); -STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); +STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); +#ifndef NDEBUG static cl::opt<bool> EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, cl::desc("Enable extra verbose messages in the \"fast\" " "instruction selector")); + // Terminators STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); @@ -363,6 +366,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); ST.resetSubtargetFeatures(MF); + TM.resetTargetOptions(MF); DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -734,7 +738,7 @@ public: } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins: BB#" + DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() << " '" << FuncInfo->MBB->getName() << "'\n"); @@ -777,8 +781,12 @@ void SelectionDAGISel::DoInstructionSelection() { if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. - if (ResNode) + if (ResNode) { + // Propagate ordering + CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); + ReplaceUses(Node, ResNode); + } // If after the replacement this node is not used any more, // remove this dead node. 
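Several fast-isel counters above (NumFastIselFailures, NumFastIselSuccess, NumDAGIselRetries) move out of the #ifndef NDEBUG region, and later hunks drop the DEBUG(...) wrappers around their increments, so the statistics are updated in every build configuration rather than only in assert builds. A minimal model of the difference:

    #include <atomic>

    // Illustrative model: a counter that was previously bumped only inside
    // DEBUG(...) (i.e. in assert builds with -debug) becomes a plain counter
    // bumped on every run, so -stats output is meaningful in release builds.
    struct Statistic {
      std::atomic<unsigned> Value{0};
      Statistic &operator++() { ++Value; return *this; }
    };

    Statistic NumFastIselSuccess;  // name borrowed from the hunk above

    void onFastIselSuccess() {
      ++NumFastIselSuccess;  // previously: DEBUG(++NumFastIselSuccess);
    }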
@@ -789,7 +797,7 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->setRoot(Dummy.getValue()); } - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -819,84 +827,6 @@ void SelectionDAGISel::PrepareEHLandingPad() { if (Reg) MBB->addLiveIn(Reg); } -/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified -/// load into the specified FoldInst. Note that we could have a sequence where -/// multiple LLVM IR instructions are folded into the same machineinstr. For -/// example we could have: -/// A: x = load i32 *P -/// B: y = icmp A, 42 -/// C: br y, ... -/// -/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and -/// any other folded instructions) because it is between A and C. -/// -/// If we succeed in folding the load into the operation, return true. -/// -bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, - const Instruction *FoldInst, - FastISel *FastIS) { - // We know that the load has a single use, but don't know what it is. If it - // isn't one of the folded instructions, then we can't succeed here. Handle - // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - - const Instruction *TheUser = LI->use_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. - // Stay in the right block. - TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. - // If there are multiple or no uses of this instruction, then bail out. - if (!TheUser->hasOneUse()) - return false; - - TheUser = TheUser->use_back(); - } - - // If we didn't find the fold instruction, then we failed to collapse the - // sequence. - if (TheUser != FoldInst) - return false; - - // Don't try to fold volatile loads. Target has to deal with alignment - // constraints. - if (LI->isVolatile()) return false; - - // Figure out which vreg this is going into. If there is no assigned vreg yet - // then there actually was no reference to it. Perhaps the load is referenced - // by a dead instruction. - unsigned LoadReg = FastIS->getRegForValue(LI); - if (LoadReg == 0) - return false; - - // Check to see what the uses of this vreg are. If it has no uses, or more - // than one use (at the machine instr level) then we can't fold it. - MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); - if (RI == RegInfo->reg_end()) - return false; - - // See if there is exactly one use of the vreg. If there are multiple uses, - // then the instruction got lowered to multiple machine instructions or the - // use of the loaded value ended up being multiple operands of the result, in - // either case, we can't fold this. - MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; - if (PostRI != RegInfo->reg_end()) - return false; - - assert(RI.getOperand().isUse() && - "The only use of the vreg must be a use, we haven't emitted the def!"); - - MachineInstr *User = &*RI; - - // Set the insertion point properly. Folding the load can cause generation of - // other random instructions (like sign extends) for addressing modes, make - // sure they get inserted in a logical place before the new instruction. - FuncInfo->InsertPt = User; - FuncInfo->MBB = User->getParent(); - - // Ask the target to try folding the load. 
- return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); -} - /// isFoldedOrDeadInstruction - Return true if the specified instruction is /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. @@ -1024,13 +954,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; - FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. @@ -1044,17 +972,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { - + // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) - // The "fast" selector couldn't lower these arguments. For the - // purpose of debugging, just abort. llvm_unreachable("FastISel didn't lower all arguments"); - // Call target indepedent SDISel argument lowering code if the target - // specific routine is not successful. - LowerArguments(LLVMBB); + // Use SelectionDAG argument lowering + LowerArguments(Fn); CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); CodeGenAndEmitDAG(); @@ -1087,7 +1015,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { --NumFastIselRemaining; - DEBUG(++NumFastIselSuccess); + ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. // Try to fold the load if so. @@ -1099,11 +1027,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; - DEBUG(++NumFastIselSuccess); + ++NumFastIselSuccess; } continue; } @@ -1142,21 +1070,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Recompute NumFastIselRemaining as Selection DAG instruction // selection may have handled the call, input args, etc. unsigned RemainingNow = std::distance(Begin, BI); - (void) RemainingNow; - DEBUG(NumFastIselFailures += NumFastIselRemaining - RemainingNow); - DEBUG(NumFastIselRemaining = RemainingNow); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + NumFastIselRemaining = RemainingNow; continue; } if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { // Don't abort, and use a different message for terminator misses. 
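The removed TryToFoldFastISelLoad above (its logic now lives behind FastISel::tryToFoldLoad, which the call site below invokes directly) walked a short chain of single-use instructions from the load to the fold target before attempting the fold. A compact model of that scan, with illustrative types rather than the LLVM classes:

    // Walk from the load through a short chain of single-use instructions in
    // the same block and check that it ends at the instruction we want to
    // fold into; bail out on multi-use steps or overly long chains.
    struct Inst {
      Inst *SingleUser = nullptr;    // null if zero or multiple uses
      const void *Parent = nullptr;  // basic-block identity
    };

    bool chainsToFoldTarget(const Inst *Load, const Inst *FoldInst) {
      unsigned MaxUsers = 6;  // don't scan huge single-use chains
      const Inst *User = Load->SingleUser;
      while (User && User != FoldInst && User->Parent == FoldInst->Parent &&
             --MaxUsers)
        User = User->SingleUser;  // null here means a multi-use step: give up
      return User == FoldInst;
    }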
- DEBUG(NumFastIselFailures += NumFastIselRemaining); + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - DEBUG(NumFastIselFailures += NumFastIselRemaining); + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); @@ -1172,8 +1099,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } else { // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) - LowerArguments(LLVMBB); + if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + LowerArguments(Fn); + } } if (Begin != BI) @@ -1668,9 +1597,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - std::vector<EVT> VTs; - VTs.push_back(MVT::Other); - VTs.push_back(MVT::Glue); + EVT VTs[] = { MVT::Other, MVT::Glue }; SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -1767,7 +1694,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(errs() << "ISEL: Match complete!\n"); + DEBUG(dbgs() << "ISEL: Match complete!\n"); } enum ChainResult { @@ -2272,9 +2199,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SmallVector<SDNode*, 3> ChainNodesMatched; SmallVector<SDNode*, 3> GlueResultNodesMatched; - DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); - errs() << '\n'); + dbgs() << '\n'); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -2286,7 +2213,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -2353,10 +2280,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!Result) break; - DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); - DEBUG(++NumDAGIselRetries); + ++NumDAGIselRetries; // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. @@ -2483,7 +2410,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } @@ -2515,7 +2442,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. 
- DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); continue; } @@ -2604,11 +2531,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { - int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); - Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); + Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); @@ -2783,7 +2710,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), - VTList, Ops.data(), Ops.size()); + VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. for (unsigned i = 0, e = VTs.size(); i != e; ++i) { @@ -2859,9 +2786,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG(errs() << " " + DEBUG(dbgs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") - << " node: "; Res->dump(CurDAG); errs() << "\n"); + << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { @@ -2936,8 +2863,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. - DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); - DEBUG(++NumDAGIselRetries); + DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); @@ -2956,7 +2883,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 9ab491808f..2feea59c03 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, // Debugging level for shrink wrapping. 
enum ShrinkWrapDebugLevel { - None, BasicInfo, Iterations, Details + Disabled, BasicInfo, Iterations, Details }; static cl::opt<enum ShrinkWrapDebugLevel> ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, cl::desc("Print shrink wrapping debugging information"), cl::values( - clEnumVal(None , "disable debug output"), + clEnumVal(Disabled , "disable debug output"), clEnumVal(BasicInfo , "print basic DF sets"), clEnumVal(Iterations, "print SR sets for each iteration"), clEnumVal(Details , "print all DF sets"), diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index b58bb85e49..3903743878 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector<ReturnInst*, 16> Returns; - SmallVector<InvokeInst*, 16> Invokes; + SmallVector<ReturnInst*, 16> Returns; + SmallVector<InvokeInst*, 16> Invokes; SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (Function *Callee = II->getCalledFunction()) + if (Callee->isIntrinsic() && + Callee->getIntrinsicID() == Intrinsic::donothing) { + // Remove the NOP invoke. + BranchInst::Create(II->getNormalDest(), II); + II->eraseFromParent(); + continue; + } + Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 320128a999..c5bbba3ffc 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -29,6 +29,7 @@ #define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index c10e3190f6..4b7e4609e5 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -67,14 +67,14 @@ DisableColoring("no-stack-coloring", /// code. If this flag is enabled, we try to save the user. 
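The ShrinkWrapDebugLevel rename above (None to Disabled) most likely sidesteps a clash with llvm::None, which is visible wherever the llvm namespace is open. A plain-C++ sketch of the kind of ambiguity an unscoped 'None' enumerator creates (namespaces and names invented for illustration):

    namespace support {
    struct NoneType {};
    const NoneType None = {};
    }

    namespace pass {
    // With 'enum DebugLevel { None, ... }' instead, a bare 'None' below would
    // be ambiguous between the enumerator and support::None.
    enum DebugLevel { Disabled, BasicInfo, Iterations, Details };
    }

    int main() {
      using namespace support;
      using namespace pass;
      DebugLevel L = Disabled;  // unambiguous after the rename
      (void)None;               // now clearly support::None
      return L;
    }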
static cl::opt<bool> ProtectFromEscapedAllocas("protect-from-escaped-allocas", - cl::init(false), cl::Hidden, - cl::desc("Do not optimize lifetime zones that are broken")); + cl::init(false), cl::Hidden, + cl::desc("Do not optimize lifetime zones that " + "are broken")); STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); -STATISTIC(EscapedAllocas, - "Number of allocas that escaped the lifetime region"); +STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); //===----------------------------------------------------------------------===// // StackColoring Pass @@ -574,7 +574,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { SlotIndex Index = Indexes->getInstructionIndex(I); LiveInterval *Interval = Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && - "Found instruction usage outside of live range."); + "Found instruction usage outside of live range."); } #endif @@ -738,9 +738,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { std::stable_sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); - bool Chanded = true; - while (Chanded) { - Chanded = false; + bool Changed = true; + while (Changed) { + Changed = false; for (unsigned I = 0; I < NumSlots; ++I) { if (SortedSlots[I] == -1) continue; @@ -757,7 +757,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. if (!First->overlaps(*Second)) { - Chanded = true; + Changed = true; First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index f3be37c9ee..fbef34772b 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -31,7 +31,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; STATISTIC(NumFunProtected, "Number of functions protected"); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 2a02f6a3c0..8074d167f4 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + initActions(); + + // Perform these initializations only once. 
+ IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +void TargetLoweringBase::initActions() { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); // Set default actions for various operations. for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { @@ -702,50 +745,17 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
// setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; - BenefitFromCodePlacementOpt = false; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - PredictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames, TM); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); } -TargetLoweringBase::~TargetLoweringBase() { - delete &TLOF; +MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize(0)); } -MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize(0)); +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return getScalarShiftAmountTy(LHSTy); } /// canOpTrap - Returns true if the operation can trap for the value type. @@ -905,6 +915,15 @@ void TargetLoweringBase::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); } + // Decide how to handle f128. If the target does not have native f128 support, + // expand it to i128 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f128)) { + NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; + RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; + TransformToType[MVT::f128] = MVT::i128; + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + } + // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. if (!isTypeLegal(MVT::f64)) { diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index bdff954d61..f91688531b 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; using namespace dwarf; @@ -540,11 +539,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - - // Handle thread local data. 
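The TargetLoweringBase change above splits shift-amount typing in two: targets override getScalarShiftAmountTy, while the generic getShiftAmountTy returns the LHS type itself for vectors so each lane gets its own shift amount. A simplified model of the dispatch, with a plain struct standing in for EVT/MVT:

    #include <cassert>

    struct Ty {
      bool IsInteger;
      bool IsVector;
    };

    Ty getScalarShiftAmountTy(const Ty & /*LHS*/) {
      // Target hook: e.g. an integer as wide as a pointer.
      return {true, false};
    }

    Ty getShiftAmountTy(const Ty &LHS) {
      assert(LHS.IsInteger && "Shift amount is not an integer type!");
      if (LHS.IsVector)
        return LHS;  // per-lane shift amounts, same vector type as the value
      return getScalarShiftAmountTy(LHS);
    }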
- if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -597,6 +591,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -759,8 +757,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; - if (Kind.isThreadLocal()) - return ".tls$"; + if (Kind.isThreadLocal()) { + // 'LLVM' is just an arbitrary string to ensure that the section name gets + // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. + return ".tls$LLVM"; + } if (Kind.isWriteable()) return ".data$"; return ".rdata$"; @@ -796,3 +797,49 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return getDataSection(); } +void TargetLoweringObjectFileCOFF:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + MDNode *LinkerOptions = 0; + + // Look for the "Linker Options" flag, since it's the only one we support. + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + break; + } + } + if (!LinkerOptions) + return; + + // Emit the linker options to the linker .drectve section. According to the + // spec, this section is a space-separated string containing flags for the linker. + const MCSection *Sec = getDrectveSection(); + Streamer.SwitchSection(Sec); + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + StringRef Op = MDOption->getString(); + // Lead with a space for consistency with our dllexport implementation. + std::string Escaped(" "); + if (Op.find(" ") != StringRef::npos) { + // The PE-COFF spec says args with spaces must be quoted.
It doesn't say + // how to escape quotes, but it probably uses this algorithm: + // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx + // FIXME: Reuse escaping code from Support/Windows/Program.inc + Escaped.push_back('\"'); + Escaped.append(Op); + Escaped.push_back('\"'); + } else { + Escaped.append(Op); + } + Streamer.EmitBytes(Escaped); + } + } +} diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 0f59d0169e..435a5e7e0b 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } +bool TargetOptions::operator==(const TargetOptions &TO) { +#define ARE_EQUAL(X) X == TO.X + return + ARE_EQUAL(UnsafeFPMath) && + ARE_EQUAL(NoInfsFPMath) && + ARE_EQUAL(NoNaNsFPMath) && + ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && + ARE_EQUAL(UseSoftFloat) && + ARE_EQUAL(NoZerosInBSS) && + ARE_EQUAL(JITExceptionHandling) && + ARE_EQUAL(JITEmitDebugInfo) && + ARE_EQUAL(JITEmitDebugInfoToDisk) && + ARE_EQUAL(GuaranteedTailCallOpt) && + ARE_EQUAL(DisableTailCalls) && + ARE_EQUAL(StackAlignmentOverride) && + ARE_EQUAL(RealignStack) && + ARE_EQUAL(SSPBufferSize) && + ARE_EQUAL(EnableFastISel) && + ARE_EQUAL(PositionIndependentExecutable) && + ARE_EQUAL(EnableSegmentedStacks) && + ARE_EQUAL(UseInitArray) && + ARE_EQUAL(TrapFuncName) && + ARE_EQUAL(FloatABIType) && + ARE_EQUAL(AllowFPOpFusion); +#undef ARE_EQUAL +} diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index f31f67d58c..1bf14dbcef 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const { // Get the definition's scheduling class descriptor from this machine model. unsigned SchedClass = MI->getDesc().getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + return SCDesc; #ifndef NDEBUG unsigned NIter = 0; @@ -240,7 +242,10 @@ unsigned TargetSchedModel::computeOperandLatency( report_fatal_error(ss.str()); } #endif - return DefMI->isTransient() ? 0 : 1; + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 26c5fe4dcb..7ca2beef65 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -43,11 +43,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -59,6 +59,12 @@ STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); +// Temporary flag to disable rescheduling. 
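The emitModuleFlags quoting above emits each linker option with a leading space and wraps options containing spaces in double quotes, deliberately leaving full MSVC-style escaping to the FIXME. The same step as a standalone function (hypothetical name, same behavior):

    #include <string>

    // A self-contained restatement of the quoting step above: each option is
    // emitted with a leading space, and any option containing a space is
    // wrapped in double quotes. Embedded quotes are left alone, matching the
    // FIXME in the original.
    std::string escapeDrectveOption(const std::string &Op) {
      std::string Escaped = " ";
      if (Op.find(' ') != std::string::npos) {
        Escaped += '"';
        Escaped += Op;
        Escaped += '"';
      } else {
        Escaped += Op;
      }
      return Escaped;
    }
    // escapeDrectveOption("/DEFAULTLIB:msvcrt.lib") -> " /DEFAULTLIB:msvcrt.lib"
    // escapeDrectveOption("has space")              -> " \"has space\""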
+static cl::opt<bool> +EnableRescheduling("twoaddr-reschedule", + cl::desc("Coalesce copies by rescheduling (default=true)"), + cl::init(true), cl::Hidden); + namespace { class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; @@ -427,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() - ? MI.getNumOperands() : MCID.getNumOperands(); - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) continue; @@ -1145,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (rescheduleMIBelowKill(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1164,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. - if (rescheduleKillAboveMI(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; }
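The EnableRescheduling option above gates both rescheduleMIBelowKill and rescheduleKillAboveMI behind one hidden flag that defaults to true; assuming a standard llc build, something like llc -twoaddr-reschedule=false should turn the rescheduling off while leaving the rest of the two-address pass untouched. The gating pattern in miniature:

    // Minimal model of the pattern: a boolean flag (a plain global standing
    // in for the cl::opt) short-circuits both rescheduling transforms.
    static bool EnableRescheduling = true;  // cl::init(true) in the real flag

    bool tryRescheduleDown() { return false; }  // stand-in transforms
    bool tryRescheduleUp() { return false; }

    bool tryInstructionTransform() {
      if (EnableRescheduling && tryRescheduleDown())
        return true;
      if (EnableRescheduling && tryRescheduleUp())
        return true;
      return false;
    }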