path: root/lib/CodeGen
author    Eli Bendersky <eliben@chromium.org>  2013-07-15 16:09:15 -0700
committer Eli Bendersky <eliben@chromium.org>  2013-07-15 16:09:15 -0700
commit    c6cf05cb5108f356dde97c01ee4188b0671d4542 (patch)
tree      436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/CodeGen
parent    c75199c649c739aade160289d93f257edc798cde (diff)
parent    7dfcb84fc16b3bf6b2379713b53090757f0a45f9 (diff)
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
    docs/LangRef.rst
    include/llvm/CodeGen/CallingConvLower.h
    include/llvm/IRReader/IRReader.h
    include/llvm/Target/TargetMachine.h
    lib/CodeGen/CallingConvLower.cpp
    lib/IRReader/IRReader.cpp
    lib/IRReader/LLVMBuild.txt
    lib/IRReader/Makefile
    lib/LLVMBuild.txt
    lib/Makefile
    lib/Support/MemoryBuffer.cpp
    lib/Support/Unix/PathV2.inc
    lib/Target/ARM/ARMBaseInstrInfo.cpp
    lib/Target/ARM/ARMISelLowering.cpp
    lib/Target/ARM/ARMInstrInfo.td
    lib/Target/ARM/ARMSubtarget.cpp
    lib/Target/ARM/ARMTargetMachine.cpp
    lib/Target/Mips/CMakeLists.txt
    lib/Target/Mips/MipsDelaySlotFiller.cpp
    lib/Target/Mips/MipsISelLowering.cpp
    lib/Target/Mips/MipsInstrInfo.td
    lib/Target/Mips/MipsSubtarget.cpp
    lib/Target/Mips/MipsSubtarget.h
    lib/Target/X86/X86FastISel.cpp
    lib/Target/X86/X86ISelDAGToDAG.cpp
    lib/Target/X86/X86ISelLowering.cpp
    lib/Target/X86/X86InstrControl.td
    lib/Target/X86/X86InstrFormats.td
    lib/Transforms/IPO/ExtractGV.cpp
    lib/Transforms/InstCombine/InstCombineCompares.cpp
    lib/Transforms/Utils/SimplifyLibCalls.cpp
    test/CodeGen/X86/fast-isel-divrem.ll
    test/MC/ARM/data-in-code.ll
    tools/Makefile
    tools/llvm-extract/llvm-extract.cpp
    tools/llvm-link/CMakeLists.txt
    tools/opt/CMakeLists.txt
    tools/opt/LLVMBuild.txt
    tools/opt/Makefile
    tools/opt/opt.cpp
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/Analysis.cpp | 226
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 50
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 372
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 37
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 120
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 102
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 423
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 2
-rw-r--r--  lib/CodeGen/ErlangGC.cpp | 81
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 29
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 19
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 6
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 60
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 2
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 205
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 18
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 14
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 48
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 3
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 55
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 376
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 181
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 3
-rw-r--r--  lib/CodeGen/Passes.cpp | 108
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 94
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 7
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 45
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 1
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 109
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 451
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 90
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 29
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 86
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 6
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 82
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 117
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 175
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 4
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 13
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp | 1
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 18
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 1
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 99
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 63
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp | 26
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 7
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 17
66 files changed, 2754 insertions, 1564 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index c7abf7a0c4..4731af5089 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -202,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+static bool isNoopBitcast(Type *T1, Type *T2,
+ const TargetLowering& TLI) {
+ return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
+ (isa<VectorType>(T1) && isa<VectorType>(T2) &&
+ TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
+}
-/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
-/// through it (and any transitive noop operands to it) and return its input
-/// value. This is used to determine if a tail call can be formed.
-///
-static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
- // If V is not an instruction, it can't be looked through.
- const Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
-
- Value *Op = I->getOperand(0);
+/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop
+/// (i.e., lowers to no machine code), look through it (and any transitive noop
+/// operands to it) and check if it has the same noop input value. This is
+/// used to determine if a tail call can be formed.
+static bool sameNoopInput(const Value *V1, const Value *V2,
+ SmallVectorImpl<unsigned> &Els1,
+ SmallVectorImpl<unsigned> &Els2,
+ const TargetLowering &TLI) {
+ using std::swap;
+ bool swapParity = false;
+ bool equalEls = Els1 == Els2;
+ while (true) {
+ if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
+ if (swapParity)
+ // Revert to original Els1 and Els2 to avoid confusing recursive calls
+ swap(Els1, Els2);
+ return true;
+ }
- // Look through truly no-op truncates.
- if (isa<TruncInst>(I) &&
- TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
- return getNoopInput(I->getOperand(0), TLI);
-
- // Look through truly no-op bitcasts.
- if (isa<BitCastInst>(I)) {
- // No type change at all.
- if (Op->getType() == I->getType())
- return getNoopInput(Op, TLI);
+ // Try to look through V1; if V1 is not an instruction, it can't be looked
+ // through.
+ const Instruction *I = dyn_cast<Instruction>(V1);
+ const Value *NoopInput = 0;
+ if (I != 0 && I->getNumOperands() > 0) {
+ Value *Op = I->getOperand(0);
+ if (isa<TruncInst>(I)) {
+ // Look through truly no-op truncates.
+ if (TLI.isTruncateFree(Op->getType(), I->getType()))
+ NoopInput = Op;
+ } else if (isa<BitCastInst>(I)) {
+ // Look through truly no-op bitcasts.
+ if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+ NoopInput = Op;
+ } else if (isa<GetElementPtrInst>(I)) {
+ // Look through getelementptr
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ NoopInput = Op;
+ } else if (isa<IntToPtrInst>(I)) {
+ // Look through inttoptr.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<PtrToIntInst>(I)) {
+ // Look through ptrtoint.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<CallInst>(I)) {
+ // Look through call
+ for (User::const_op_iterator i = I->op_begin(),
+ // Skip Callee
+ e = I->op_end() - 1;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ } else if (isa<InvokeInst>(I)) {
+ // Look through invoke
+ for (User::const_op_iterator i = I->op_begin(),
+ // Skip BB, BB, Callee
+ e = I->op_end() - 3;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ }
+ }
- // Pointer to pointer cast.
- if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
- return getNoopInput(Op, TLI);
-
- if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
- TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
- TLI.isTypeLegal(EVT::getEVT(I->getType())))
- return getNoopInput(Op, TLI);
- }
-
- // Look through inttoptr.
- if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
- // Make sure this isn't a truncating or extending cast. We could support
- // this eventually, but don't bother for now.
- if (TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(Op->getType())->getBitWidth())
- return getNoopInput(Op, TLI);
- }
+ if (NoopInput) {
+ V1 = NoopInput;
+ continue;
+ }
- // Look through ptrtoint.
- if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
- // Make sure this isn't a truncating or extending cast. We could support
- // this eventually, but don't bother for now.
- if (TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(I->getType())->getBitWidth())
- return getNoopInput(Op, TLI);
+ // If we already swapped, avoid infinite loop
+ if (swapParity)
+ break;
+
+ // Otherwise, swap V1<->V2, Els1<->Els2
+ swap(V1, V2);
+ swap(Els1, Els2);
+ swapParity = !swapParity;
}
+ for (unsigned n = 0; n < 2; ++n) {
+ if (isa<InsertValueInst>(V1)) {
+ if (isa<StructType>(V1->getType())) {
+ // Look through insertvalue
+ unsigned i, e;
+ for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
+ if (InScalar == 0)
+ break;
+ Els1.push_back(i);
+ if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
+ Els1.pop_back();
+ break;
+ }
+ Els1.pop_back();
+ }
+ if (i == e) {
+ if (swapParity)
+ swap(Els1, Els2);
+ return true;
+ }
+ }
+ } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
+ unsigned i = Els1.back();
+ // If the scalar value being inserted is an extractvalue of the right
+ // index from the call, then everything is good.
+ if (isa<StructType>(EVI->getOperand(0)->getType()) &&
+ EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
+ // Look through extractvalue
+ Els1.pop_back();
+ if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
+ Els1.push_back(i);
+ if (swapParity)
+ swap(Els1, Els2);
+ return true;
+ }
+ Els1.push_back(i);
+ }
+ }
- // Otherwise it's not something we can look through.
- return V;
-}
+ swap(V1, V2);
+ swap(Els1, Els2);
+ swapParity = !swapParity;
+ }
+ if (swapParity)
+ swap(Els1, Els2);
+ return false;
+}
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
@@ -265,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+ const TargetLowering &TLI) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -323,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
- // Otherwise, make sure the unmodified return value of I is the return value.
- // We handle two cases: multiple return values + scalars.
- Value *RetVal = Ret->getOperand(0);
- if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
- // Handle scalars first.
- return getNoopInput(Ret->getOperand(0), TLI) == I;
-
- // If this is an aggregate return, look through the insert/extract values and
- // see if each is transparent.
- for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
- i != e; ++i) {
- const Value *InScalar = FindInsertedValue(RetVal, i);
- if (InScalar == 0) return false;
- InScalar = getNoopInput(InScalar, TLI);
-
- // If the scalar value being inserted is an extractvalue of the right index
- // from the call, then everything is good.
- const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
- if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
- EVI->getIndices()[0] != i)
- return false;
- }
-
- return true;
+ // Otherwise, make sure the return value and I have the same value
+ SmallVector<unsigned, 4> Els1, Els2;
+ return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
}
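
For orientation, here is a minimal sketch of the control structure the new sameNoopInput uses: strip no-op wrappers from one side until stuck, then swap sides, tracking a parity bit so the swap can be undone and the loop terminates. The types and names below are illustrative stand-ins, not LLVM's API.

    #include <utility>

    // Toy value: at most one transparent ("no-op") input, standing in for
    // free truncates, no-op bitcasts, all-zero GEPs, etc. in the real code.
    struct Value { const Value *NoopInput; };

    static bool sameNoopInput(const Value *V1, const Value *V2) {
      bool SwapParity = false;
      while (true) {
        if (V1 == V2) return true;          // reached a common underlying value
        if (V1->NoopInput) {                // look through V1 and retry
          V1 = V1->NoopInput;
          continue;
        }
        if (SwapParity) return false;       // both sides fully stripped
        std::swap(V1, V2);                  // otherwise try stripping the other side
        SwapParity = !SwapParity;
      }
    }
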
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0f381c6d0c..c73071e12b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const {
return *TM.getDataLayout();
}
+StringRef AsmPrinter::getTargetTriple() const {
+ return TM.getTargetTriple();
+}
+
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
- return OutStreamer.getCurrentSection();
+ return OutStreamer.getCurrentSection().first;
}
@@ -834,7 +838,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
// caller might be in the middle of an dwarf expression. We should
// probably assert that Reg >= 0 once debug info generation is more mature.
- if (int Offset = MLoc.getOffset()) {
+ if (MLoc.isIndirect()) {
if (Reg < 32) {
OutStreamer.AddComment(
dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
@@ -845,7 +849,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
OutStreamer.AddComment(Twine(Reg));
EmitULEB128(Reg);
}
- EmitSLEB128(Offset);
+ EmitSLEB128(MLoc.getOffset());
} else {
if (Reg < 32) {
OutStreamer.AddComment(
@@ -1256,7 +1260,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
if (MAI->hasNoDeadStrip()) // No need to emit this at all.
- EmitLLVMUsedList(GV->getInitializer());
+ EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
return true;
}
@@ -1299,11 +1303,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
- if (InitList == 0) return;
-
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
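
The EmitDwarfRegOp hunk above keys the choice of DWARF operator on MLoc.isIndirect() instead of on a nonzero offset, which makes "indirect at offset 0" expressible. A rough sketch of the distinction, using the standard DWARF opcode values for registers below 32 (toy code, not the AsmPrinter API):

    #include <cstdint>

    enum { DW_OP_reg0 = 0x50, DW_OP_breg0 = 0x70 };

    struct Location { unsigned Reg; int64_t Offset; bool Indirect; };

    // Valid for Reg < 32 only; larger registers use DW_OP_regx/DW_OP_bregx.
    uint8_t opcodeFor(const Location &Loc) {
      // DW_OP_breg<n> means "memory at register + offset" (offset may be 0);
      // DW_OP_reg<n> means the value lives in the register itself.
      return Loc.Indirect ? uint8_t(DW_OP_breg0 + Loc.Reg)
                          : uint8_t(DW_OP_reg0 + Loc.Reg);
    }
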
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 156acace55..31e42d47cf 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const {
// On COFF targets, we have to emit the special .secrel32 directive.
- if (MAI->getDwarfSectionOffsetDirective()) {
+ if (MAI->needsDwarfSectionOffsetDirective()) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 58fe2ed9d3..8d15c069c6 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
DwarfCompileUnit.cpp
DwarfDebug.cpp
DwarfException.cpp
+ ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
Win64Exception.cpp
)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4ded2818ed..673867ada1 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,9 +112,20 @@ DIE::~DIE() {
delete Children[i];
}
+/// Climb up the parent chain to get the compile unit DIE to which this DIE
+/// belongs.
+DIE *DIE::getCompileUnit() const {
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
#ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IncIndent) {
- IndentCount += IncIndent;
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
const std::string Indent(IndentCount, ' ');
bool isBlock = Abbrev.getTag() == 0;
@@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
O << "Size: " << Size << "\n";
}
- const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
IndentCount += 2;
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -153,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount -= 2;
for (unsigned j = 0, M = Children.size(); j < M; ++j) {
- Children[j]->print(O, 4);
+ Children[j]->print(O, IndentCount+4);
}
if (!isBlock) O << "\n";
- IndentCount -= IncIndent;
}
void DIE::dump() {
@@ -313,7 +323,7 @@ void DIEEntry::print(raw_ostream &O) {
///
unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
if (!Size) {
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
}
@@ -332,7 +342,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
}
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
}
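
The print() change above threads the indent level through as a parameter (children print at IndentCount+4) rather than mutating a member counter across calls. A freestanding sketch of that shape, with toy types:

    #include <cstdio>

    struct Node {
      const Node *Child;
      // Indent travels as an argument, so printing is const and re-entrant.
      void print(unsigned Indent = 0) const {
        std::printf("%*snode\n", (int)Indent, "");
        if (Child)
          Child->print(Indent + 4);   // children indent relative to parent
      }
    };
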
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 35d7959ac1..3c06001686 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -66,7 +66,7 @@ namespace llvm {
/// Data - Raw data bytes for abbreviation.
///
- SmallVector<DIEAbbrevData, 8> Data;
+ SmallVector<DIEAbbrevData, 12> Data;
public:
DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
@@ -75,7 +75,7 @@ namespace llvm {
uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
uint16_t getChildrenFlag() const { return ChildrenFlag; }
- const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
void setTag(uint16_t T) { Tag = T; }
void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
@@ -108,7 +108,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIE - A structured debug information entry. Has an abbreviation which
- /// describes it's organization.
+ /// describes its organization.
class DIEValue;
class DIE {
@@ -133,14 +133,13 @@ namespace llvm {
/// Attribute values.
///
- SmallVector<DIEValue*, 32> Values;
+ SmallVector<DIEValue*, 12> Values;
// Private data for print()
mutable unsigned IndentCount;
public:
explicit DIE(unsigned Tag)
- : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
- IndentCount(0) {}
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
virtual ~DIE();
// Accessors.
@@ -150,8 +149,11 @@ namespace llvm {
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
const std::vector<DIE *> &getChildren() const { return Children; }
- const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+ const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
@@ -176,7 +178,7 @@ namespace llvm {
}
#ifndef NDEBUG
- void print(raw_ostream &O, unsigned IncIndent = 0);
+ void print(raw_ostream &O, unsigned IndentCount = 0) const;
void dump();
#endif
};
@@ -232,9 +234,10 @@ namespace llvm {
///
static unsigned BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
- if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
- if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
- if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
} else {
if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
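
The BestForm change above is a correctness fix: the old (signed)Int casts truncated the 64-bit value to 32 bits on both sides of the comparison, so any value whose low 32 bits looked small picked too narrow a form. A self-contained illustration (hypothetical value, plain C++):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Int = UINT64_C(1) << 32;   // needs 8 bytes as a signed value
      // Old comparison: both casts chop to 32 bits, so 0 == 0 and the value
      // would wrongly be treated as fitting in DW_FORM_data1.
      std::printf("old: %d\n", (char)Int == (signed)Int);   // prints 1
      // Fixed comparison against the full 64-bit signed value.
      const int64_t SignedInt = (int64_t)Int;
      std::printf("new: %d\n", (char)Int == SignedInt);     // prints 0
      return 0;
    }
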
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d9f6b5eb0a..89abcffd93 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
DwarfDebug *DW, DwarfUnits *DWU)
: UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(0) {
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -241,7 +241,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
if (Line == 0)
return;
unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
- V.getContext().getDirectory());
+ V.getContext().getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -257,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -276,7 +278,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
return;
unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
- SP.getDirectory());
+ SP.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -293,7 +295,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
if (Line == 0)
return;
unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
- Ty.getDirectory());
+ Ty.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -311,7 +313,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
return;
DIFile File = Ty.getFile();
unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
- File.getDirectory());
+ File.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -329,7 +331,8 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -682,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
return true;
}
-/// addTemplateParams - Add template parameters in buffer.
+/// addTemplateParams - Add template parameters into buffer.
void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
// Add template parameters.
for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
@@ -704,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
return getOrCreateNameSpace(DINameSpace(Context));
else if (Context.isSubprogram())
return getOrCreateSubprogramDIE(DISubprogram(Context));
- else
+ else
return getDIE(Context);
}
@@ -1363,7 +1366,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
}
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
- // AT_const_value was added when the static memeber was created. To avoid
+ // AT_const_value was added when the static member was created. To avoid
// emitting AT_const_value multiple times, we only add AT_const_value when
// it is not a static member.
if (!IsStaticMember)
@@ -1669,33 +1672,6 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
if (DT.isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
- // This is only for backward compatibility.
- StringRef PropertyName = DT.getObjCPropertyName();
- if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- StringRef GetterName = DT.getObjCPropertyGetterName();
- if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
- StringRef SetterName = DT.getObjCPropertySetterName();
- if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
- unsigned PropertyAttributes = 0;
- if (DT.isReadOnlyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
- if (DT.isReadWriteObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
- if (DT.isAssignObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
- if (DT.isRetainObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
- if (DT.isCopyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
- if (DT.isNonAtomicObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
- if (PropertyAttributes)
- addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
- PropertyAttributes);
- }
return MemberDie;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77bf6a9e50..8f08c63e64 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -87,13 +87,13 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
/// getLowerBoundDefault - Return the default lower bound for an array. If the
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
- /// getOrCreateContextDIE - Get context owner's DIE.
- DIE *getOrCreateContextDIE(DIDescriptor Context);
-
public:
CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
DwarfUnits *);
@@ -103,6 +103,7 @@ public:
unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -120,6 +121,7 @@ public:
return AccelTypes;
}
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
@@ -367,6 +369,9 @@ public:
/// createStaticMemberDIE - Create new static data member DIE.
DIE *createStaticMemberDIE(DIDerivedType DT);
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
private:
// DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 87659ef667..73bba6989f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -55,7 +55,7 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::init(false));
static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::Hidden, cl::init(false),
cl::desc("Generate DWARF pubnames section"));
namespace {
@@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
+
+ struct CompareFirst {
+ template <typename T> bool operator()(const T &lhs, const T &rhs) const {
+ return lhs.first < rhs.first;
+ }
+ };
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+ DwarfAddrSectionSym = 0;
DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
// Turn on accelerator tables and older gdb compatibility
// for Darwin.
- bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
if (DarwinGDBCompat == Default) {
if (IsDarwin)
IsDarwinGDBCompat = true;
@@ -352,11 +359,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
// Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsSPDIE);
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
SPCU->addDie(SPDie);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -528,7 +540,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DILocation DL(Scope->getInlinedAt());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
- getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+ TheCU->getUniqueID()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
@@ -590,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
}
else {
// There is no need to emit empty lexical block DIE.
- if (Children.empty())
+ std::pair<ImportedEntityMap::const_iterator,
+ ImportedEntityMap::const_iterator> Range = std::equal_range(
+ ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+ std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
+ CompareFirst());
+ if (Children.empty() && Range.first == Range.second)
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i)
+ constructImportedModuleDIE(TheCU, i->second, ScopeDIE);
}
if (!ScopeDIE) return NULL;
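
The lookup above relies on ScopesWithImportedEntities being sorted by scope (the pair's first member), so std::equal_range with CompareFirst returns the contiguous run of imported entities for one scope. Simplified to ints, the pattern looks like this (illustrative only):

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct CompareFirst {
      template <typename T> bool operator()(const T &lhs, const T &rhs) const {
        return lhs.first < rhs.first;   // order (and search) by scope only
      }
    };

    int main() {
      // (scope, entity) pairs, already sorted by scope.
      std::vector<std::pair<int, int> > Scopes = {{1, 10}, {2, 20}, {2, 21}, {3, 30}};
      auto Range = std::equal_range(Scopes.begin(), Scopes.end(),
                                    std::make_pair(2, 0), CompareFirst());
      // Range now spans (2,20) and (2,21); the second member of each pair is
      // what constructImportedModuleDIE receives in the hunk above.
      return 0;
    }
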
@@ -617,19 +637,28 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
// SourceIds map. This can update DirectoryNames and SourceFileNames maps
// as well.
unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
- StringRef DirName) {
+ StringRef DirName, unsigned CUID) {
+ // If we use .loc in assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ CUID = 0;
+
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
- return getOrCreateSourceID("<stdin>", StringRef());
+ return getOrCreateSourceID("<stdin>", StringRef(), CUID);
// TODO: this might not belong here. See if we can factor this better.
if (DirName == CompilationDir)
DirName = "";
- unsigned SrcId = SourceIdMap.size()+1;
+ // FileIDCUMap stores the current ID for the given compile unit.
+ unsigned SrcId = FileIDCUMap[CUID] + 1;
- // We look up the file/dir pair by concatenating them with a zero byte.
+ // We look up the CUID/file/dir by concatenating them with a zero byte.
SmallString<128> NamePair;
+ NamePair += utostr(CUID);
+ NamePair += '\0';
NamePair += DirName;
NamePair += '\0'; // Zero bytes are not allowed in paths.
NamePair += FileName;
@@ -638,8 +667,9 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
if (Ent.getValue() != SrcId)
return Ent.getValue();
+ FileIDCUMap[CUID] = SrcId;
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
return SrcId;
}
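
The lookup key built above now embeds the compile unit ID ahead of the directory and file name, all separated by zero bytes (which cannot occur in paths), so the same path in two CUs maps to distinct file IDs. Roughly (the helper name is made up for illustration):

    #include <string>

    std::string makeSourceIdKey(unsigned CUID, const std::string &Dir,
                                const std::string &File) {
      std::string Key = std::to_string(CUID);
      Key += '\0';   // zero-byte separator; cannot appear in a path
      Key += Dir;
      Key += '\0';
      Key += File;
      return Key;    // used as the key into the map of unique source IDs
    }
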
@@ -650,21 +680,27 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
- // Call this to emit a .file directive if it wasn't emitted for the source
- // file this CU comes from yet.
- getOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
DIUnit.getLanguage(), Die, Asm,
this, &InfoHolder);
+
+ FileIDCUMap[NewCU->getUniqueID()] = 0;
+ // Call this to emit a .file directive if it wasn't emitted for the source
+ // file this CU comes from yet.
+ getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
NewCU->addString(Die, dwarf::DW_AT_name, FN);
+
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
- // into an entity. We're using 0 (or a NULL label) for this.
- NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+ // into an entity. We're using 0 (or a NULL label) for this. For
+ // split dwarf it's in the skeleton CU so omit it here.
+ if (!useSplitDwarf())
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
@@ -672,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
NewCU->getUniqueID());
+ // Use a single line table if we are using .loc and generating assembly.
+ bool UseTheFirstCU =
+ (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) ||
+ (NewCU->getUniqueID() == 0);
+
// DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section.
+ // compile unit in debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- NewCU->getUniqueID() == 0 ?
- Asm->GetTempSymbol("section_line") : LineTableStartSym);
- else if (NewCU->getUniqueID() == 0)
- NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
- else
- NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- LineTableStartSym, DwarfLineSectionSym);
+ if (!useSplitDwarf()) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ UseTheFirstCU ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (UseTheFirstCU)
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
+ }
- if (!CompilationDir.empty())
+ // If we're using split dwarf the compilation dir is going to be in the
+ // skeleton CU and so we don't need to duplicate it here.
+ if (!useSplitDwarf() && !CompilationDir.empty())
NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -702,13 +749,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
if (!FirstCU)
FirstCU = NewCU;
- if (useSplitDwarf()) {
- // This should be a unique identifier when we want to build .dwp files.
- NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
- // Now construct the skeleton CU associated.
- constructSkeletonCU(N);
- }
-
InfoHolder.addUnit(NewCU);
CUMap.insert(std::make_pair(N, NewCU));
@@ -742,80 +782,39 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
TheCU->addGlobalName(SP.getName(), SubprogramDie);
}
-// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
-void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+ const MDNode *N) {
+ DIImportedModule Module(N);
+ if (!Module.Verify())
+ return;
+ if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
+ constructImportedModuleDIE(TheCU, Module, D);
}
-// Collect debug info using DebugInfoFinder.
-// FIXME - Remove this when dragonegg switches to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(*M);
-
- bool HasDebugInfo = false;
- // Scan all the compile-units to see if there are any marked as the main
- // unit. If not, we do not generate debug info.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
- if (DICompileUnit(*I).isMain()) {
- HasDebugInfo = true;
- break;
- }
- }
- if (!HasDebugInfo) return false;
-
- // Emit initial sections so we can refer to them later.
- emitSectionLabels();
-
- // Create all the compile unit DIEs.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I)
- constructCompileUnit(*I);
-
- // Create DIEs for each global variable.
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- // Create DIEs for each subprogram.
- for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- return HasDebugInfo;
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+ DIE *Context) {
+ DIImportedModule Module(N);
+ if (!Module.Verify())
+ return;
+ return constructImportedModuleDIE(TheCU, Module, Context);
+}
+
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+ const DIImportedModule &Module,
+ DIE *Context) {
+ assert(Module.Verify() &&
+ "Use one of the MDNode * overloads to handle invalid metadata");
+ assert(Context && "Should always have a context for an imported_module");
+ DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module);
+ TheCU->insertDIE(Module, IMDie);
+ DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace());
+ unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(),
+ Module.getContext().getDirectory(),
+ TheCU->getUniqueID());
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID);
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber());
+ TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie);
+ Context->addChild(IMDie);
}
// Emit all Dwarf sections that should come prior to the content. Create
@@ -830,30 +829,48 @@ void DwarfDebug::beginModule() {
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- // Emit initial sections so we can reference labels later.
- emitSectionLabels();
-
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CUNode(CU_Nodes->getOperand(i));
- CompileUnit *CU = constructCompileUnit(CUNode);
- DIArray GVs = CUNode.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU->createGlobalVariableDIE(GVs.getElement(i));
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- constructSubprogramDIE(CU, SPs.getElement(i));
- DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
- DIArray RetainedTypes = CUNode.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
- }
- } else if (!collectLegacyDebugInfo(M))
+ if (!CU_Nodes)
return;
- collectInfoFromNamedMDNodes(M);
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray ImportedModules = CUNode.getImportedModules();
+ for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+ ScopesWithImportedEntities.push_back(std::make_pair(
+ DIImportedModule(ImportedModules.getElement(i)).getContext(),
+ ImportedModules.getElement(i)));
+ std::sort(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(), CompareFirst());
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ // Emit imported_modules last so that the relevant context is already
+ // available.
+ for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+ constructImportedModuleDIE(CU, ImportedModules.getElement(i));
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+ CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(CUNode);
+ }
+ }
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
@@ -1157,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
}
if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
MachineLocation MLoc;
- MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ // TODO: Currently an offset of 0 in a DBG_VALUE means
+ // we need to generate a direct register value.
+ // There is no way to specify an indirect value with offset 0.
+ if (MI->getOperand(1).getImm() == 0)
+ MLoc.set(MI->getOperand(0).getReg());
+ else
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
}
if (MI->getOperand(0).isImm())
@@ -1197,16 +1220,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
Scope = LScopes.getCurrentFunctionScope();
- else {
- if (DV.getVersion() <= LLVMDebugVersion9)
- Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
- else {
- if (MDNode *IA = DV.getInlinedAt())
- Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
- else
- Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
- }
- }
+ else if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -1430,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
- Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ // Use a single line table if we are using .loc and generating assembly.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ else
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
@@ -1707,7 +1729,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
} else
llvm_unreachable("Unexpected scope info");
- Src = getOrCreateSourceID(Fn, Dir);
+ Src = getOrCreateSourceID(Fn, Dir,
+ Asm->OutStreamer.getContext().getDwarfCompileUnitID());
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
@@ -1735,8 +1758,8 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Start the size with the size of abbreviation code.
Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Size the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1761,15 +1784,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Compute the size and offset of all the DIEs.
void DwarfUnits::computeSizeAndOffsets() {
- for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset((*I)->getCUDie(), Offset);
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
}
}
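
computeSizeAndOffsets now records, for each unit, its starting offset within the debug info section before adding that unit's total size to the running offset; getCUOffset and the DW_FORM_ref_addr emission below depend on these bases. The accumulation, reduced to its core (assumed toy types):

    #include <vector>

    struct Unit {
      unsigned Size;            // unit header plus all of its DIEs
      unsigned DebugInfoOffset; // where this unit starts in .debug_info
    };

    void computeOffsets(std::vector<Unit> &Units) {
      unsigned Accu = 0;
      for (std::vector<Unit>::iterator I = Units.begin(), E = Units.end();
           I != E; ++I) {
        I->DebugInfoOffset = Accu; // base used to relocate cross-CU references
        Accu += I->Size;
      }
    }
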
@@ -1799,9 +1826,12 @@ void DwarfDebug::emitSectionLabels() {
emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
- if (useSplitDwarf())
+ if (useSplitDwarf()) {
DwarfStrDWOSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfAddrSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+ }
DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
@@ -1826,8 +1856,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
dwarf::TagString(Abbrev->getTag()));
Asm->EmitULEB128(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Emit the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -1843,6 +1873,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
Asm->EmitInt32(Addr);
break;
}
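
The emission hunk above is where those bases are consumed: DW_FORM_ref4 stores an offset relative to the referenced DIE's own compile unit, while DW_FORM_ref_addr must be relative to the start of the debug info section, hence the added getCUOffset term. In plain arithmetic (hypothetical numbers):

    // Suppose CU0 occupies [0, 300) of .debug_info, CU1 starts at 300, and
    // the origin DIE sits at offset 42 within CU1.
    //   DW_FORM_ref4 (same CU):      emit 42
    //   DW_FORM_ref_addr (cross-CU): emit 300 + 42 = 342
    unsigned sectionRelativeOffset(unsigned OffsetInCU, unsigned CUBase) {
      return CUBase + OffsetInCU;
    }
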
@@ -1908,7 +1945,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
const MCSection *ASection,
const MCSymbol *ASectionSym) {
Asm->OutStreamer.SwitchSection(USection);
- for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
CompileUnit *TheCU = *I;
DIE *Die = TheCU->getCUDie();
@@ -1940,6 +1977,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
}
}
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2324,7 +2374,7 @@ void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I) {
DotDebugLocEntry &Entry = *I;
@@ -2338,7 +2388,7 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getDataLayout().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
DotDebugLocEntry &Entry = *I;
@@ -2431,7 +2481,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
- for (SmallVector<const MCSymbol *, 8>::iterator
+ for (SmallVectorImpl<const MCSymbol *>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
@@ -2489,13 +2539,13 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
- for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
const MDNode *Node = *I;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
= InlineInfo.find(Node);
- SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
DISubprogram SP(Node);
StringRef LName = SP.getLinkageName();
StringRef Name = SP.getName();
@@ -2514,7 +2564,7 @@ void DwarfDebug::emitDebugInlineInfo() {
DwarfStrSectionSym);
Asm->EmitULEB128(Labels.size(), "Inline count");
- for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(LI->second->getOffset());
@@ -2549,9 +2599,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
// This should be a unique identifier when we want to build .dwp files.
NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
- // FIXME: The addr base should be relative for each compile unit, however,
- // this one is going to be 0 anyhow.
- NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+ // Relocate to the beginning of the addr_base section, else 0 for the
+ // beginning of the one for this compile unit.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
+ DwarfAddrSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
+ dwarf::DW_FORM_sec_offset, 0);
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
// into an entity. We're using 0, or a NULL label for this.
@@ -2559,6 +2614,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
+ // FIXME: Should handle multiple compile units.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
DwarfLineSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 7b56815040..24f758dda9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -274,6 +274,10 @@ public:
/// \brief Returns the address pool.
AddrPool *getAddrPool() { return &AddressPool; }
+
+  /// \brief For a given compile unit DIE, returns the offset from the
+  /// beginning of debug info.
+ unsigned getCUOffset(DIE *Die);
};
/// \brief Collects and handles dwarf debug information.
@@ -305,7 +309,9 @@ class DwarfDebug {
// A list of all the unique abbreviations in use.
std::vector<DIEAbbrev *> Abbreviations;
- // Source id map, i.e. pair of source filename and directory,
+ // Stores the current file ID for a given compile unit.
+ DenseMap <unsigned, unsigned> FileIDCUMap;
+ // Source id map, i.e. CUID, source filename and directory,
// separated by a zero byte, mapped to a unique id.
StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
@@ -386,7 +392,7 @@ class DwarfDebug {
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
@@ -427,6 +433,10 @@ class DwarfDebug {
// Holder for the skeleton information.
DwarfUnits SkeletonHolder;
+ typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
+ ImportedEntityMap;
+ ImportedEntityMap ScopesWithImportedEntities;
+
private:
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -549,6 +559,18 @@ private:
/// \brief Construct subprogram DIE.
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N);
+
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+ DIE *Context);
+
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU,
+ const DIImportedModule &Module,
+ DIE *Context);
+
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
@@ -596,14 +618,6 @@ public:
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
- /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum
- /// and llvm.dbg.ty
- void collectInfoFromNamedMDNodes(const Module *M);
-
- /// \brief Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when DragonEgg switches to DIBuilder.
- bool collectLegacyDebugInfo(const Module *M);
-
/// \brief Emit all Dwarf sections that should come prior to the
/// content.
void beginModule();
@@ -626,7 +640,8 @@ public:
/// \brief Look up the source id with the given directory and source file
/// names. If none currently exists, create a new id and insert it in the
/// SourceIds map.
- unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName);
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned CUID);
/// \brief Recursively Emits a debug information entry.
void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 0000000000..a8fb66dcf1
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGCPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+ MCStreamer &OS = AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ // Put this in a custom .note section.
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+ .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getDataRel()));
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ }
+
+    // Stack information never changes at safe points! Only print info from
+    // the first call site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+ MD.getFunction().arg_size() - RegisteredArgs : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
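The comment block in finishAssembly pins down the record layout exactly, so a runtime loader can read the frametable back with straight pointer arithmetic. Below is a hypothetical reader for one __gcmap_<FUNCTION> record — names are illustrative, not part of the patch — assuming safe point addresses are the 4-byte values emitted above and that host and target endianness match. A real loader would also have to skip the address-width alignment padding between records.

#include <cstdint>
#include <cstring>
#include <vector>

struct GCMapRecord {
  uint16_t PointCount;
  std::vector<uint32_t> SafePointAddress; // 4 bytes each, as emitted above
  uint16_t StackFrameSize;                // in words
  uint16_t StackArity;
  uint16_t LiveCount;
  std::vector<int16_t> LiveOffsets;       // offset / wordsize
};

static GCMapRecord readGCMapRecord(const uint8_t *P) {
  GCMapRecord R;
  std::memcpy(&R.PointCount, P, 2); P += 2;
  R.SafePointAddress.resize(R.PointCount);
  std::memcpy(R.SafePointAddress.data(), P, 4u * R.PointCount);
  P += 4u * R.PointCount;
  std::memcpy(&R.StackFrameSize, P, 2); P += 2;
  std::memcpy(&R.StackArity, P, 2); P += 2;
  std::memcpy(&R.LiveCount, P, 2); P += 2;
  R.LiveOffsets.resize(R.LiveCount);
  std::memcpy(R.LiveOffsets.data(), P, 2u * R.LiveCount);
  return R;
}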
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index e8b5b4fe8d..4a99184f5e 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -85,7 +85,9 @@ public:
virtual unsigned getNumberOfRegisters(bool Vector) const;
virtual unsigned getMaximumUnrollFactor() const;
virtual unsigned getRegisterBitWidth(bool Vector) const;
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const;
virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -193,27 +195,34 @@ unsigned BasicTTI::getMaximumUnrollFactor() const {
return 1;
}
-unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const {
// Check if any of the operands are vector operands.
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
+ // Assume that floating point arithmetic operations cost twice as much as
+ // integer operations.
+ unsigned OpCost = (IsFloat ? 2 : 1);
+
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that thre is some
+ // If the type is split to multiple registers, assume that there is some
// overhead to this.
// TODO: Once we have extract/insert subvector cost we need to use them.
if (LT.first > 1)
- return LT.first * 2;
- return LT.first * 1;
+ return LT.first * 2 * OpCost;
+ return LT.first * 1 * OpCost;
}
if (!TLI->isOperationExpand(ISD, LT.second)) {
// If the operation is custom lowered then assume
// that the code is twice as expensive.
- return LT.first * 2;
+ return LT.first * 2 * OpCost;
}
// Else, assume that we need to scalarize this op.
@@ -226,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
}
// We don't know anything about this scalar instruction.
- return 1;
+ return OpCost;
}
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
@@ -379,22 +388,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- // assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
}
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+  // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+  // that the code is twice as expensive.
+ return LT.first * 2;
}
- return ScalarCalls + ScalarizationCost;
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
}
unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
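Both cost hunks apply the same multipliers: cost scales with the number of registers the type legalizes into, doubles when the type is split or the operation is custom lowered, and (new in this patch) doubles again for floating point arithmetic. A standalone sketch of the heuristic, with illustrative names standing in for the TLI queries:

// Parts ~ LT.first: how many registers the legalized type occupies.
static unsigned arithmeticCost(unsigned Parts, bool IsFloat,
                               bool LegalOrPromote, bool CustomLowered) {
  unsigned OpCost = IsFloat ? 2 : 1;  // FP assumed twice the integer cost
  if (LegalOrPromote)                 // legal: 1 per part, 2 per part if split
    return Parts > 1 ? Parts * 2 * OpCost : Parts * OpCost;
  if (CustomLowered)                  // custom lowering assumed 2x
    return Parts * 2 * OpCost;
  return OpCost;                      // unknown scalar op
}

The intrinsic path charges its expand case differently: scalarizing a vector math builtin becomes 10 * scalar cost * lanes, and a plain scalar libcall is 10, to reflect call overhead and spills.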
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ddc7adab49..56aa3309d3 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -7,13 +7,13 @@ add_llvm_library(LLVMCodeGen
CalcSpillWeights.cpp
CallingConvLower.cpp
CodeGen.cpp
- CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DFAPacketizer.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
EdgeBundles.cpp
+ ErlangGC.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index dee339a458..38ae17d231 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
float totalWeight = 0;
SmallPtrSet<MachineInstr*, 8> visited;
- // Find the best physreg hist and the best virtreg hint.
+ // Find the best physreg hint and the best virtreg hint.
float bestPhys = 0, bestVirt = 0;
unsigned hintPhys = 0, hintVirt = 0;
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index c897f3e391..d4cc1a8654 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
// No stack is used.
StackOffset = 0;
- clearFirstByValReg();
+ clearByValRegsInfo();
clearHasByValInRegPosition(); // @LOCALMOD.
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a33b672044..c641991d40 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
@@ -22,7 +23,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
- initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
deleted file mode 100644
index 24518443a7..0000000000
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass that optimizes code placement and aligns loop
-// headers to target-specific alignment boundaries.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-STATISTIC(NumLoopsAligned, "Number of loops aligned");
-STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
-STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
-
-namespace {
- class CodePlacementOpt : public MachineFunctionPass {
- const MachineLoopInfo *MLI;
- const TargetInstrInfo *TII;
- const TargetLowering *TLI;
-
- public:
- static char ID;
- CodePlacementOpt() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineLoopInfo>();
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- bool HasFallthrough(MachineBasicBlock *MBB);
- bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
- void Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End);
- bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L);
- bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L);
- bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
- bool OptimizeIntraLoopEdges(MachineFunction &MF);
- bool AlignLoops(MachineFunction &MF);
- bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
- };
-
- char CodePlacementOpt::ID = 0;
-} // end anonymous namespace
-
-char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
-INITIALIZE_PASS(CodePlacementOpt, "code-placement",
- "Code Placement Optimizer", false, false)
-
-/// HasFallthrough - Test whether the given branch has a fallthrough, either as
-/// a plain fallthrough or as a fallthrough case of a conditional branch.
-///
-bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // This conditional branch has no fallthrough.
- if (FBB)
- return false;
- // An unconditional branch has no fallthrough.
- if (Cond.empty() && TBB)
- return false;
- // It has a fallthrough.
- return true;
-}
-
-/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
-/// This is called before major changes are begun to test whether it will be
-/// possible to complete the changes.
-///
-/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
-/// whenever possible.
-///
-bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
- // Conservatively ignore EH landing pads.
- if (MBB->isLandingPad()) return false;
-
- // Aggressively handle return blocks and similar constructs.
- if (MBB->succ_empty()) return true;
-
- // Ask the target's AnalyzeBranch if it can handle this block.
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- // Make sure the terminator is understood.
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // Ignore blocks which look like they might have EH-related control flow.
- // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
- // recognize the possibility of a control transfer through an unwind.
- // Such blocks contain EH_LABEL instructions, however they may be in the
- // middle of the block. Instead of searching for them, just check to see
- // if the CFG disagrees with AnalyzeBranch.
- if (1u + !Cond.empty() != MBB->succ_size())
- return false;
- // Make sure we have the option of reversing the condition.
- if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
- return false;
- return true;
-}
-
-/// Splice - Move the sequence of instructions [Begin,End) to just before
-/// InsertPt. Update branch instructions as needed to account for broken
-/// fallthrough edges and to take advantage of newly exposed fallthrough
-/// opportunities.
-///
-void CodePlacementOpt::Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End) {
- assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
- "Splice can't change the entry block!");
- MachineFunction::iterator OldBeginPrior = prior(Begin);
- MachineFunction::iterator OldEndPrior = prior(End);
-
- MF.splice(InsertPt, Begin, End);
-
- prior(Begin)->updateTerminator();
- OldBeginPrior->updateTerminator();
- OldEndPrior->updateTerminator();
-}
-
-/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
-/// to the loop top to the top of the loop so that they have a fall through.
-/// This can introduce a branch on entry to the loop, but it can eliminate a
-/// branch within the loop. See the @simple case in
-/// test/CodeGen/X86/loop_blocks.ll for an example of this.
-bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
-
- bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
-
- if (TopMBB == MF.begin() ||
- HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
- new_top:
- for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
- PE = TopMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
- if (Pred == TopMBB) continue;
- if (HasFallthrough(Pred)) continue;
- if (!L->contains(Pred)) continue;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (Pred == MF.begin())
- continue;
- if (!HasAnalyzableTerminator(Pred))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
- << " to top of loop.\n");
- Changed = true;
-
- // Move it and all the blocks that can reach it via fallthrough edges
- // exclusively, to keep existing fallthrough edges intact.
- MachineFunction::iterator Begin = Pred;
- MachineFunction::iterator End = llvm::next(Begin);
- while (Begin != MF.begin()) {
- MachineFunction::iterator Prior = prior(Begin);
- if (Prior == MF.begin())
- break;
- // Stop when a non-fallthrough edge is found.
- if (!HasFallthrough(Prior))
- break;
- // Stop if a block which could fall-through out of the loop is found.
- if (Prior->isSuccessor(End))
- break;
- // If we've reached the top, stop scanning.
- if (Prior == MachineFunction::iterator(TopMBB)) {
- // We know top currently has a fall through (because we just checked
- // it) which would be lost if we do the transformation, so it isn't
- // worthwhile to do the transformation unless it would expose a new
- // fallthrough edge.
- if (!Prior->isSuccessor(End))
- goto next_pred;
- // Otherwise we can stop scanning and proceed to move the blocks.
- break;
- }
- // If we hit a switch or something complicated, don't move anything
- // for this predecessor.
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
- break;
- // Ok, the block prior to Begin will be moved along with the rest.
- // Extend the range to include it.
- Begin = Prior;
- ++NumIntraMoved;
- }
-
- // Move the blocks.
- Splice(MF, TopMBB, Begin, End);
-
- // Update TopMBB.
- TopMBB = L->getTopBlock();
-
- // We have a new loop top. Iterate on it. We shouldn't have to do this
- // too many times if BranchFolding has done a reasonable job.
- goto new_top;
- next_pred:;
- }
- }
-
- // If the loop previously didn't exit with a fall-through and it now does,
- // we eliminated a branch.
- if (Changed &&
- !BotHasFallthrough &&
- HasFallthrough(L->getBottomBlock())) {
- ++NumIntraElim;
- }
-
- return Changed;
-}
-
-/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
-/// portion of the loop contiguous with the header. This usually makes the loop
-/// contiguous, provided that AnalyzeBranch can handle all the relevant
-/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
-/// for an example of this.
-bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
- MachineBasicBlock *BotMBB = L->getBottomBlock();
-
- // Determine a position to move orphaned loop blocks to. If TopMBB is not
- // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
- // to the top of the loop to avoid losing that fallthrough. Otherwise append
- // them to the bottom, even if it previously had a fallthrough, on the theory
- // that it's worth an extra branch to keep the loop contiguous.
- MachineFunction::iterator InsertPt =
- llvm::next(MachineFunction::iterator(BotMBB));
- bool InsertAtTop = false;
- if (TopMBB != MF.begin() &&
- !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
- HasFallthrough(BotMBB)) {
- InsertPt = TopMBB;
- InsertAtTop = true;
- }
-
- // Keep a record of which blocks are in the portion of the loop contiguous
- // with the loop header.
- SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
- for (MachineFunction::iterator I = TopMBB,
- E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
- ContiguousBlocks.insert(I);
-
- // Find non-contigous blocks and fix them.
- if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
- for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- MachineBasicBlock *BB = *BI;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (!HasAnalyzableTerminator(BB))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
- continue;
-
- // If the layout predecessor is part of the loop, this block will be
- // processed along with it. This keeps them in their relative order.
- if (BB != MF.begin() &&
- L->contains(prior(MachineFunction::iterator(BB))))
- continue;
-
- // Check to see if this block is already contiguous with the main
- // portion of the loop.
- if (!ContiguousBlocks.insert(BB))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
- << " to be contiguous with loop.\n");
- Changed = true;
-
- // Process this block and all loop blocks contiguous with it, to keep
- // them in their relative order.
- MachineFunction::iterator Begin = BB;
- MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
- for (; End != MF.end(); ++End) {
- if (!L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- ContiguousBlocks.insert(End);
- ++NumIntraMoved;
- }
-
- // If we're inserting at the bottom of the loop, and the code we're
- // moving originally had fall-through successors, bring the sucessors
- // up with the loop blocks to preserve the fall-through edges.
- if (!InsertAtTop)
- for (; End != MF.end(); ++End) {
- if (L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- if (!HasFallthrough(prior(End))) break;
- }
-
- // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
- // we don't need them anymore at this point.
- Splice(MF, InsertPt, Begin, End);
- }
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-/// This code takes the approach of making minor changes to the existing
-/// layout to fix specific loop-oriented problems. Also, it depends on
-/// AnalyzeBranch, which can't understand complex control instructions.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
-
- // Do optimization for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- // Do optimization for this loop.
- Changed |= EliminateUnconditionalJumpsToTop(MF, L);
- Changed |= MoveDiscontiguousLoopBlocks(MF, L);
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
- bool Changed = false;
-
- if (!TLI->shouldOptimizeCodePlacement())
- return Changed;
-
- // Do optimization for each loop in the function.
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- if (!(*I)->getParentLoop())
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- return Changed;
-}
-
-/// AlignLoops - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
- return false;
-
- unsigned Align = TLI->getPrefLoopAlignment();
- if (!Align)
- return false; // Don't care about loop alignment.
-
- bool Changed = false;
-
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- return Changed;
-}
-
-/// AlignLoop - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
- unsigned Align) {
- bool Changed = false;
-
- // Do alignment for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- L->getTopBlock()->setAlignment(Align);
- Changed = true;
- ++NumLoopsAligned;
-
- return Changed;
-}
-
-bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
- MLI = &getAnalysis<MachineLoopInfo>();
- if (MLI->empty())
- return false; // No loops.
-
- TLI = MF.getTarget().getTargetLowering();
- TII = MF.getTarget().getInstrInfo();
-
- bool Changed = OptimizeIntraLoopEdges(MF);
-
- Changed |= AlignLoops(MF);
-
- return Changed;
-}
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index fac207e3b3..5447df09cb 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
DEBUG(dbgs() << "If-converting " << *PI.PHI);
- assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
unsigned DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
@@ -593,6 +592,7 @@ public:
EarlyIfConverter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnMachineFunction(MachineFunction &MF);
+ const char *getPassName() const { return "Early If-Conversion"; }
private:
bool tryConvertIf(MachineBasicBlock*);
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 0000000000..8a1e2d9c99
--- /dev/null
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGC : public GCStrategy {
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+ public:
+ ErlangGC();
+ bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+ };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+
+ if (MI->getDesc().isCall()) {
+
+ // Do not treat tail call sites as safe points.
+ if (MI->getDesc().isTerminator())
+ continue;
+
+ /* Code copied from VisitCallPoint(...) */
+ MachineBasicBlock::iterator RAI = MI; ++RAI;
+ MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+ FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+ }
+
+ return false;
+}
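A frontend opts a function into this strategy by naming the registered GC on the Function; the collector above then records a PostCall safe point after every non-tail call during code generation. A minimal sketch, assuming an LLVM build that includes this patch:

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gc-demo", Ctx);
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "demo", &M);
  F->setGC("erlang"); // matches the GCRegistry name registered above
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  B.CreateRetVoid();
  return 0;
}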
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 9958d7daad..8264d6dbab 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
return false;
}
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
if (Kind == ICSimpleFalse)
if (TII->ReverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
@@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB);
} else {
PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
@@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
return false;
}
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
if (TII->ReverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
@@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
@@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextBBI->BB->hasAddressTaken()) {
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
@@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return false;
}
+ if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
+ // Conservatively abort if-conversion if either BB has its address taken.
+ return false;
+
// Put the predicated instructions from the 'true' block before the
// instructions from the 'false' block, unless the true block would clobber
// the predicate, in which case, do the opposite.
@@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// tail, add an unconditional branch to it.
if (TailBB) {
BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
- bool CanMergeTail = !TailBBI.HasFallThrough;
+ bool CanMergeTail = !TailBBI.HasFallThrough &&
+ !TailBBI.BB->hasAddressTaken();
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
unsigned NumPreds = TailBB->pred_size();
@@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
/// i.e., when FromBBI's branch is being moved, add those successor edges to
/// ToBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ assert(!FromBBI.BB->hasAddressTaken() &&
+ "Removing a BB whose address is taken!");
+
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
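The new hasAddressTaken() guards all protect the same invariant: a block whose address escapes (via blockaddress, typically feeding an indirectbr) can be jumped to at run time, so the if-converter must neither merge it away nor drop it as a successor. A small sketch of how such a block arises; the module and function names are illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("addr-taken", Ctx);
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Target = BasicBlock::Create(Ctx, "target", F);

  IRBuilder<> B(Entry);
  // Taking the address pins 'target': hasAddressTaken() becomes true, and
  // passes like the if-converter must leave the block in place.
  IndirectBrInst *IBr = B.CreateIndirectBr(BlockAddress::get(F, Target), 1);
  IBr->addDestination(Target);

  IRBuilder<> BT(Target);
  BT.CreateRetVoid();
  return 0;
}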
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index c6d1a18dbd..35295fe858 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() {
Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// Get rid of deleted and empty intervals.
- for (unsigned i = RegsToSpill.size(); i != 0; --i) {
- unsigned Reg = RegsToSpill[i-1];
- if (!LIS.hasInterval(Reg)) {
- RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ unsigned ResultPos = 0;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ if (!LIS.hasInterval(Reg))
continue;
- }
+
LiveInterval &LI = LIS.getInterval(Reg);
- if (!LI.empty())
+ if (LI.empty()) {
+ Edit->eraseVirtReg(Reg);
continue;
- Edit->eraseVirtReg(Reg);
- RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+
+ RegsToSpill[ResultPos++] = Reg;
}
+ RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
}
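The rewrite replaces repeated RegsToSpill.erase() calls, each of which shifts the vector's tail, with a single in-place compaction pass followed by one erase of the stale tail: O(n) instead of O(n^2) in the number of removed registers. The same idiom on a plain std::vector, as a sketch:

#include <vector>

// Keep only elements accepted by Keep, preserving relative order.
template <class Pred>
static void compact(std::vector<unsigned> &V, Pred Keep) {
  unsigned ResultPos = 0;
  for (unsigned i = 0, e = V.size(); i != e; ++i)
    if (Keep(V[i]))
      V[ResultPos++] = V[i];                 // shift survivors down in place
  V.erase(V.begin() + ResultPos, V.end());   // drop the stale tail once
}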
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 07f0ccf52f..d894f664dc 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Just drop the annotation, but forward the value
+ CI->replaceAllUsesWith(CI->getOperand(0));
+ break;
+
case Intrinsic::var_annotation:
break; // Strip out annotate intrinsic
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 22b35d5271..f1b8394811 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -972,9 +972,9 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(unsigned Reg) {
- SlotIndex LastUse = NewIdx;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = NewIdx;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI.use_nodbg_begin(Reg),
UE = MRI.use_nodbg_end();
@@ -984,30 +984,42 @@ private:
if (InstSlot > LastUse && InstSlot < OldIdx)
LastUse = InstSlot;
}
- } else {
- MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
- MachineBasicBlock::iterator MII(MI);
- ++MII;
- MachineBasicBlock* MBB = MI->getParent();
- for (; MII != MBB->end(); ++MII){
- if (MII->isDebugValue())
- continue;
- if (LIS.getInstructionIndex(MII) < OldIdx)
- break;
- for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
- MOE = MII->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& mop = *MOI;
- if (!mop.isReg() || mop.getReg() == 0 ||
- TargetRegisterInfo::isVirtualRegister(mop.getReg()))
- continue;
-
- if (TRI.hasRegUnit(mop.getReg(), Reg))
- LastUse = LIS.getInstructionIndex(MII);
- }
- }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(NewIdx < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(MII);
+
+ // Stop searching when NewIdx is reached.
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
+ return NewIdx;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
+ if (MO->isReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx;
}
- return LastUse;
+ // Didn't reach NewIdx. It must be the first instruction in the block.
+ return NewIdx;
}
};
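For physical register units the use lists can be very large, so the new code walks instructions backwards from OldIdx toward NewIdx instead, returning on the first use it meets (which, scanning downwards, is the latest one). Stripped of the SlotIndex bookkeeping, the shape of the search is roughly this — illustrative types, not LLVM API:

#include <vector>

struct Instr { unsigned Idx; bool UsesReg; };

// Latest use index in (NewIdx, OldIdx]; NewIdx if there is none.
static unsigned lastUseBefore(const std::vector<Instr> &Block,
                              unsigned NewIdx, unsigned OldIdx) {
  for (std::vector<Instr>::const_reverse_iterator I = Block.rbegin(),
                                                  E = Block.rend();
       I != E; ++I) {
    if (I->Idx > OldIdx)
      continue;             // still above the search window
    if (I->Idx <= NewIdx)
      break;                // reached NewIdx without finding a use
    if (I->UsesReg)
      return I->Idx;        // first hit going backwards = latest use
  }
  return NewIdx;
}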
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 3b28e6afb6..7793e96c35 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx) {
+ SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 352ef94259..26a117652b 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -46,13 +46,16 @@ namespace {
class FrameRef {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
+ int FrameIdx; // The frame index
public:
- FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
- MI(I), LocalOffset(Offset) {}
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
bool operator<(const FrameRef &RHS) const {
return LocalOffset < RHS.LocalOffset;
}
- MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ MachineBasicBlock::iterator getMachineInstr() const { return MI; }
+ int64_t getLocalOffset() const { return LocalOffset; }
+ int getFrameIndex() const { return FrameIdx; }
};
class LocalStackSlotPass: public MachineFunctionPass {
@@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
static inline bool
-lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
- std::pair<unsigned, int64_t> &RegOffset,
+lookupCandidateBaseReg(int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
const MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- unsigned e = Regs.size();
- for (unsigned i = 0; i < e; ++i) {
- RegOffset = Regs[i];
- // Check if the relative offset from the where the base register references
- // to the target address is in range for the instruction.
- int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
- if (TRI->isFrameOffsetLegal(MI, Offset))
- return true;
- }
- return false;
+  // Check if the relative offset from where the base register points to the
+  // target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
+ return TRI->isFrameOffsetLegal(MI, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
- // A base register definition is a register + offset pair.
- SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
-
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
MachineInstr *MI = I;
@@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Don't try this with values not in the local block.
if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
break;
+ int Idx = MI->getOperand(i).getIndex();
+ int64_t LocalOffset = LocalOffsets[Idx];
+ if (!TRI->needsFrameBaseReg(MI, LocalOffset))
+ break;
FrameReferenceInsns.
- push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ push_back(FrameRef(MI, LocalOffset, Idx));
break;
}
}
@@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineBasicBlock *Entry = Fn.begin();
+ unsigned BaseReg = 0;
+ int64_t BaseOffset = 0;
+
// Loop through the frame references and allocate for them as necessary.
for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
- MachineBasicBlock::iterator I =
- FrameReferenceInsns[ref].getMachineInstr();
+ FrameRef &FR = FrameReferenceInsns[ref];
+ MachineBasicBlock::iterator I = FR.getMachineInstr();
MachineInstr *MI = I;
- for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
- // Consider replacing all frame index operands that reference
- // an object allocated in the local block.
- if (MI->getOperand(idx).isFI()) {
- int FrameIdx = MI->getOperand(idx).getIndex();
-
- assert(MFI->isObjectPreAllocated(FrameIdx) &&
- "Only pre-allocated locals expected!");
-
- DEBUG(dbgs() << "Considering: " << *MI);
- if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
- unsigned BaseReg = 0;
- int64_t Offset = 0;
- int64_t FrameSizeAdjust =
- StackGrowsDown ? MFI->getLocalFrameSize() : 0;
-
- DEBUG(dbgs() << " Replacing FI in: " << *MI);
-
- // If we have a suitable base register available, use it; otherwise
- // create a new one. Note that any offset encoded in the
- // instruction itself will be taken into account by the target,
- // so we don't have to adjust for it here when reusing a base
- // register.
- std::pair<unsigned, int64_t> RegOffset;
- if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
- FrameSizeAdjust,
- LocalOffsets[FrameIdx],
- MI, TRI)) {
- DEBUG(dbgs() << " Reusing base register " <<
- RegOffset.first << "\n");
- // We found a register to reuse.
- BaseReg = RegOffset.first;
- Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
- RegOffset.second;
- } else {
- // No previously defined register was in range, so create a
- // new one.
- int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
- const MachineFunction *MF = MI->getParent()->getParent();
- const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
- BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
- DEBUG(dbgs() << " Materializing base register " << BaseReg <<
- " at frame local offset " <<
- LocalOffsets[FrameIdx] + InstrOffset << "\n");
-
- // Tell the target to insert the instruction to initialize
- // the base register.
- // MachineBasicBlock::iterator InsertionPt = Entry->begin();
- TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
- InstrOffset);
-
- // The base register already includes any offset specified
- // by the instruction, so account for that so it doesn't get
- // applied twice.
- Offset = -InstrOffset;
-
- int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
- InstrOffset;
- BaseRegisters.push_back(
- std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
- ++NumBaseRegisters;
- UsedBaseReg = true;
- }
- assert(BaseReg != 0 && "Unable to allocate virtual base register!");
-
- // Modify the instruction to use the new base register rather
- // than the frame index operand.
- TRI->resolveFrameIndex(I, BaseReg, Offset);
- DEBUG(dbgs() << "Resolved: " << *MI);
-
- ++NumReplacements;
- }
+ int64_t LocalOffset = FR.getLocalOffset();
+ int FrameIdx = FR.getFrameIndex();
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+
+ unsigned idx = 0;
+ for (unsigned f = MI->getNumOperands(); idx != f; ++idx) {
+ if (!MI->getOperand(idx).isFI())
+ continue;
+
+ if (FrameIdx == I->getOperand(idx).getIndex())
+ break;
+ }
+
+ assert(idx < MI->getNumOperands() && "Cannot find FI operand");
+
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+ LocalOffset, MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
+ // We found a register to reuse.
+ Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
+ } else {
+      // No previously defined register was in range, so create a new one.
+
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+
+ int64_t PrevBaseOffset = BaseOffset;
+ BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+
+ // We'd like to avoid creating single-use virtual base registers.
+ // Because the FrameRefs are in sorted order, and we've already
+ // processed all FrameRefs before this one, just check whether or not
+ // the next FrameRef will be able to reuse this new register. If not,
+ // then don't bother creating it.
+ bool CanReuse = false;
+ for (int refn = ref + 1; refn < e; ++refn) {
+ FrameRef &FRN = FrameReferenceInsns[refn];
+ MachineBasicBlock::iterator J = FRN.getMachineInstr();
+ MachineInstr *MIN = J;
+
+ CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+ FRN.getLocalOffset(), MIN, TRI);
+ break;
}
+
+ if (!CanReuse) {
+ BaseOffset = PrevBaseOffset;
+ continue;
+ }
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " << LocalOffset + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
}
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
}
+
return UsedBaseReg;
}
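The restructured loop keeps a single current base register rather than a vector of candidates: FrameReferenceInsns is sorted by local offset, so once a reference is out of range of the current base every later one is too, and a fresh base is only materialized when the immediately following reference could share it (avoiding single-use base registers). A rough sketch of that allocation policy over sorted offsets, where Range stands in for TRI->isFrameOffsetLegal and per-instruction offsets are ignored:

#include <algorithm>
#include <cstdint>
#include <vector>

static unsigned countBaseRegs(std::vector<int64_t> Offsets, int64_t Range) {
  std::sort(Offsets.begin(), Offsets.end());
  unsigned NumBaseRegs = 0;
  bool HaveBase = false;
  int64_t Base = 0;
  for (size_t i = 0; i < Offsets.size(); ++i) {
    if (HaveBase && Offsets[i] - Base <= Range)
      continue;                        // reuse the current base register
    // Mirror the patch: only create a base the next reference can share.
    if (i + 1 >= Offsets.size() || Offsets[i + 1] - Offsets[i] > Range)
      continue;                        // would be single-use; skip it
    Base = Offsets[i];
    HaveBase = true;
    ++NumBaseRegs;
  }
  return NumBaseRegs;
}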
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index fecd1adf2b..71a377df09 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
: BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
- AddressTaken(false) {
+ AddressTaken(false), CachedMCSymbol(NULL) {
Insts.Parent = this;
}
@@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() {
/// getSymbol - Return the MCSymbol for this basic block.
///
MCSymbol *MachineBasicBlock::getSymbol() const {
- const MachineFunction *MF = getParent();
- MCContext &Ctx = MF->getContext();
- const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
- return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
- Twine(MF->getFunctionNumber()) + "_" +
- Twine(getNumber()));
+ if (!CachedMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+
+ return CachedMCSymbol;
}
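getSymbol() used to rebuild the name and hit the MCContext symbol table on every call; the hunk memoizes the result in the new CachedMCSymbol member (which must be mutable, since the getter is const — the header change isn't shown here). The lazy-init pattern in miniature:

#include <sstream>
#include <string>

class Block {
  mutable std::string CachedName; // empty until first request
  int Number;
public:
  explicit Block(int N) : Number(N) {}
  const std::string &getName() const {
    if (CachedName.empty()) {     // compute once, reuse afterwards
      std::ostringstream OS;
      OS << "BB" << Number;
      CachedName = OS.str();
    }
    return CachedName;
  }
};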
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 3b09c6b779..bfba503b35 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq,
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks in the function."),
+ cl::init(0), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -1061,7 +1067,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
// Align this block if the layout predecessor's edge into this block is
- // cold relative to the block. When this is true, othe predecessors make up
+ // cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
@@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
BlockToChain.clear();
ChainAllocator.DestroyAll();
+ if (AlignAllBlock)
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ FI->setAlignment(AlignAllBlock);
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
return true;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5e04f2d8a3..04321f3292 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
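
The rounding in estimateStackSize() above appears in two equivalent forms: the division form used per object, and the power-of-two mask form used for the final stack size. A minimal self-contained sketch with a worked value:

// Align-up arithmetic as used above. With Offset = 13, Align = 8:
//   (13 + 8 - 1) / 8 * 8      == 16   (division form, any Align > 0)
//   (13 + (8 - 1)) & ~(8 - 1) == 16   (mask form; Align must be a power of two)
static unsigned alignUp(unsigned Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}
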
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 0ea9ae0fcc..8af9d053b1 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) {
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0) return;
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (const Function *F =
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index a777f52cb2..68372f6c90 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -15,6 +15,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/raw_os_ostream.h"
+
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
@@ -37,6 +39,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
@@ -105,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
- "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (!VRegInfo[Reg].second)
+ continue;
+ verifyUseList(Reg);
+ llvm_unreachable("Remaining virtual register operands");
+ }
#endif
VRegInfo.clear();
}
+void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+#ifndef NDEBUG
+ bool Valid = true;
+ for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+ MachineOperand *MO = &I.getOperand();
+ MachineInstr *MI = MO->getParent();
+ if (!MI) {
+ errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+ << " has no parent instruction.\n";
+ Valid = false;
+ continue; // The checks below dereference MI.
+ }
+ MachineOperand *MO0 = &MI->getOperand(0);
+ unsigned NumOps = MI->getNumOperands();
+ if (!(MO >= MO0 && MO < MO0+NumOps)) {
+ errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+ << " doesn't belong to parent MI: " << *MI;
+ Valid = false;
+ }
+ if (!MO->isReg()) {
+ errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO
+ << " is not a register\n";
+ Valid = false;
+ continue; // getReg() asserts on non-register operands.
+ }
+ if (MO->getReg() != Reg) {
+ errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": "
+ << *MO << " is the wrong register\n";
+ Valid = false;
+ }
+ }
+ assert(Valid && "Invalid use list");
+#endif
+}
+
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i)
+ verifyUseList(i);
+#endif
+}
+
/// Add MO to the linked list of operands for its register.
void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
assert(!MO->isOnRegUseList() && "Already on list");
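
Both verifier hooks added above compile away in release builds (their bodies sit under #ifndef NDEBUG), and the MachineVerifier change later in this commit wires verifyUseLists() into every verified function. A minimal sketch of calling them directly, say from a custom MachineFunctionPass, with MF assumed in scope:

MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.verifyUseLists();  // check every virtual and physical register use list
// Or spot-check a single register, here the first virtual register:
MRI.verifyUseList(TargetRegisterInfo::index2VirtReg(0));
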
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 589fa1fa02..fff6b2b4c0 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
@@ -49,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
-// Experimental heuristics
+// FIXME: remove this flag after initial testing. It should always be a good
+// thing.
+static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
+ cl::desc("Constrain vreg copies."), cl::init(true));
+
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
@@ -57,6 +63,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -197,6 +206,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ if (VerifyScheduling) {
+ DEBUG(LIS->print(dbgs()));
+ MF->verify(this, "Before machine scheduling.");
+ }
RegClassInfo->runOnMachineFunction(*MF);
// Select the scheduler, or set the default.
@@ -285,6 +298,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
Scheduler->finalizeSchedule();
DEBUG(LIS->print(dbgs()));
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
return true;
}
@@ -294,7 +309,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
- dbgs() << Name << ": ";
+ dbgs() << " " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
@@ -312,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() {
delete SchedImpl;
}
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
if (SuccSU != &ExitSU) {
// Do not use WillCreateCycle, it assumes SD scheduling.
@@ -393,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
}
}
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
// Advance RegionBegin if the first instruction moves down.
@@ -494,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
if ((int)NewMaxPressure[ID] > MaxUnits)
MaxUnits = NewMaxPressure[ID];
}
+ DEBUG(
+ for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (NewMaxPressure[i] > Limit) {
+ dbgs() << " " << TRI->getRegPressureSetName(i) << ": "
+ << NewMaxPressure[i] << " > " << Limit << "\n";
+ }
+ });
}
/// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -894,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
}
//===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+ // Transient state.
+ SlotIndex RegionBeginIdx;
+ // RegionEndIdx is the slot index of the last non-debug instruction in the
+ // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+ SlotIndex RegionEndIdx;
+public:
+ CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+
+protected:
+ void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0: = dst
+/// I1: src = ...
+/// I2: = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1: = dst
+/// I2: src = ...
+/// I3: = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineInstr *Copy = CopySU->getInstr();
+
+ // Check for pure vreg copies.
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return;
+
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return;
+
+ // Check if either the dest or source is local. If it's live across a back
+ // edge, it's not local. Note that if both vregs are live across the back
+ // edge, we cannot successfully constrain the copy without cyclic scheduling.
+ unsigned LocalReg = DstReg;
+ unsigned GlobalReg = SrcReg;
+ LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+ LocalReg = SrcReg;
+ GlobalReg = DstReg;
+ LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+ return;
+ }
+ LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+ // Find the global segment after the start of the local LI.
+ LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+ // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+ // local live range. We could create edges from other global uses to the local
+ // start, but the coalescer should have already eliminated these cases, so
+ // don't bother dealing with them.
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // If GlobalSegment is killed at LocalLI->start, the call to find()
+ // returned the next global segment. But if GlobalSegment overlaps with
+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+ // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+ if (GlobalSegment->contains(LocalLI->beginIndex()))
+ ++GlobalSegment;
+
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+ if (GlobalSegment != GlobalLI->begin()) {
+ // Two address defs have no hole.
+ if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+ GlobalSegment->start)) {
+ return;
+ }
+ // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+ // it would be a disconnected component in the live range.
+ assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+ "Disconnected LRG within the scheduling region.");
+ }
+ MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+ if (!GlobalDef)
+ return;
+
+ SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+ if (!GlobalSU)
+ return;
+
+ // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+ // constraining the uses of the last local def to precede GlobalDef.
+ SmallVector<SUnit*,8> LocalUses;
+ const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+ MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+ SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+ for (SUnit::const_succ_iterator
+ I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ continue;
+ if (I->getSUnit() == GlobalSU)
+ continue;
+ if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ return;
+ LocalUses.push_back(I->getSUnit());
+ }
+ // Open the top of the GlobalLI hole by constraining any earlier global uses
+ // to precede the start of LocalLI.
+ SmallVector<SUnit*,8> GlobalUses;
+ MachineInstr *FirstLocalDef =
+ LIS->getInstructionFromIndex(LocalLI->beginIndex());
+ SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+ for (SUnit::const_pred_iterator
+ I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+ if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ continue;
+ if (I->getSUnit() == FirstLocalSU)
+ continue;
+ if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ return;
+ GlobalUses.push_back(I->getSUnit());
+ }
+ DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+ // Add the weak edges.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ << GlobalSU->NodeNum << ")\n");
+ DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+ }
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ << FirstLocalSU->NodeNum << ")\n");
+ DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+ MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+ if (FirstPos == DAG->end())
+ return;
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+ RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+ &*priorNonDebug(DAG->end(), DAG->begin()));
+
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->isCopy())
+ continue;
+
+ constrainLocalCopy(SU, DAG);
+ }
+}
+
+//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
@@ -905,7 +1112,7 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
- NoCand, SingleExcess, SingleCritical, Cluster,
+ NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
NodeOrder};
@@ -1180,8 +1387,10 @@ protected:
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
+ void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
#ifndef NDEBUG
- void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
+ void traceCandidate(const SchedCandidate &Cand);
#endif
};
} // namespace
@@ -1232,8 +1441,6 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
Top.init(DAG, SchedModel, &Rem);
Bot.init(DAG, SchedModel, &Rem);
- DAG->computeDFSResult();
-
// Initialize resource counts.
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
@@ -1330,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
I != E; ++I) {
unsigned L = getUnscheduledLatency(*I);
+ DEBUG(dbgs() << " " << Available.getName()
+ << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
if (L > RemLatency)
RemLatency = L;
}
@@ -1340,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
RemLatency = L;
}
unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ DEBUG(dbgs() << " " << Available.getName()
+ << " ExpectedLatency " << ExpectedLatency
+ << " CP Limit " << CriticalPathLimit << '\n');
if (RemLatency + ExpectedLatency >= CriticalPathLimit
&& RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
Policy.ReduceLatency = true;
- DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+ DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n');
}
}
@@ -1392,8 +1604,8 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
CheckPending = true;
IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
- DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
- << CurrCycle << '\n');
+ DEBUG(dbgs() << " " << Available.getName()
+ << " Cycle: " << CurrCycle << '\n');
}
/// Add the given processor resource to this scheduled zone.
@@ -1560,7 +1772,8 @@ void ConvergingScheduler::balanceZones(
if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
> (int)SchedModel->getLatencyFactor()) {
CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName()
+ << " reduce "
<< SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
<< '\n');
}
@@ -1571,7 +1784,8 @@ void ConvergingScheduler::balanceZones(
if ((int)(OppositeZone.ExpectedCount - OppositeCount)
> (int)SchedModel->getLatencyFactor()) {
OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName()
+ << " demand "
<< SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
<< '\n');
}
@@ -1595,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits(
if (Top.CritResIdx != Rem.CritResIdx) {
TopCand.Policy.ReduceResIdx = Top.CritResIdx;
BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
- DEBUG(dbgs() << "Reduce scheduled "
+ DEBUG(dbgs() << " Reduce scheduled "
<< SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
}
return;
@@ -1612,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits(
&& (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
TopCand.Policy.ReduceLatency = true;
BotCand.Policy.ReduceLatency = true;
- DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency
<< " + " << Bot.ExpectedLatency << '\n');
}
return;
@@ -1651,7 +1865,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
/// Return true if this heuristic determines order.
-static bool tryLess(unsigned TryVal, unsigned CandVal,
+static bool tryLess(int TryVal, int CandVal,
ConvergingScheduler::SchedCandidate &TryCand,
ConvergingScheduler::SchedCandidate &Cand,
ConvergingScheduler::CandReason Reason) {
@@ -1667,7 +1881,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal,
return false;
}
-static bool tryGreater(unsigned TryVal, unsigned CandVal,
+static bool tryGreater(int TryVal, int CandVal,
ConvergingScheduler::SchedCandidate &TryCand,
ConvergingScheduler::SchedCandidate &Cand,
ConvergingScheduler::CandReason Reason) {
@@ -1687,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->isCopy())
+ return 0;
+
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg producer/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ return 0;
+}
+
/// Apply a set of heursitics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -1714,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
TryCand.Reason = NodeOrder;
return;
}
+
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+ biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ TryCand, Cand, PhysRegCopy))
+ return;
+
// Avoid exceeding the target's limit.
if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1740,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
TryCand, Cand, Cluster))
return;
- // Currently, weak edges are for clustering, so we hard-code that reason.
- // However, deferring the current TryCand will not change Cand's reason.
+
+ // Weak edges are for clustering and other constraints.
+ //
+ // Deferring TryCand here does not change Cand's reason. This is good in the
+ // sense that a bad candidate shouldn't affect a previous candidate's
+ // goodness, but bad in that it is asymmetric and depends on queue order.
CandReason OrigReason = Cand.Reason;
if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
getWeakLeft(Cand.SU, Zone.isTop()),
- TryCand, Cand, Cluster)) {
+ TryCand, Cand, Weak)) {
Cand.Reason = OrigReason;
return;
}
@@ -1816,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
// Avoid increasing the max critical pressure in the scheduled region.
if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
- DEBUG(dbgs() << "RP excess top - bot: "
+ DEBUG(dbgs() << " RP excess top - bot: "
<< (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
}
// Avoid increasing the max critical pressure in the scheduled region.
if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
- DEBUG(dbgs() << "RP critical top - bot: "
+ DEBUG(dbgs() << " RP critical top - bot: "
<< (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
<< '\n');
return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
}
// Avoid increasing the max pressure of the entire region.
if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
- DEBUG(dbgs() << "RP current top - bot: "
+ DEBUG(dbgs() << " RP current top - bot: "
<< (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
<< '\n');
return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1842,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr(
ConvergingScheduler::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
+ case PhysRegCopy: return "PREG-COPY";
case SingleExcess: return "REG-EXCESS";
case SingleCritical: return "REG-CRIT ";
case Cluster: return "CLUSTER ";
+ case Weak: return "WEAK ";
case SingleMax: return "REG-MAX ";
case MultiPressure: return "REG-MULTI ";
case ResourceReduce: return "RES-REDUCE";
@@ -1859,9 +2113,7 @@ const char *ConvergingScheduler::getReasonStr(
llvm_unreachable("Unknown reason!");
}
-void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
- const SchedBoundary &Zone) {
- const char *Label = getReasonStr(Cand.Reason);
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
PressureElement P;
unsigned ResIdx = 0;
unsigned Latency = 0;
@@ -1896,21 +2148,21 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
Latency = Cand.SU->getDepth();
break;
}
- dbgs() << Label << " " << Zone.Available.getName() << " ";
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
- dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
+ dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
+ << ":" << P.UnitIncrease << " ";
else
- dbgs() << " ";
+ dbgs() << " ";
if (ResIdx)
- dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
else
- dbgs() << " ";
+ dbgs() << " ";
if (Latency)
- dbgs() << Latency << " cycles ";
+ dbgs() << " " << Latency << " cycles ";
else
- dbgs() << " ";
- Cand.SU->dump(DAG);
+ dbgs() << " ";
+ dbgs() << '\n';
}
#endif
@@ -1939,15 +2191,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
Cand.setBest(TryCand);
- DEBUG(traceCandidate(Cand, Zone));
+ DEBUG(traceCandidate(Cand));
}
}
}
static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
bool IsTop) {
- DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
- << " SU(" << Cand.SU->NodeNum << ") "
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
<< ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
@@ -1957,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
+ DEBUG(dbgs() << "Pick Top NOCAND\n");
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
+ DEBUG(dbgs() << "Pick Bot NOCAND\n");
return SU;
}
CandPolicy NoPolicy;
@@ -2058,24 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling Instruction in cycle "
- << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
- SU->dump(DAG));
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
return SU;
}
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+ MachineBasicBlock::iterator InsertPos = SU->getInstr();
+ if (!isTop)
+ ++InsertPos;
+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+ // Find already scheduled copies with a single physreg dependence and move
+ // them just above the scheduled instruction.
+ for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ continue;
+ SUnit *DepSU = I->getSUnit();
+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+ continue;
+ MachineInstr *Copy = DepSU->getInstr();
+ if (!Copy->isCopy())
+ continue;
+ DEBUG(dbgs() << " Rescheduling physreg copy ";
+ I->getSUnit()->dump(DAG));
+ DAG->moveInstruction(Copy, InsertPos);
+ }
+}
+
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// it's state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = Top.CurrCycle;
Top.bumpNode(SU);
+ if (SU->hasPhysRegUses)
+ reschedulePhysRegCopies(SU, true);
}
else {
SU->BotReadyCycle = Bot.CurrCycle;
Bot.bumpNode(SU);
+ if (SU->hasPhysRegDefs)
+ reschedulePhysRegCopies(SU, false);
}
}
@@ -2086,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
"-misched-topdown incompatible with -misched-bottomup");
ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
// Register DAG post-processors.
+ //
+ // FIXME: extend the mutation API to allow earlier mutations to instantiate
+ // data and pass it to later mutations. Have a single mutation that gathers
+ // the interesting nodes in one pass.
+ if (EnableCopyConstrain)
+ DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
if (EnableLoadCluster)
DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
@@ -2171,16 +2459,16 @@ public:
/// Callback to select the highest priority node from the ready Q.
virtual SUnit *pickNode(bool &IsTopNode) {
if (ReadyQ.empty()) return NULL;
- pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
- DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
- << *SU->getInstr()
+ DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
<< " ILP: " << DAG->getDFSResult()->getILP(SU)
<< " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
<< DAG->getDFSResult()->getSubtreeLevel(
- DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+ << "Scheduling " << *SU->getInstr());
return SU;
}
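
A minimal sketch, modeled on createConvergingSched() above, of how a scheduler factory opts into the new CopyConstrain mutation; createMyTargetSched is a hypothetical name, and in-tree the mutations are gated by the -misched-vcopy, -misched-cluster, and -misched-fusion flags shown earlier:

static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Mutations run as DAG post-processors. CopyConstrain only adds weak
  // edges, and only after canAddEdge() clears them, so it cannot create
  // a dependence cycle.
  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
  return DAG;
}
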
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index f77a7b17ed..00f702c846 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(*ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
return false;
}
@@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
return FBI;
// Compute resource usage in the block.
- // FIXME: Compute per-functional unit counts.
FBI->HasCalls = false;
unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
const MachineInstr *MI = I;
@@ -96,11 +103,43 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
++InstrCount;
if (MI->isCall())
FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
}
FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
return FBI;
}
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
@@ -108,6 +147,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
: MTM(*ct) {
BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}
// Virtual destructor serves as an anchor.
@@ -123,21 +165,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources from trace above. The top block is simple.
if (!TBI->Pred) {
TBI->InstrDepth = 0;
TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
return;
}
// Compute from the block above. A post-order traversal ensures the
// predecessor is always computed first.
- TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
@@ -145,22 +198,33 @@ computeDepthResources(const MachineBasicBlock *MBB) {
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources for the current block.
TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
return;
}
// Compute from the block below. A post-order traversal ensures the
// predecessor is always computed first.
- TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
TBI->InstrHeight += SuccTBI->InstrHeight;
TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
// Check if depth resources for MBB are valid and return the TBI.
@@ -181,6 +245,35 @@ getHeightResources(const MachineBasicBlock *MBB) const {
return TBI->hasValidHeight() ? TBI : 0;
}
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
//===----------------------------------------------------------------------===//
// Trace Selection Strategies
//===----------------------------------------------------------------------===//
@@ -677,7 +770,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.isEarlierInSameTrace(TBI))
+ if (!DefTBI.isUsefulDominator(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
@@ -713,11 +806,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
SmallVector<DataDep, 8> Deps;
while (!Stack.empty()) {
MBB = Stack.pop_back_val();
- DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
TBI.CriticalPath = 0;
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Also compute the critical path length through MBB when possible.
if (TBI.HasValidInstrHeights)
TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
@@ -740,7 +846,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.isEarlierInSameTrace(TBI))
+ if (!DepTBI.isUsefulDominator(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
@@ -928,6 +1034,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
TBI.HasValidInstrHeights = true;
TBI.CriticalPath = 0;
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Get dependencies from PHIs in the trace successor.
const MachineBasicBlock *Succ = TBI.Succ;
// If MBB is the last block in the trace, and it has a back-edge to the
@@ -1058,27 +1176,66 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
}
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
- // For now, we compute the resource depth from instruction count / issue
- // width. Eventually, we should compute resource depth per functional unit
- // and return the max.
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
unsigned Instrs = TBI.InstrDepth;
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
}
+
unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
+ ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
+ const MCSchedClassDesc* SC = ExtraInstrs[I];
+ if (!SC->isValid())
+ continue;
+ for (TargetSchedModel::ProcResIter
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != K)
+ continue;
+ PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
+ }
+ }
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
}
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
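
The per-resource bookkeeping above keeps one flat vector with a row of getNumProcResourceKinds() entries per block, and pre-multiplies every entry by getResourceFactor() so different resource kinds compare in common units. A self-contained sketch of both conventions, with made-up numbers for the bound:

#include <algorithm>

// Row-major indexing: block MBBNum's slice occupies
// [MBBNum * PRKinds, (MBBNum + 1) * PRKinds).
static unsigned prIndex(unsigned MBBNum, unsigned PRKinds, unsigned K) {
  return MBBNum * PRKinds + K;
}

// getResourceDepth()-style bound: a trace is limited either by issue width
// or by its most contended resource, whichever is larger. E.g. 10
// instructions at issue width 2 give 5 cycles; a scaled resource maximum
// of 7 cycles makes the trace resource-bound at 7.
static unsigned depthBound(unsigned Instrs, unsigned IssueWidth,
                           unsigned PRMaxCycles) {
  return std::max(Instrs / IssueWidth, PRMaxCycles);
}
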
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 4b1230029a..037043f641 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() {
if (MInfo.Succs.size() != I->succ_size())
report("MBB has duplicate entries in its successor list.", I);
}
+
+ // Check that the register use lists are sane.
+ MRI->verifyUseLists();
}
// Does iterator point to a and b as the first two elements?
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index b79f9f9816..bfbc0623f9 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
@@ -40,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
cl::desc("Disable pre-register allocation tail duplication"));
static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
- cl::Hidden, cl::desc("Disable the probability-driven block placement, and "
- "re-enable the old code placement pass"));
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
@@ -97,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
/// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+ bool Override) {
if (Override)
- return 0;
+ return IdentifyingPassPtr();
return PassID;
}
@@ -107,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
/// flags with ternary conditions. TargetID is passed through by default. The
/// pass is suppressed when the option is false. When the option is true, the
/// StandardID is selected if the target provides no default.
-static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
- AnalysisID StandardID) {
+static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
+ cl::boolOrDefault Override,
+ AnalysisID StandardID) {
switch (Override) {
case cl::BOU_UNSET:
return TargetID;
case cl::BOU_TRUE:
- if (TargetID)
+ if (TargetID.isValid())
return TargetID;
if (StandardID == 0)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
- return 0;
+ return IdentifyingPassPtr();
}
llvm_unreachable("Invalid command line option state");
}
@@ -136,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
/// pass to run. This allows multiple options to control a single pass depending
/// on where in the pipeline that pass is added.
-static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
if (StandardID == &PostRASchedulerID)
return applyDisable(TargetID, DisablePostRA);
@@ -150,10 +149,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
return applyDisable(TargetID, DisableEarlyTailDup);
if (StandardID == &MachineBlockPlacementID)
- return applyDisable(TargetID, DisableCodePlace);
-
- if (StandardID == &CodePlacementOptID)
- return applyDisable(TargetID, DisableCodePlace);
+ return applyDisable(TargetID, DisableBlockPlacement);
if (StandardID == &StackSlotColoringID)
return applyDisable(TargetID, DisableSSC);
@@ -207,11 +203,11 @@ public:
// user interface. For example, a target may disable a standard pass by
// default by substituting a pass ID of zero, and the user may still enable
// that standard pass with an explicit command line option.
- DenseMap<AnalysisID,AnalysisID> TargetPasses;
+ DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
/// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
/// is inserted after each instance of the first one.
- SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+ SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
};
} // namespace llvm
@@ -246,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- AnalysisID InsertedPassID) {
- assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
- std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ IdentifyingPassPtr InsertedPassID) {
+ assert(((!InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getID()) ||
+ (InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+ "Insert a pass after itself!");
+ std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
Impl->InsertedPasses.push_back(P);
}
@@ -272,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
}
void TargetPassConfig::substitutePass(AnalysisID StandardID,
- AnalysisID TargetID) {
+ IdentifyingPassPtr TargetID) {
Impl->TargetPasses[StandardID] = TargetID;
}
-AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
- DenseMap<AnalysisID, AnalysisID>::const_iterator
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
I = Impl->TargetPasses.find(ID);
if (I == Impl->TargetPasses.end())
return ID;
@@ -310,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) {
/// Add a CodeGen pass at this point in the pipeline after checking for target
/// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because it is internal
+/// to the PassManager and the instance we create here may already be freed.
AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
- AnalysisID TargetID = getPassSubstitution(PassID);
- AnalysisID FinalID = overridePass(PassID, TargetID);
- if (FinalID == 0)
- return FinalID;
-
- Pass *P = Pass::createPass(FinalID);
- if (!P)
- llvm_unreachable("Pass ID not registered");
- addPass(P);
+ IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+ IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+ if (!FinalPtr.isValid())
+ return 0;
+
+ Pass *P;
+ if (FinalPtr.isInstance())
+ P = FinalPtr.getInstance();
+ else {
+ P = Pass::createPass(FinalPtr.getID());
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ }
+ AnalysisID FinalID = P->getPassID();
+ addPass(P); // Ends the lifetime of P.
+
// Add the passes after the pass P if there is any.
- for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator
I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
I != E; ++I) {
if ((*I).first == PassID) {
- assert((*I).second && "Illegal Pass ID!");
- Pass *NP = Pass::createPass((*I).second);
- assert(NP && "Pass ID not registered");
+ assert((*I).second.isValid() && "Illegal Pass ID!");
+ Pass *NP;
+ if ((*I).second.isInstance())
+ NP = (*I).second.getInstance();
+ else {
+ NP = Pass::createPass((*I).second.getID());
+ assert(NP && "Pass ID not registered");
+ }
addPass(NP);
}
}
@@ -694,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&VirtRegRewriterID);
printAndVerify("After Virtual Register Rewriter");
- // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
- // but eventually, all users of it should probably be moved to addPostRA and
- // it can go away. Currently, it's the intended place for targets to run
- // FinalizeMachineBundles, because passes other than MachineScheduling an
- // RegAlloc itself may not be aware of bundles.
- if (addFinalizeRegAlloc())
- printAndVerify("After RegAlloc finalization");
-
// Perform stack slot coloring and post-ra machine LICM.
//
// FIXME: Re-enable coloring with register when it's capable of adding
@@ -743,16 +750,7 @@ bool TargetPassConfig::addGCPasses() {
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- AnalysisID PassID = 0;
- if (!DisableBlockPlacement) {
- // MachineBlockPlacement is a new pass which subsumes the functionality of
- // CodPlacementOpt. The old code placement pass can be restored by
- // disabling block placement, but eventually it will be removed.
- PassID = addPass(&MachineBlockPlacementID);
- } else {
- PassID = addPass(&CodePlacementOptID);
- }
- if (PassID) {
+ if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
addPass(&MachineBlockPlacementStatsID);
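
The AnalysisID -> IdentifyingPassPtr migration above lets substitutePass() and insertPass() carry either a registered pass ID or a concrete, pre-constructed Pass instance. A minimal sketch from a hypothetical TargetPassConfig subclass; MyPassConfig and createMyPreSchedPass are assumed names:

void MyPassConfig::addMachinePasses() {
  // By ID, as before: the object is created later via Pass::createPass().
  insertPass(&MachineSchedulerID, IdentifyingPassPtr(&MachineCopyPropagationID));
  // By instance: addPass() takes ownership and ends the object's lifetime,
  // which is why addPass(AnalysisID) cannot return the Pass*.
  insertPass(&MachineSchedulerID, IdentifyingPassPtr(createMyPreSchedPass()));
}
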
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 7ae43ef57e..337b9790a5 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -39,7 +39,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
@@ -57,7 +56,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog",
"Prologue/Epilogue Insertion & Frame Finalization",
false, false)
-STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -103,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -552,9 +550,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
!RegInfo->needsStackRealignment(Fn)) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -597,7 +597,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -619,7 +619,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -635,9 +635,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// stack pointer.
if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
!RegInfo->useFPForScavengingIndex(Fn))) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -820,14 +822,28 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- unsigned VirtReg = 0;
- unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for a definition by what was originally the first instruction
+ // in BB, and its spill code was inserted before that instruction.
+ if (I == MachineBasicBlock::iterator(NULL))
+ I = BB->begin();
+
MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = llvm::next(I);
+ MachineBasicBlock::iterator P = I == BB->begin() ?
+ MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
MachineOperand &MO = MI->getOperand(i);
@@ -837,29 +853,47 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- ++NumVirtualFrameRegs;
-
- // Have we already allocated a scratch register for this virtual?
- if (Reg != VirtReg) {
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MI->getOperand(i).isDef() &&
- "frame index virtual missing def!");
- // Scavenge a new scratch register
- VirtReg = Reg;
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- ++NumScavengedRegs;
- }
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(ScratchReg);
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setUsed(ScratchReg);
}
}
- RS->forward(I);
- ++I;
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != llvm::prior(J)) {
+ BB->splice(J, BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+ } else
+ ++I;
}
}
}
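
The spill code for a scavenged register may land between I and J, so the hunk above splices I to just before J and backs the scan up to P. A minimal standalone C++ model of that iterator protocol, using a plain std::list rather than LLVM's machine-instruction list (all names here are illustrative):

    #include <iostream>
    #include <iterator>
    #include <list>

    int main() {
      // Model a basic block as a list of instruction ids.
      std::list<int> BB;
      BB.push_back(1); BB.push_back(2); BB.push_back(3);

      std::list<int>::iterator I = ++BB.begin(); // current instruction:  2
      std::list<int>::iterator J = std::next(I); // next instruction:     3
      std::list<int>::iterator P = std::prev(I); // previous instruction: 1

      // The scavenger inserts spill code between I and J.
      BB.insert(J, 99);                          // BB: 1, 2, 99, 3

      // The spill must execute before I, so move I to just prior to J
      // and back up to P so the walk re-scans from before the spill.
      if (I != std::prev(J)) {
        BB.splice(J, BB, I);                     // BB: 1, 99, 2, 3
        I = P;
      }

      for (std::list<int>::iterator It = BB.begin(); It != BB.end(); ++It)
        std::cout << *It << ' ';                 // prints: 1 99 2 3
      std::cout << '\n';
      return 0;
    }
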
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 3053119f4d..7fcfe9e88b 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
@@ -64,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
MachineFunction *MF;
// state
- std::auto_ptr<Spiller> SpillerInstance;
+ OwningPtr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
CompSpillWeight> Queue;
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6344a736ab..9eed1fc62a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -41,7 +41,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
#include <queue>
using namespace llvm;
@@ -79,7 +78,7 @@ class RAGreedy : public MachineFunctionPass,
LiveDebugVariables *DebugVars;
// state
- std::auto_ptr<Spiller> SpillerInstance;
+ OwningPtr<Spiller> SpillerInstance;
std::priority_queue<std::pair<unsigned, unsigned> > Queue;
unsigned NextCascade;
@@ -167,8 +166,8 @@ class RAGreedy : public MachineFunctionPass,
};
// splitting state.
- std::auto_ptr<SplitAnalysis> SA;
- std::auto_ptr<SplitEditor> SE;
+ OwningPtr<SplitAnalysis> SA;
+ OwningPtr<SplitEditor> SE;
/// Cached per-block interference maps
InterferenceCache IntfCache;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 607edac24b..15a88e224f 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -89,8 +90,8 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
- : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
@@ -121,7 +122,7 @@ private:
typedef std::set<unsigned> RegSet;
- std::auto_ptr<PBQPBuilder> builder;
+ OwningPtr<PBQPBuilder> builder;
char *customPassID;
@@ -132,7 +133,7 @@ private:
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
- std::auto_ptr<Spiller> spiller;
+ OwningPtr<Spiller> spiller;
LiveIntervals *lis;
LiveStacks *lss;
VirtRegMap *vrm;
@@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
return allowedSet[option - 1];
}
-std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
- const LiveIntervals *lis,
- const MachineLoopInfo *loopInfo,
- const RegSet &vregs) {
+PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
MachineRegisterInfo *mri = &mf->getRegInfo();
const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
- std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ OwningPtr<PBQPRAProblem> p(new PBQPRAProblem());
PBQP::Graph &g = p->getGraph();
RegSet pregs;
@@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
}
- return p;
+ return p.take();
}
void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
@@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts(
}
}
-std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
- MachineFunction *mf,
+PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
const LiveIntervals *lis,
const MachineLoopInfo *loopInfo,
const RegSet &vregs) {
- std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs));
PBQP::Graph &g = p->getGraph();
const TargetMachine &tm = mf->getTarget();
@@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
}
}
- return p;
+ return p.take();
}
void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
@@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
while (!pbqpAllocComplete) {
DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
- std::auto_ptr<PBQPRAProblem> problem =
- builder->build(mf, lis, loopInfo, vregsToAlloc);
+ OwningPtr<PBQPRAProblem> problem(
+ builder->build(mf, lis, loopInfo, vregsToAlloc));
#ifndef NDEBUG
if (pbqpDumpGraphs) {
@@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
}
FunctionPass* llvm::createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder> builder,
+ OwningPtr<PBQPBuilder> &builder,
char *customPassID) {
return new RegAllocPBQP(builder, customPassID);
}
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
- if (pbqpCoalescing) {
- return createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
- } // else
- return createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+ OwningPtr<PBQPBuilder> Builder;
+ if (pbqpCoalescing)
+ Builder.reset(new PBQPBuilderWithCoalescing());
+ else
+ Builder.reset(new PBQPBuilder());
+ return createPBQPRegisterAllocator(Builder);
}
#undef DEBUG_TYPE
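
OwningPtr is LLVM's pre-C++11 owning smart pointer, and the API change above threads ownership through a reference plus take(). A rough standalone sketch of the same transfer pattern in standard C++, with unique_ptr::release() standing in for OwningPtr::take() (the Builder types here are invented for illustration):

    #include <iostream>
    #include <memory>

    struct Builder {
      virtual ~Builder() {}
      virtual const char *name() const { return "plain"; }
    };
    struct CoalescingBuilder : Builder {
      const char *name() const { return "coalescing"; }
    };

    // Mirrors RegAllocPBQP's constructor: take ownership out of the
    // caller's smart pointer.
    struct Allocator {
      explicit Allocator(std::unique_ptr<Builder> &B) : Impl(B.release()) {}
      std::unique_ptr<Builder> Impl;
    };

    int main() {
      std::unique_ptr<Builder> B;
      bool Coalescing = true;          // stand-in for the pbqpCoalescing flag
      if (Coalescing)
        B.reset(new CoalescingBuilder());
      else
        B.reset(new Builder());
      Allocator A(B);                  // B is now empty; A owns the builder.
      std::cout << A.Impl->name() << '\n';  // prints: coalescing
      return 0;
    }
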
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e2488adcdc..d85646dd3c 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 6da901f81d..f82ccbe84d 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -45,9 +45,11 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const {
}
void RegScavenger::initRegState() {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
// All registers started out unused.
RegsAvailable.set();
@@ -108,27 +110,11 @@ void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(*SubRegs);
}
-void RegScavenger::forward() {
- // Move ptr forward.
- if (!Tracking) {
- MBBI = MBB->begin();
- Tracking = true;
- } else {
- assert(MBBI != MBB->end() && "Already past the end of the basic block!");
- MBBI = llvm::next(MBBI);
- }
- assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
MachineInstr *MI = MBBI;
-
- if (MI == ScavengeRestore) {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
- }
-
- if (MI->isDebugValue())
- return;
+ assert(!MI->isDebugValue() && "Debug values have no kills or defs");
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
@@ -145,7 +131,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -162,6 +148,53 @@ void RegScavenger::forward() {
addRegWithSubRegs(DefRegs, Reg);
}
}
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr *MI = MBBI;
+ if (!MI->isDebugValue()) {
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+ }
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(NULL);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
// Verify uses and defs.
#ifndef NDEBUG
@@ -170,7 +203,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -360,37 +393,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
- assert(ScavengedReg == 0 &&
- "Scavenger slot is live, unable to scavenge another register!");
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+    // We need to scavenge a register but have no spill slot; the target
+    // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
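
The block above either reuses a free ScavengedInfo slot or appends a new one. A standalone sketch of the same search with a plain std::vector (ScavengedInfo here is a stand-in, not the LLVM type):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct ScavengedInfo {
      unsigned Reg;
      int FrameIndex;
      ScavengedInfo() : Reg(0), FrameIndex(-1) {}
    };

    // Mirrors the search in scavengeRegister: pick a free slot or append one.
    std::size_t findOrAddSlot(std::vector<ScavengedInfo> &Scavenged) {
      std::size_t SI = 0;
      for (; SI < Scavenged.size(); ++SI)
        if (Scavenged[SI].Reg == 0)
          break;
      if (SI == Scavenged.size())
        Scavenged.push_back(ScavengedInfo());
      return SI;
    }

    int main() {
      std::vector<ScavengedInfo> Scavenged;
      std::size_t A = findOrAddSlot(Scavenged);
      Scavenged[A].Reg = 5;                           // first scavenge in flight
      std::size_t B = findOrAddSlot(Scavenged);
      Scavenged[B].Reg = 6;                           // a second, overlapping one
      Scavenged[A].Reg = 0;                           // first register restored
      std::cout << findOrAddSlot(Scavenged) << '\n';  // prints: 0 (slot reused)
      return 0;
    }
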
// Avoid infinite regress
- ScavengedReg = SReg;
+ Scavenged[SI].Reg = SReg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
// Spill the scavenged register before I.
- assert(ScavengingFrameIndex >= 0 &&
+ assert(Scavenged[SI].FrameIndex >= 0 &&
"Cannot scavenge register without an emergency spill slot!");
- TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
MachineBasicBlock::iterator II = prior(I);
unsigned FIOperandNum = getFrameIndexOperandNum(II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
II = prior(UseMI);
FIOperandNum = getFrameIndexOperandNum(II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
- ScavengeRestore = prior(UseMI);
+ Scavenged[SI].Restore = prior(UseMI);
// Doing this here leads to infinite regress.
- // ScavengedReg = SReg;
- ScavengedRC = RC;
+ // Scavenged[SI].Reg = SReg;
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 45b4f68570..07e5b470fb 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -329,8 +329,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
- dbgs() << " Depth : " << Depth << "\n";
- dbgs() << " Height : " << Height << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
@@ -367,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 71e7a21ef2..e4da6a41ee 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
else {
+      // Set hasPhysRegDefs only for physreg defs that have a use within
+ // the scheduling region.
+ SU->hasPhysRegDefs = true;
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
Dep.setMinLatency(
@@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
if (!MO.isDef()) {
+ SU->hasPhysRegUses = true;
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ec52d7e906..2e09ec08fd 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -205,6 +205,7 @@ namespace {
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
@@ -243,7 +244,6 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
- SDValue visitMEMBARRIER(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
@@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
@@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
- case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
return SDValue();
}
@@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
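
The vselect canonicalization above emits the classic branch-free integer abs sequence per lane. A scalar C++ check of the identity (this assumes arithmetic right shift for signed values, which mainstream targets provide):

    #include <cassert>
    #include <cstdint>

    // Y = X >>s (bits-1);  abs(X) = (X + Y) ^ Y
    int32_t absViaSraAddXor(int32_t X) {
      int32_t Y = X >> 31;   // 0 for non-negative X, -1 for negative X
      return (X + Y) ^ Y;
    }

    int main() {
      assert(absViaSraAddXor(7) == 7);
      assert(absViaSraAddXor(-7) == 7);
      assert(absViaSraAddXor(0) == 0);
      return 0;
    }
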
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
- if (VT.isVector() && !LegalOperations) {
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(true) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
EVT N0VT = N0.getOperand(0).getValueType();
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
@@ -4496,8 +4538,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
DAG.getSetCC(N->getDebugLoc(),
TLI.getSetCCResultType(VT),
@@ -5835,14 +5877,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+  // No FP constant should be created after legalization as the Instruction
+  // Selection pass has a hard time dealing with FP constants.
+ //
+  // We don't need to test this condition for transformations like the
+  // following, as the DAG being transformed implies it is legal to take an
+  // FP constant as an operand.
+ //
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ //
+ bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
// If allow, fold (fadd (fneg x), x) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
return DAG.getConstantFP(0.0, VT);
}
// If allow, fold (fadd x, (fneg x)) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
return DAG.getConstantFP(0.0, VT);
}
@@ -5944,7 +5997,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::FADD) {
+ if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul 3.0, x)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
@@ -5954,7 +6007,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N1.getOpcode() == ISD::FADD) {
+ if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul 3.0, x)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
@@ -5965,7 +6018,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
- if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ if (AllowNewFpConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
@@ -6709,7 +6763,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
- TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
@@ -6810,9 +6865,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
MVT::Other, Chain, Tmp, N2);
}
- // visitXOR has changed XOR's operands.
- Op0 = TheXor->getOperand(0);
- Op1 = TheXor->getOperand(1);
+      // visitXOR has changed XOR's operands or replaced the XOR completely;
+      // bail out.
+ return SDValue(N, 0);
}
}
@@ -7097,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
BasePtr.getNode() && "Expected BasePtr operand");
- APInt OV =
- cast<ConstantSDNode>(Offset)->getAPIntValue();
- if (AM == ISD::PRE_DEC)
- OV = -OV;
+ // We need to replace ptr0 in the following expression:
+ // x0 * offset0 + y0 * ptr0 = t0
+ // knowing that
+ // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+ //
+ // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+      // indexed load/store and the expression that needs to be rewritten.
+ //
+ // Therefore, we have:
+      //    t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
ConstantSDNode *CN =
cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
- APInt CNV = CN->getAPIntValue();
- if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
- CNV += OV;
- else
- CNV -= OV;
+ int X0, X1, Y0, Y1;
+ APInt Offset0 = CN->getAPIntValue();
+ APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
- SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
- SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
- if (OffsetIdx == 0)
- std::swap(NewOp1, NewOp2);
+ X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
- SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+ APInt CNV = Offset0;
+ if (X0 < 0) CNV = -CNV;
+ if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+ else CNV = CNV - Offset1;
+
+ // We can now generate the new expression.
+ SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
OtherUses[i]->getDebugLoc(),
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
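
The sign algebra in the comment above can be checked mechanically. A small standalone program that verifies the rewrite identity for every sign combination (the sample pointer and offsets are arbitrary):

    #include <cassert>

    int main() {
      // t0 = x0*off0 + y0*ptr0 and t1 = x1*off1 + y1*ptr0, signs in {-1, 1}.
      const int Signs[] = {-1, 1};
      const int Ptr0 = 1000, Off0 = 16, Off1 = 4;
      for (int i0 = 0; i0 < 2; ++i0) for (int j0 = 0; j0 < 2; ++j0)
        for (int i1 = 0; i1 < 2; ++i1) for (int j1 = 0; j1 < 2; ++j1) {
          int X0 = Signs[i0], Y0 = Signs[j0], X1 = Signs[i1], Y1 = Signs[j1];
          int T0 = X0 * Off0 + Y0 * Ptr0;
          int T1 = X1 * Off1 + Y1 * Ptr0;
          // The identity the hunk implements:
          assert(T0 == (X0 * Off0 - X1 * Y0 * Y1 * Off1) + (Y0 * Y1) * T1);
        }
      return 0;
    }
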
@@ -7698,16 +7768,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-/// Returns the base pointer and an integer offset from that object.
-static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
- if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- SDValue Base = Ptr->getOperand(0);
- return std::make_pair(Base, Offset);
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+///    (load (i64 add (i64 copyfromreg %c)
+///                   (i64 signextend (add (i8 load %index)
+///                                        (i8 1)))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
}
- return std::make_pair(Ptr, 0);
-}
+ /// Parses tree in Ptr for base, index, offset addresses.
+ static BaseIndexOffset match(SDValue Ptr) {
+ bool IsIndexSignExt = false;
+
+ // Just Base or possibly anything else.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Base + offset.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
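
A toy re-implementation of the matcher over a hand-rolled expression tree, to make the decomposed shape concrete. It handles only the (add Base (sext (add Index Const))) form; everything here is invented for illustration:

    #include <cassert>
    #include <cstdint>

    // Tiny expression tree standing in for SDNodes.
    struct Node {
      enum Kind { Leaf, Add, SExt, Const } K;
      const Node *L;
      const Node *R;
      int64_t Val;
    };

    struct BIO {
      const Node *Base;
      const Node *Index;
      int64_t Offset;
      bool IsIndexSignExt;
    };

    // Mirrors BaseIndexOffset::match for the (add Base (sext (add Index C)))
    // shape only; the other shapes are handled analogously.
    BIO match(const Node *Ptr) {
      if (Ptr->K != Node::Add)
        return BIO{Ptr, nullptr, 0, false};        // just a base
      const Node *Base = Ptr->L, *IO = Ptr->R;
      bool SExtIdx = false;
      if (IO->K == Node::SExt) {                   // skip the sign extension
        IO = IO->L;
        SExtIdx = true;
      }
      if (IO->K != Node::Add)
        return BIO{Base, IO, 0, SExtIdx};          // base + index, no offset
      if (IO->R->K != Node::Const)
        return BIO{Ptr, nullptr, 0, SExtIdx};      // offset is not constant
      return BIO{Base, IO->L, IO->R->Val, SExtIdx};
    }

    int main() {
      Node C     = {Node::Const, 0, 0, 8};
      Node Idx   = {Node::Leaf,  0, 0, 0};
      Node Base  = {Node::Leaf,  0, 0, 0};
      Node Inner = {Node::Add,  &Idx, &C, 0};
      Node Ext   = {Node::SExt, &Inner, 0, 0};
      Node Ptr   = {Node::Add,  &Base, &Ext, 0};

      BIO R = match(&Ptr);
      assert(R.Base == &Base && R.Index == &Idx);
      assert(R.Offset == 8 && R.IsIndexSignExt);
      return 0;
    }
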
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
@@ -7754,16 +7890,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
- // This holds the base pointer and the offset in bytes from the base pointer.
- std::pair<SDValue, int64_t> BasePtr =
- GetPointerBaseAndOffset(St->getBasePtr());
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
- if (!BasePtr.first.getNode())
+ if (!BasePtr.Base.getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.first.getOpcode() == ISD::UNDEF)
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
return false;
// Save the LoadSDNodes that we find in the chain.
@@ -7785,11 +7921,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// Find the base pointer and offset for this memory node.
- std::pair<SDValue, int64_t> Ptr =
- GetPointerBaseAndOffset(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
- if (Ptr.first.getNode() != BasePtr.first.getNode())
+ if (!Ptr.equalBaseIndex(BasePtr))
break;
// Check that the alignment is the same.
@@ -7815,7 +7950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
// Find the next memory operand in the chain. If the next operand in the
// chain is a store then move up and continue the scan with the next
@@ -7902,6 +8037,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalType = i+1;
+ // Or check whether a truncstore is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
+ LastLegalType = i+1;
+ }
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
@@ -8012,7 +8155,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
- SDValue LdBasePtr;
+ BaseIndexOffset LdBasePtr;
for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
@@ -8038,21 +8181,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Ld->getMemoryVT() != MemVT)
break;
- std::pair<SDValue, int64_t> LdPtr =
- GetPointerBaseAndOffset(Ld->getBasePtr());
-
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
- if (LdBasePtr.getNode()) {
+ if (LdBasePtr.Base.getNode()) {
// The base ptr must be the same.
- if (LdPtr.first != LdBasePtr)
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
break;
} else {
// Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr.first;
+ LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
}
if (LoadNodes.size() < 2)
@@ -8087,6 +8228,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalIntegerType = i + 1;
+      // Or check whether a truncstore and extload are legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ LastLegalIntegerType = i+1;
+ }
}
// Only use vector types if the vector type is larger than the integer type.
@@ -8970,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(N->getValueType(0));
+ // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
+ // nodes often generate nop CONCAT_VECTOR nodes.
+  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
+ // place the incoming vectors at the exact same location.
+ SDValue SingleSource = SDValue();
+ unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+
+ // Check if this is the identity extract:
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // Find the single incoming vector for the extract_subvector.
+ if (SingleSource.getNode()) {
+ if (Op.getOperand(0) != SingleSource)
+ return SDValue();
+ } else {
+ SingleSource = Op.getOperand(0);
+
+ // Check the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we cannot
+ // optimize it away.
+ if (SingleSource.getValueType() != N->getValueType(0))
+ return SDValue();
+ }
+
+ unsigned IdentityIndex = i * PartNumElem;
+ ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // The extract index must be constant.
+ if (!CS)
+ return SDValue();
+
+ // Check that we are reading from the identity index.
+ if (CS->getZExtValue() != IdentityIndex)
+ return SDValue();
+ }
+
+ if (SingleSource.getNode())
+ return SingleSource;
+
return SDValue();
}
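
A standalone model of the identity this combine detects: concatenating consecutive extract_subvector pieces of one source, taken at offsets 0, N, 2N, ..., simply rebuilds the source (plain arrays stand in for vector nodes):

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 8> V = {{0, 1, 2, 3, 4, 5, 6, 7}};

      // extract_subvector(V, 0) and extract_subvector(V, 4)
      std::array<int, 4> Lo, Hi;
      for (int i = 0; i < 4; ++i) {
        Lo[i] = V[i];
        Hi[i] = V[4 + i];
      }

      // concat_vectors(Lo, Hi) reads each part at its identity index...
      std::array<int, 8> Concat;
      for (int i = 0; i < 4; ++i) {
        Concat[i] = Lo[i];
        Concat[4 + i] = Hi[i];
      }

      // ...so the whole concat folds to SingleSource (V).
      assert(Concat == V);
      return 0;
    }
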
@@ -8977,12 +9174,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ DebugLoc dl = N->getDebugLoc();
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
EVT SmallVT = V->getOperand(1).getValueType();
- if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
// Only handle cases where both indexes are constants with the same type.
@@ -8995,31 +9212,57 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
- // indices are equal => V1
+ // indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ N->getOperand(0).getValueType(),
+ V->getOperand(0)), N->getOperand(1));
}
}
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
- if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+}
+
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SmallVector<SDValue, 4> Ops;
+ EVT ConcatVT = N0.getOperand(0).getValueType();
+ unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+ unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+ // Look at every vector that's inserted. We're looking for exact
+  // subvector-sized copies from a concatenated vector.
+ for (unsigned I = 0; I != NumConcats; ++I) {
+ // Make sure we're dealing with a copy.
+ unsigned Begin = I * NumElemsPerConcat;
+ if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
+
+ for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
+ if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+ return SDValue();
+ }
+
+ unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+ if (FirstElt < N0.getNumOperands())
+ Ops.push_back(N0.getOperand(FirstElt));
+ else
+ Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
}
- return SDValue();
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+ Ops.size());
}
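
A standalone model of partitionShuffleOfConcats on a concrete mask: when the mask copies whole chunk-aligned subvectors, the shuffle reduces to one concat of the referenced parts (arrays stand in for the vector operands; the mask is made up):

    #include <array>
    #include <cassert>

    int main() {
      // N0 = concat(A, B), N1 = concat(C, D); two-element parts.
      std::array<int, 2> A = {{0, 1}}, B = {{2, 3}}, C = {{4, 5}}, D = {{6, 7}};
      std::array<int, 4> N0 = {{A[0], A[1], B[0], B[1]}};
      std::array<int, 4> N1 = {{C[0], C[1], D[0], D[1]}};
      std::array<int, 4> Mask = {{4, 5, 2, 3}};   // lanes of N0 ## N1

      // Apply the shuffle element by element.
      std::array<int, 4> Res;
      for (int i = 0; i < 4; ++i)
        Res[i] = Mask[i] < 4 ? N0[Mask[i]] : N1[Mask[i] - 4];

      // The mask copies 2-element chunks at chunk-aligned offsets, so the
      // result is just concat(C, B).
      std::array<int, 4> Expected = {{C[0], C[1], B[0], B[1]}};
      assert(Res == Expected);
      return 0;
    }
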
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
@@ -9123,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ Level < AfterLegalizeVectorOps &&
+ (N1.getOpcode() == ISD::UNDEF ||
+ (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+ SDValue V = partitionShuffleOfConcats(N, DAG);
+
+ if (V.getNode())
+ return V;
+ }
+
// If this shuffle node is simply a swizzle of another shuffle node,
// and it reverses the swizzle of the previous shuffle then we can
// optimize shuffle(shuffle(x, undef), undef) -> x.
@@ -9159,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
- if (!TLI.getShouldFoldAtomicFences())
- return SDValue();
-
- SDValue atomic = N->getOperand(0);
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- break;
- default:
- return SDValue();
- }
-
- SDValue fence = atomic.getOperand(0);
- if (fence.getOpcode() != ISD::MEMBARRIER)
- return SDValue();
-
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2),
- atomic.getOperand(3)), atomic.getResNo());
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2)),
- atomic.getResNo());
- default:
- return SDValue();
- }
-}
-
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 04f5b32e04..288499ac6f 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -696,6 +696,13 @@ bool FastISel::SelectCall(const User *I) {
UpdateValueMap(Call, ResultReg);
return true;
}
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
}
// Usually, it does not make sense to initialize a value,
@@ -1176,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ assert (MaterialReg != 0 && "Unable to materialize imm.");
+ if (MaterialReg == 0) return 0;
}
return FastEmit_rr(VT, VT, Opcode,
Op0, Op0IsKill,
@@ -1496,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
return true;
}
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+ assert(LI->hasOneUse() &&
+ "tryToFoldLoad expected a LoadInst with a single use");
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile())
+ return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // We can't fold if this vreg has no uses or more than one use. Multiple uses
+ // may mean that the instruction got lowered to multiple MIs, or the use of
+ // the loaded value ended up being multiple operands of the result.
+ if (!MRI.hasOneUse(LoadReg))
+ return false;
+
+ MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo.InsertPt = User;
+ FuncInfo.MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+
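
A standalone model of the single-use chain scan above, with Inst an invented stand-in for llvm::Instruction and OnlyUser modeling use_back() on a single-use value:

    #include <cassert>

    struct Inst {
      Inst *OnlyUser;               // null means zero or multiple uses
      Inst() : OnlyUser(0) {}
    };

    // Follow single uses from the load until we reach FoldInst, giving up
    // on multiple uses or after a handful of hops (as tryToFoldLoad does).
    bool reachesFoldInst(const Inst *LI, const Inst *FoldInst) {
      unsigned MaxUsers = 6;        // don't scan down huge chains
      const Inst *TheUser = LI->OnlyUser;
      while (TheUser && TheUser != FoldInst && --MaxUsers)
        TheUser = TheUser->OnlyUser;
      return TheUser == FoldInst;
    }

    int main() {
      Inst Fold, Mid, Load;
      Load.OnlyUser = &Mid;
      Mid.OnlyUser = &Fold;
      assert(reachesFoldInst(&Load, &Fold));

      Inst Stray;
      assert(!reachesFoldInst(&Load, &Stray));   // chain never reaches it
      return 0;
    }
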
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f085e444b6..2a1d8c2819 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
- case ISD::ATOMIC_FENCE:
- case ISD::MEMBARRIER: {
+ case ISD::ATOMIC_FENCE: {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
// FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
@@ -3632,8 +3631,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ Tmp3);
Results.push_back(Tmp1);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 1ee21921b4..de217d8571 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
return
VT == MVT::f32 ? Call_F32 :
VT == MVT::f64 ? Call_F64 :
VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
VT == MVT::ppcf128 ? Call_PPCF128 :
RTLIB::UNKNOWN_LIBCALL;
}
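
A rough model of GetFPLibCall's dispatch with the new f128 entry. The libcall names shown follow common libgcc/compiler-rt conventions ("__addtf3" for f128 addition) and are illustrative here, not quoted from the patch:

    #include <cassert>
    #include <string>

    enum FPType { F32, F64, F80, F128, PPCF128, OtherFP };

    // Same cascade as GetFPLibCall, returning a libcall name per type.
    std::string getAddLibCall(FPType VT) {
      return VT == F32     ? "__addsf3"   :
             VT == F64     ? "__adddf3"   :
             VT == F80     ? "__addxf3"   :
             VT == F128    ? "__addtf3"   :
             VT == PPCF128 ? "__gcc_qadd" : "unknown";
    }

    int main() {
      assert(getAddLibCall(F128) == "__addtf3");
      assert(getAddLibCall(F64) == "__adddf3");
      return 0;
    }
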
@@ -156,6 +158,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -167,6 +170,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F32,
RTLIB::CEIL_F64,
RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -220,6 +224,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F32,
RTLIB::COS_F64,
RTLIB::COS_F80,
+ RTLIB::COS_F128,
RTLIB::COS_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -232,6 +237,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -243,6 +249,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F32,
RTLIB::EXP_F64,
RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -254,6 +261,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F32,
RTLIB::EXP2_F64,
RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -265,6 +273,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F32,
RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -276,6 +285,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F32,
RTLIB::LOG_F64,
RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -287,6 +297,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F32,
RTLIB::LOG2_F64,
RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -298,6 +309,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F32,
RTLIB::LOG10_F64,
RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -311,6 +323,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
NVT, Ops, 3, false, N->getDebugLoc());
}
@@ -323,6 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -334,6 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -347,6 +362,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -384,6 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F32,
RTLIB::POW_F64,
RTLIB::POW_F80,
+ RTLIB::POW_F128,
RTLIB::POW_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -397,6 +414,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F32,
RTLIB::POWI_F64,
RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -409,6 +427,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F32,
RTLIB::REM_F64,
RTLIB::REM_F80,
+ RTLIB::REM_F128,
RTLIB::REM_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -420,6 +439,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F32,
RTLIB::RINT_F64,
RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -431,6 +451,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F32,
RTLIB::SIN_F64,
RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -442,6 +463,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F32,
RTLIB::SQRT_F64,
RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -454,6 +476,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
NVT, Ops, 2, false, N->getDebugLoc());
}
@@ -465,6 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F32,
RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
NVT, &Op, 1, false, N->getDebugLoc());
}
@@ -800,6 +824,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -839,7 +864,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -848,7 +874,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -859,6 +886,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
RTLIB::COPYSIGN_F32,
RTLIB::COPYSIGN_F64,
RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
RTLIB::COPYSIGN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -868,7 +896,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -880,6 +909,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -890,7 +920,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -899,7 +930,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -907,8 +939,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -917,7 +950,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -926,7 +960,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -934,8 +969,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -947,6 +983,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, 3, false,
N->getDebugLoc());
@@ -960,6 +997,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -972,6 +1010,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -997,7 +1036,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1006,7 +1046,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1015,7 +1066,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1024,7 +1076,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1033,7 +1086,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1045,6 +1099,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
N->getValueType(0), Ops, 2, false,
N->getDebugLoc());
@@ -1055,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
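
Every LegalizeFloatTypes.cpp hunk above makes the same mechanical change: an RTLIB::*_F128 libcall is threaded into each GetFPLibCall call site between the F80 and PPCF128 slots. A minimal standalone sketch of that dispatch follows; FloatVT and Libcall are simplified stand-ins for LLVM's EVT and RTLIB::Libcall, so this shows the shape of the selection rather than the real API.

#include <cassert>

enum class FloatVT { f32, f64, f80, f128, ppcf128 };
enum class Libcall { LOG2_F32, LOG2_F64, LOG2_F80, LOG2_F128, LOG2_PPCF128 };

static Libcall GetFPLibCall(FloatVT VT, Libcall Call_F32, Libcall Call_F64,
                            Libcall Call_F80, Libcall Call_F128,
                            Libcall Call_PPCF128) {
  switch (VT) {
  case FloatVT::f32:     return Call_F32;
  case FloatVT::f64:     return Call_F64;
  case FloatVT::f80:     return Call_F80;
  case FloatVT::f128:    return Call_F128;   // the newly wired f128 slot
  case FloatVT::ppcf128: return Call_PPCF128;
  }
  assert(false && "Unsupported FP type for libcall");
  return Call_F32; // unreachable
}
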
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 182b7f3e68..cd2f060ce0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -515,7 +515,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
// Only use the result of getSetCCResultType if it is legal,
// otherwise just use the promoted result type (NVT).
if (!TLI.isTypeLegal(SVT))
- SVT = NVT;
+ SVT = NVT;
DebugLoc dl = N->getDebugLoc();
assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
@@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- return DAG.getNode(ISD::SHL, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
- GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -549,22 +550,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue Res = SExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRA, N->getDebugLoc(),
- Res.getValueType(), Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
- EVT VT = N->getValueType(0);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Res = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
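
The three shift promotions above share one new rule: a scalar shift amount may keep its own (possibly narrower) type, but a vector amount must be re-promoted alongside the value so the two vector types stay equal, matching the vector-shift assert added to SelectionDAG.cpp later in this commit. A stand-in sketch of that shared shape; VTy is an illustrative substitute for EVT, not the real type.

struct VTy { bool IsVector; unsigned EltBits; };

static VTy ZExtPromoted(VTy V) { return {V.IsVector, V.EltBits * 2}; }

static VTy PromoteShiftOperands(VTy Value, VTy &Amt) {
  VTy Res = ZExtPromoted(Value);  // GetPromotedInteger / ZExtPromotedInteger
  if (Amt.IsVector)
    Amt = ZExtPromoted(Amt);      // keep the amount VT equal to the result VT
  return Res;                     // node is then built as (shift Res, Amt)
}
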
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
@@ -775,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
- case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
case ISD::VSELECT:
@@ -959,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
N->getOperand(1), Idx), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
- SDValue NewOps[6];
- DebugLoc dl = N->getDebugLoc();
- NewOps[0] = N->getOperand(0);
- for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
- SDValue Flag = GetPromotedInteger(N->getOperand(i));
- NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
- }
- return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
@@ -2101,8 +2091,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// have an illegal type. Fix that first by casting the operand, otherwise
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
- MVT ShiftTy = TLI.getShiftAmountTy(VT);
- assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) &&
+ EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index e26d1656e8..b6436bf427 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
SDValue &OpEntry = PromotedIntegers[Op];
assert(OpEntry.getNode() == 0 && "Node is already promoted!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
SDValue &OpEntry = SoftenedFloats[Op];
assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = ScalarizedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
@@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
@@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
@@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = WidenedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node already widened!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
@@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
return true;
}
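
All of the Set* cache writers above now stamp the replacement node(s) with the ordering of the node they replace, and CustomLowerNode does the same for each custom-lowered result. A small runnable model of that bookkeeping, with an unordered_map standing in for SDNodeOrdering:

#include <unordered_map>

// Minimal model of the propagation added above: whenever a node is replaced
// by a legalized equivalent, the replacement inherits the original ordering
// so later passes can still sort nodes by source position.
using NodeId = const void *;

struct OrderingMap {
  std::unordered_map<NodeId, unsigned> Order;
  unsigned get(NodeId N) const {
    auto It = Order.find(N);
    return It == Order.end() ? 0 : It->second;
  }
  void assign(NodeId N, unsigned O) { Order[N] = O; }
};

static void Propagate(OrderingMap &DAG, NodeId From, NodeId Lo, NodeId Hi) {
  // Mirrors SetExpandedInteger/SetExpandedFloat: both halves receive the
  // ordering of the node they replace.
  DAG.assign(Lo, DAG.get(From));
  DAG.assign(Hi, DAG.get(From));
}
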
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7de42ea012..1c4274a910 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -270,7 +270,6 @@ private:
SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
- SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
@@ -465,6 +464,7 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -530,6 +530,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -580,6 +581,7 @@ private:
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 09a50d9263..04c6bfd0c2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ Res = ScalarizeVecOp_EXTEND(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
N->getValueType(0), Elt);
}
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SmallVector<SDValue, 1> Ops(1);
+ Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], 1);
+}
+
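
At the value level the new path is tiny: extending a one-element vector is extending its only element and rewrapping it. A compilable stand-in, using fixed-size arrays for <1 x i8> and <1 x i32>:

#include <array>
#include <cstdint>

// Stand-in for the DAG rewrite above: scalarizing "sext <1 x i8> to
// <1 x i32>" is a scalar sign extend followed by a one-element
// BUILD_VECTOR that restores the type the users expect.
static std::array<int32_t, 1> SignExtendV1(std::array<int8_t, 1> In) {
  int32_t Widened = static_cast<int32_t>(In[0]); // scalar sign extend
  return {Widened};                              // single-element BUILD_VECTOR
}
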
/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
/// use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
@@ -1026,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1042,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::FTRUNC:
- case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
@@ -1252,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
- // The input operands all must have the same type, and we know the result the
- // result type is valid. Convert this to a buildvector which extracts all the
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
// input elements.
// TODO: If the input elements are power-two vectors, we could convert this to
// a new CONCAT_VECTORS node with elements that are half-wide.
@@ -1273,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+ // The result type is legal, but the input type is illegal. If splitting
+ // ends up with the result type of each half still being legal, just
+ // do that. If, however, that would result in an illegal result type,
+ // we can try to get more clever with power-two vectors. Specifically,
+ // split the input type, but also widen the result element size, then
+ // concatenate the halves and truncate again. For example, consider a target
+ // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+ // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+ // %inlo = v4i32 extract_subvector %in, 0
+ // %inhi = v4i32 extract_subvector %in, 4
+ // %lo16 = v4i16 trunc v4i32 %inlo
+ // %hi16 = v4i16 trunc v4i32 %inhi
+ // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+ // %res = v8i8 trunc v8i16 %in16
+ //
+ // Without this transform, the original truncate would end up being
+ // scalarized, which is pretty much always a last resort.
+ SDValue InVec = N->getOperand(0);
+ EVT InVT = InVec->getValueType(0);
+ EVT OutVT = N->getValueType(0);
+ unsigned NumElements = OutVT.getVectorNumElements();
+ // Widening should have already made sure this is a power-two vector
+ // if we're trying to split it at all. assert() that's true, just in case.
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+ unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
+ unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+
+ // If the input elements are only 1/2 the width of the result elements,
+  // just use the normal splitting. Our trick only works if there's room
+ // to split more than once.
+ if (InElementSize <= OutElementSize * 2)
+ return SplitVecOp_UnaryOp(N);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Extract the halves of the input via extract_subvector.
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+ DAG.getIntPtrConstant(0));
+ SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+ DAG.getIntPtrConstant(NumElements/2));
+ // Truncate them to 1/2 the element size.
+ EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+ NumElements/2);
+ SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ // Concatenate them to get the full intermediate truncation result.
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+ SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+ HalfHi);
+ // Now finish up by truncating all the way down to the original result
+ // type. This should normally be something that ends up being legal directly,
+ // but in theory if a target has very wide vectors and an annoyingly
+ // restricted set of legal types, this split can chain to build things up.
+ return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
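
The early-out above is easy to sanity-check against the worked comment: for v8i32 to v8i8, InElementSize is 32 and OutElementSize is 8, and 32 > 2 * 8, so the subvector trick fires; for v8i16 to v8i8, 16 <= 16 holds and the plain SplitVecOp_UnaryOp path is taken. The decision, restated as a standalone predicate:

// Decision logic of SplitVecOp_TRUNCATE, lifted out for illustration.
// Returns true when the split-and-retruncate trick applies.
static bool UseSplitTruncTrick(unsigned InEltBits, unsigned OutEltBits) {
  // Only profitable when there is room to halve the element size at least
  // once before reaching the result width: v8i32 -> v8i8 (32 > 16) yes,
  // v8i16 -> v8i8 (16 <= 16) no.
  return InEltBits > OutEltBits * 2;
}
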
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index d2269f8acc..7e7b8974be 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -33,8 +33,10 @@ class SDNodeOrdering {
public:
SDNodeOrdering() {}
- void add(const SDNode *Node, unsigned O) {
- OrderMap[Node] = O;
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
}
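
The revised add() keeps the smallest order ever recorded for a node rather than overwriting it, with 0 meaning "not yet ordered"; note that the OldOrder > 0 half of the guard is redundant once OldOrder == 0 has been checked, though harmless. A runnable model of the behavior:

#include <cassert>
#include <unordered_map>

// Model of the revised SDNodeOrdering::add: an entry is only ever lowered,
// never raised, and 0 means "no order recorded yet".
static void AddOrder(std::unordered_map<int, unsigned> &Map, int Node,
                     unsigned NewOrder) {
  unsigned &Old = Map[Node];   // value-initialized to 0 on first touch
  if (Old == 0 || NewOrder < Old)
    Old = NewOrder;
}

int main() {
  std::unordered_map<int, unsigned> Map;
  AddOrder(Map, 1, 5);  assert(Map[1] == 5);  // first assignment sticks
  AddOrder(Map, 1, 3);  assert(Map[1] == 3);  // lower order wins
  AddOrder(Map, 1, 7);  assert(Map[1] == 3);  // higher order is ignored
}
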
void remove(const SDNode *Node) {
DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index addfccbd00..c009cfcc51 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -904,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
SUnit *OldSU = Sequence.back();
while (true) {
Sequence.pop_back();
- if (SU->isSucc(OldSU))
- // Don't try to remove SU from AvailableQueue.
- SU->isAvailable = false;
// FIXME: use ready cycle instead of height
CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
@@ -1363,8 +1360,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0])
- << " SU #" << CurSU->NodeNum << '\n');
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
std::pair<LRegsMapT::iterator, bool> LRegsPair =
LRegsMap.insert(std::make_pair(CurSU, LRegs));
if (LRegsPair.second) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index db8ae6ea06..15235c8ac3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1518,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- if (ISD::isZEXTLoad(Op.getNode())) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
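
The new Op.getResNo() == 0 guard matters because a load node produces two results, the loaded value (result 0) and the output chain (result 1), and only the value carries the zero-extension fact about the memory type. A stand-in restatement; LoadQuery is illustrative, not an LLVM type:

struct LoadQuery { unsigned ResNo; unsigned MemBits; unsigned VTBits; };

static unsigned KnownZeroHighBits(const LoadQuery &Q, bool IsZExtLoad) {
  if (IsZExtLoad && Q.ResNo == 0)
    return Q.VTBits - Q.MemBits;  // e.g. zextload i8 -> i32 gives 24 bits
  return 0;                       // chain result: no bit-level knowledge
}
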
@@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
break;
}
- // Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
- unsigned ExtType = LD->getExtensionType();
- switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp;
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
}
}
@@ -2781,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
}
// Handle the scalar case first.
- if (Outputs.size() == 1)
+ if (Scalar1 && Scalar2)
return Outputs.back();
// Otherwise build a big vector out of the scalar elements we generated.
@@ -2912,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+         "Vector shift amounts must have the same type as their first arg");
  // Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
@@ -4702,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
Ops, NumOps);
@@ -5246,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, 0, 0);
+ return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5261,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5269,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
SDValue Op1, SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, 0, 0);
+ return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *
@@ -5290,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5298,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5307,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5324,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5333,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op1, SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, EVT VT3,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
EVT VT2, EVT VT3, EVT VT4,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
- const std::vector<EVT> &ResultTys,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<EVT> ResultTys,
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> OpsArray) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
void *IP = 0;
+ const SDValue *Ops = OpsArray.data();
+ unsigned NumOps = OpsArray.size();
if (DoCSE) {
FoldingSetNodeID ID;
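
With the overloads above collapsed onto ArrayRef, callers pass a braced array (or None) and only the final getMachineNode recovers the pointer/length pair. A self-contained sketch of the call-site effect; ArrayRefLike and MakeNode are illustrative stand-ins, not LLVM APIs:

#include <cstddef>

// Minimal stand-in for the ArrayRef migration: the callee recovers
// data()/size() itself instead of every overload threading an explicit
// (Ops, NumOps) pair.
template <typename T> struct ArrayRefLike {
  const T *Data = nullptr;
  size_t Size = 0;
  ArrayRefLike() = default;                            // was (0, 0), now None
  template <size_t N>
  ArrayRefLike(const T (&A)[N]) : Data(A), Size(N) {}  // was array_lengthof
};

static void MakeNode(ArrayRefLike<int> Ops) {
  const int *Data = Ops.Data;   // mirrors OpsArray.data()
  size_t NumOps = Ops.Size;     // mirrors OpsArray.size()
  (void)Data; (void)NumOps;
}

int main() {
  int Ops[] = {1, 2, 3};
  MakeNode(Ops);                  // was MakeNode(Ops, array_lengthof(Ops))
  MakeNode(ArrayRefLike<int>());  // was MakeNode(0, 0)
}
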
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b7a7e2e133..9d02fc7323 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
} else {
Ctx.emitError(ErrMsg);
}
- report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ return DAG.getUNDEF(ValueVT);
}
if (ValueVT.getVectorNumElements() == 1 &&
@@ -1872,13 +1872,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
visitInlineAsm(&I);
else if (Fn && Fn->isIntrinsic()) {
assert(Fn->getIntrinsicID() == Intrinsic::donothing);
- // If donothing has a landingpad, we should clear CurrentCallSite.
- if (LandingPad) {
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- unsigned CallSiteIndex = MMI.getCurrentCallSite();
- if (CallSiteIndex)
- MMI.setCurrentCallSite(0);
- }
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
@@ -2661,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -4921,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isOperationLegalOrCustom(ISD::FMA, VT) &&
TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5042,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Drop the intrinsic, but forward the value
+ setValue(&I, getValue(I.getOperand(0)));
+ return 0;
case Intrinsic::var_annotation:
// Discard annotate attributes
return 0;
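
llvm.annotation and llvm.ptr.annotation return their first operand, so the builder can map the call's value to that operand's already-computed value and emit no code at all, while llvm.var.annotation returns void and is dropped outright. The whole lowering, as a trivial stand-in:

// Model of the annotation handling above: "y = llvm.annotation(x, ...)"
// degenerates to "y = x".
static int LowerAnnotation(int AnnotatedValue) {
  return AnnotatedValue; // drop the intrinsic, forward the value
}
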
@@ -5262,6 +5259,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.isSRet = true;
Entry.isNest = false;
Entry.isByVal = false;
+ Entry.isReturned = false;
Entry.Alignment = Align;
Args.push_back(Entry);
RetTy = Type::getVoidTy(FTy->getContext());
@@ -5279,13 +5277,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
- Entry.Alignment = CS.getParamAlignment(attrInd);
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
Args.push_back(Entry);
}
@@ -6190,6 +6189,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
" don't know how to handle tied "
"indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
}
RegsForValue MatchedRegs;
@@ -6198,10 +6198,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MatchedRegs.RegVTs.push_back(RegVT);
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
- i != e; ++i)
- MatchedRegs.Regs.push_back
- (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
-
+ i != e; ++i) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+ MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
+ else {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm error: This value"
+ " type register class is not natively supported!");
+ report_fatal_error("inline asm error: This value type register "
+ "class is not natively supported!");
+ }
+ }
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag, CS.getInstruction());
@@ -6421,6 +6428,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
@@ -6474,6 +6503,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
+ // Conservatively only handle 'returned' on non-vectors for now
+ if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
+ "unexpected use of 'returned'");
+ // Before passing 'returned' to the target lowering code, ensure that
+ // either the register MVT and the actual EVT are the same size or that
+ // the return value and argument are extended in the same way; in these
+ // cases it's safe to pass the argument register value unchanged as the
+ // return register value (although it's at the target's option whether
+ // to do so)
+ // TODO: allow code generation to take advantage of partially preserved
+ // registers rather than clobbering the entire register when the
+ // parameter extension method is not compatible with the return
+ // extension method
+ if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+ (ExtendKind != ISD::ANY_EXTEND &&
+ CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
+ Flags.setReturned();
+ }
+
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
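
The guard above allows 'returned' in exactly two situations, restated here as a standalone predicate; the parameter names mirror the NumParts/PartVT values and the CLI.Ret* and Args[i].is* flags from the patch, but this is a sketch, not the in-tree helper:

enum class Ext { Any, Sign, Zero };

static bool CanUseReturned(unsigned PartBits, unsigned NumParts,
                           unsigned ValueBits, Ext ArgExt,
                           bool RetSExt, bool RetZExt,
                           bool ArgSExt, bool ArgZExt) {
  // Case 1: the register parts cover the value exactly, so the argument
  // register holds the full value and can stand for the return unchanged.
  if (NumParts * PartBits == ValueBits)
    return true;
  // Case 2: argument and return value are widened identically, and not
  // with ANY_EXTEND, whose padding bits are unspecified.
  return ArgExt != Ext::Any && RetSExt == ArgSExt && RetZExt == ArgZExt;
}
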
@@ -6493,28 +6542,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
}
- // Handle the incoming return values from the call.
- CLI.Ins.clear();
- SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, CLI.RetTy, RetTys);
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
- MyFlags.Used = CLI.IsReturnValueUsed;
- if (CLI.RetSExt)
- MyFlags.Flags.setSExt();
- if (CLI.RetZExt)
- MyFlags.Flags.setZExt();
- if (CLI.IsInReg)
- MyFlags.Flags.setInReg();
- CLI.Ins.push_back(MyFlags);
- }
- }
-
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
@@ -6621,9 +6648,7 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
-void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
- // If this is the entry block, emit arguments.
- const Function &F = *LLVMBB->getParent();
+void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
DebugLoc dl = SDB->getCurDebugLoc();
const DataLayout *TD = TLI.getDataLayout();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 2ce34930a3..50b5bccf7c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1dcd6cdf2c..02b838234d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -58,18 +58,21 @@
#include <algorithm>
using namespace llvm;
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
-
-#ifndef NDEBUG
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
-STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
-STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+ "Number of entry blocks where fast isel failed to lower arguments");
+#ifndef NDEBUG
static cl::opt<bool>
EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
cl::desc("Enable extra verbose messages in the \"fast\" "
"instruction selector"));
+
// Terminators
STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
@@ -363,6 +366,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TargetSubtargetInfo &ST =
const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -734,7 +738,7 @@ public:
} // end anonymous namespace
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(errs() << "===== Instruction selection begins: BB#"
+ DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
<< " '" << FuncInfo->MBB->getName() << "'\n");
@@ -777,8 +781,12 @@ void SelectionDAGISel::DoInstructionSelection() {
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
- if (ResNode)
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
ReplaceUses(Node, ResNode);
+ }
// If after the replacement this node is not used any more,
// remove this dead node.
@@ -789,7 +797,7 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->setRoot(Dummy.getValue());
}
- DEBUG(errs() << "===== Instruction selection ends:\n");
+ DEBUG(dbgs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
}
@@ -819,84 +827,6 @@ void SelectionDAGISel::PrepareEHLandingPad() {
if (Reg) MBB->addLiveIn(Reg);
}
-/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
-/// load into the specified FoldInst. Note that we could have a sequence where
-/// multiple LLVM IR instructions are folded into the same machineinstr. For
-/// example we could have:
-/// A: x = load i32 *P
-/// B: y = icmp A, 42
-/// C: br y, ...
-///
-/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
-/// any other folded instructions) because it is between A and C.
-///
-/// If we succeed in folding the load into the operation, return true.
-///
-bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
- const Instruction *FoldInst,
- FastISel *FastIS) {
- // We know that the load has a single use, but don't know what it is. If it
- // isn't one of the folded instructions, then we can't succeed here. Handle
- // this by scanning the single-use users of the load until we get to FoldInst.
- unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
-
- const Instruction *TheUser = LI->use_back();
- while (TheUser != FoldInst && // Scan up until we find FoldInst.
- // Stay in the right block.
- TheUser->getParent() == FoldInst->getParent() &&
- --MaxUsers) { // Don't scan too far.
- // If there are multiple or no uses of this instruction, then bail out.
- if (!TheUser->hasOneUse())
- return false;
-
- TheUser = TheUser->use_back();
- }
-
- // If we didn't find the fold instruction, then we failed to collapse the
- // sequence.
- if (TheUser != FoldInst)
- return false;
-
- // Don't try to fold volatile loads. Target has to deal with alignment
- // constraints.
- if (LI->isVolatile()) return false;
-
- // Figure out which vreg this is going into. If there is no assigned vreg yet
- // then there actually was no reference to it. Perhaps the load is referenced
- // by a dead instruction.
- unsigned LoadReg = FastIS->getRegForValue(LI);
- if (LoadReg == 0)
- return false;
-
- // Check to see what the uses of this vreg are. If it has no uses, or more
- // than one use (at the machine instr level) then we can't fold it.
- MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
- if (RI == RegInfo->reg_end())
- return false;
-
- // See if there is exactly one use of the vreg. If there are multiple uses,
- // then the instruction got lowered to multiple machine instructions or the
- // use of the loaded value ended up being multiple operands of the result, in
- // either case, we can't fold this.
- MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
- if (PostRI != RegInfo->reg_end())
- return false;
-
- assert(RI.getOperand().isUse() &&
- "The only use of the vreg must be a use, we haven't emitted the def!");
-
- MachineInstr *User = &*RI;
-
- // Set the insertion point properly. Folding the load can cause generation of
- // other random instructions (like sign extends) for addressing modes, make
- // sure they get inserted in a logical place before the new instruction.
- FuncInfo->InsertPt = User;
- FuncInfo->MBB = User->getParent();
-
- // Ask the target to try folding the load.
- return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
-}
-
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
@@ -1024,13 +954,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
-
BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
@@ -1044,17 +972,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
+
// Lower any arguments needed in this block if this is the entry block.
if (!FastIS->LowerArguments()) {
-
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
if (EnableFastISelAbortArgs)
- // The "fast" selector couldn't lower these arguments. For the
- // purpose of debugging, just abort.
llvm_unreachable("FastISel didn't lower all arguments");
- // Call target indepedent SDISel argument lowering code if the target
- // specific routine is not successful.
- LowerArguments(LLVMBB);
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
CurDAG->setRoot(SDB->getControlRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -1087,7 +1015,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
--NumFastIselRemaining;
- DEBUG(++NumFastIselSuccess);
+ ++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
// Try to fold the load if so.
@@ -1099,11 +1027,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+ FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
- DEBUG(++NumFastIselSuccess);
+ ++NumFastIselSuccess;
}
continue;
}
@@ -1142,21 +1070,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Recompute NumFastIselRemaining as Selection DAG instruction
// selection may have handled the call, input args, etc.
unsigned RemainingNow = std::distance(Begin, BI);
- (void) RemainingNow;
- DEBUG(NumFastIselFailures += NumFastIselRemaining - RemainingNow);
- DEBUG(NumFastIselRemaining = RemainingNow);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
continue;
}
if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
// Don't abort, and use a different message for terminator misses.
- DEBUG(NumFastIselFailures += NumFastIselRemaining);
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed terminator: ";
Inst->dump();
}
} else {
- DEBUG(NumFastIselFailures += NumFastIselRemaining);
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
Inst->dump();
@@ -1172,8 +1099,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
} else {
// Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
- LowerArguments(LLVMBB);
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
+ LowerArguments(Fn);
+ }
}
if (Begin != BI)
@@ -1668,9 +1597,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- std::vector<EVT> VTs;
- VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { MVT::Other, MVT::Glue };
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -1767,7 +1694,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
- DEBUG(errs() << "ISEL: Match complete!\n");
+ DEBUG(dbgs() << "ISEL: Match complete!\n");
}
enum ChainResult {
@@ -2272,9 +2199,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
SmallVector<SDNode*, 3> ChainNodesMatched;
SmallVector<SDNode*, 3> GlueResultNodesMatched;
- DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
- errs() << '\n');
+ dbgs() << '\n');
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -2286,7 +2213,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
- DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+ DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
// Otherwise, the table isn't computed, but the state machine does start
@@ -2353,10 +2280,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!Result)
break;
- DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
- DEBUG(++NumDAGIselRetries);
+ ++NumDAGIselRetries;
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
@@ -2483,7 +2410,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
@@ -2515,7 +2442,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
continue;
}
@@ -2604,11 +2531,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
- int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
- Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
- Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
@@ -2783,7 +2710,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
- VTList, Ops.data(), Ops.size());
+ VTList, Ops);
// Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
@@ -2859,9 +2786,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
- DEBUG(errs() << " "
+ DEBUG(dbgs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
- << " node: "; Res->dump(CurDAG); errs() << "\n");
+ << " node: "; Res->dump(CurDAG); dbgs() << "\n");
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
@@ -2936,8 +2863,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
- DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
- DEBUG(++NumDAGIselRetries);
+ DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
while (1) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
@@ -2956,7 +2883,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
- DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+ DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
InputChain = LastScope.InputChain;
InputGlue = LastScope.InputGlue;
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 9ab491808f..2feea59c03 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
// Debugging level for shrink wrapping.
enum ShrinkWrapDebugLevel {
- None, BasicInfo, Iterations, Details
+ Disabled, BasicInfo, Iterations, Details
};
static cl::opt<enum ShrinkWrapDebugLevel>
ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
cl::desc("Print shrink wrapping debugging information"),
cl::values(
- clEnumVal(None , "disable debug output"),
+ clEnumVal(Disabled , "disable debug output"),
clEnumVal(BasicInfo , "print basic DF sets"),
clEnumVal(Iterations, "print SR sets for each iteration"),
clEnumVal(Details , "print all DF sets"),
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index b58bb85e49..3903743878 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
- SmallVector<ReturnInst*, 16> Returns;
- SmallVector<InvokeInst*, 16> Invokes;
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->isIntrinsic() &&
+ Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
Invokes.push_back(II);
LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
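
In IR terms the new early-out removes the NOP invoke's unwind edge before any SjLj call-site bookkeeping happens. A hedged restatement of the two-line edit from the hunk, assuming LLVM headers of the same vintage:

#include "llvm/IR/Instructions.h"  // InvokeInst, BranchInst

// A no-op invoke cannot actually unwind, so it is replaced with an
// unconditional branch to its normal destination:
//   before:  invoke void @llvm.donothing() to label %normal unwind label %lpad
//   after:   br label %normal
static void RemoveNopInvoke(llvm::InvokeInst *II) {
  llvm::BranchInst::Create(II->getNormalDest(), II); // br inserted before II
  II->eraseFromParent();                             // delete the NOP invoke
}
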
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 320128a999..c5bbba3ffc 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -29,6 +29,7 @@
#define DEBUG_TYPE "spillplacement"
#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index c10e3190f6..4b7e4609e5 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -67,14 +67,14 @@ DisableColoring("no-stack-coloring",
/// code. If this flag is enabled, we try to save the user.
static cl::opt<bool>
ProtectFromEscapedAllocas("protect-from-escaped-allocas",
- cl::init(false), cl::Hidden,
- cl::desc("Do not optimize lifetime zones that are broken"));
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
-STATISTIC(EscapedAllocas,
- "Number of allocas that escaped the lifetime region");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
//===----------------------------------------------------------------------===//
// StackColoring Pass
@@ -574,7 +574,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
SlotIndex Index = Indexes->getInstructionIndex(I);
LiveInterval *Interval = Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
- "Found instruction usage outside of live range.");
+ "Found instruction usage outside of live range.");
}
#endif
@@ -738,9 +738,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
SlotSizeSorter(MFI));
- bool Chanded = true;
- while (Chanded) {
- Chanded = false;
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
for (unsigned I = 0; I < NumSlots; ++I) {
if (SortedSlots[I] == -1)
continue;
@@ -757,7 +757,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Merge disjoint slots.
if (!First->overlaps(*Second)) {
- Chanded = true;
+ Changed = true;
First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f3be37c9ee..fbef34772b 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -31,7 +31,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumFunProtected, "Number of functions protected");
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2a02f6a3c0..8074d167f4 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
: TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ initActions();
+
+ // Perform these initializations only once.
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
// Set default actions for various operations.
for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
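
The shape of this refactor: set-once, subtarget-independent defaults move into the constructor body above, while the operation-action tables get their own initActions() so they can be cleared again without reconstructing the object. A simplified sketch of the split, using hypothetical members rather than the real TargetLoweringBase fields:

    #include <cstring>

    struct LoweringBase {
      LoweringBase() {
        initActions();            // resettable action tables
        MaxStoresPerMemset = 8;   // one-time defaults, set exactly once here
      }
      void initActions() {        // safe to call again later to reset state
        std::memset(OpActions, 0, sizeof(OpActions));
      }
      unsigned MaxStoresPerMemset;
      char OpActions[64];
    };
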
@@ -702,50 +745,17 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
//
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
- IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
- memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
- MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
- MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
- = MaxStoresPerMemmoveOptSize = 4;
- BenefitFromCodePlacementOpt = false;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
- IntDivIsCheap = false;
- Pow2DivIsCheap = false;
- JumpIsExpensive = false;
- PredictableSelectIsExpensive = false;
- StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
- BooleanContents = UndefinedBooleanContent;
- BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
- MinStackArgumentAlignment = 1;
- ShouldFoldAtomicFences = false;
- InsertFencesForAtomic = false;
- SupportJumpTables = true;
- MinimumJumpTableEntries = 4;
-
- InitLibcallNames(LibcallRoutineNames, TM);
- InitCmpLibcallCCs(CmpLibcallCCs);
- InitLibcallCallingConvs(LibcallCallingConvs);
}
-TargetLoweringBase::~TargetLoweringBase() {
- delete &TLOF;
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
}
-MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize(0));
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(LHSTy);
}
/// canOpTrap - Returns true if the operation can trap for the value type.
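
The getShiftAmountTy() split above makes the vector case explicit: a vector shift keeps per-lane amounts in the vector type itself, while scalar shifts fall back to getScalarShiftAmountTy(), here the pointer-sized integer. A standalone sketch of the dispatch, with a toy type in place of EVT (an assumption for illustration, not the LLVM class):

    #include <cassert>

    struct Ty { bool IsVector; bool IsInteger; };  // toy stand-in for EVT

    static Ty scalarShiftAmountTy() { return Ty{false, true}; }  // ptr-sized int

    static Ty shiftAmountTy(Ty LHSTy) {
      assert(LHSTy.IsInteger && "Shift amount is not an integer type!");
      if (LHSTy.IsVector)
        return LHSTy;                // e.g. <4 x i32> shifted by <4 x i32>
      return scalarShiftAmountTy();  // e.g. i8 shifted by an i64 amount on LP64
    }
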
@@ -905,6 +915,15 @@ void TargetLoweringBase::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
}
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f64)) {
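
What the new f128 rows buy in practice: on a target without native fp128 registers, fp128 arithmetic is carried in i128-sized values and lowered to soft-float library calls. A hedged C-level illustration; the __addtf3 libcall name follows the usual compiler-rt/libgcc convention and is an assumption here, not something this patch spells out:

    // Requires a compiler with the __float128 extension (GCC/Clang).
    __float128 add(__float128 a, __float128 b) {
      return a + b;  // softened: expected to lower to a __addtf3(a, b) call
    }
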
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index bdff954d61..f91688531b 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -36,7 +36,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
using namespace dwarf;
@@ -540,11 +539,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
-
- // Handle thread local data.
- if (Kind.isThreadBSS()) return TLSBSSSection;
- if (Kind.isThreadData()) return TLSDataSection;
-
if (Kind.isText())
return GV->isWeakForLinker() ? TextCoalSection : TextSection;
@@ -597,6 +591,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isBSSLocal())
return DataBSSSection;
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
// Otherwise, just drop the variable in the normal data section.
return DataSection;
}
@@ -759,8 +757,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
- if (Kind.isThreadLocal())
- return ".tls$";
+ if (Kind.isThreadLocal()) {
+    // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
@@ -796,3 +797,49 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return getDataSection();
}
+void TargetLoweringObjectFileCOFF::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ MDNode *LinkerOptions = 0;
+
+ // Look for the "Linker Options" flag, since it's the only one we support.
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+ if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ break;
+ }
+ }
+ if (!LinkerOptions)
+ return;
+
+  // Emit the linker options to the linker's .drectve section. According to the
+  // spec, this section is a space-separated string containing flags for the linker.
+ const MCSection *Sec = getDrectveSection();
+ Streamer.SwitchSection(Sec);
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StringRef Op = MDOption->getString();
+ // Lead with a space for consistency with our dllexport implementation.
+ std::string Escaped(" ");
+ if (Op.find(" ") != StringRef::npos) {
+ // The PE-COFF spec says args with spaces must be quoted. It doesn't say
+ // how to escape quotes, but it probably uses this algorithm:
+ // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx
+ // FIXME: Reuse escaping code from Support/Windows/Program.inc
+ Escaped.push_back('\"');
+ Escaped.append(Op);
+ Escaped.push_back('\"');
+ } else {
+ Escaped.append(Op);
+ }
+ Streamer.EmitBytes(Escaped);
+ }
+ }
+}
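
The quoting branch above only fires for options with embedded spaces. A standalone sketch of the same escaping rule (not the LLVM code itself), with example inputs in the comments:

    #include <string>

    // Every option gets a leading space; options containing spaces are
    // additionally wrapped in double quotes, per the PE-COFF rule cited above.
    static std::string escapeDrectveOption(const std::string &Op) {
      std::string Escaped = " ";
      if (Op.find(' ') != std::string::npos) {
        Escaped += '"';
        Escaped += Op;  // e.g.  a b  ->  " \"a b\""
        Escaped += '"';
      } else {
        Escaped += Op;  // e.g.  /DEFAULTLIB:msvcrt.lib  ->  " /DEFAULTLIB:msvcrt.lib"
      }
      return Escaped;
    }
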
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 0f59d0169e..435a5e7e0b 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const {
return TrapFuncName;
}
+bool TargetOptions::operator==(const TargetOptions &TO) {
+#define ARE_EQUAL(X) X == TO.X
+ return
+ ARE_EQUAL(UnsafeFPMath) &&
+ ARE_EQUAL(NoInfsFPMath) &&
+ ARE_EQUAL(NoNaNsFPMath) &&
+ ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
+ ARE_EQUAL(UseSoftFloat) &&
+ ARE_EQUAL(NoZerosInBSS) &&
+ ARE_EQUAL(JITExceptionHandling) &&
+ ARE_EQUAL(JITEmitDebugInfo) &&
+ ARE_EQUAL(JITEmitDebugInfoToDisk) &&
+ ARE_EQUAL(GuaranteedTailCallOpt) &&
+ ARE_EQUAL(DisableTailCalls) &&
+ ARE_EQUAL(StackAlignmentOverride) &&
+ ARE_EQUAL(RealignStack) &&
+ ARE_EQUAL(SSPBufferSize) &&
+ ARE_EQUAL(EnableFastISel) &&
+ ARE_EQUAL(PositionIndependentExecutable) &&
+ ARE_EQUAL(EnableSegmentedStacks) &&
+ ARE_EQUAL(UseInitArray) &&
+ ARE_EQUAL(TrapFuncName) &&
+ ARE_EQUAL(FloatABIType) &&
+ ARE_EQUAL(AllowFPOpFusion);
+#undef ARE_EQUAL
+}
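
The ARE_EQUAL macro keeps the member list in one place, so adding a new option means one extra line here rather than a hand-rolled comparison elsewhere. A hedged usage sketch; the cache scenario is hypothetical, and note the operator is declared non-const in this patch, so the left operand cannot be const:

    #include "llvm/Target/TargetOptions.h"
    using namespace llvm;

    // Hypothetical cache check: reuse a constructed TargetMachine only when
    // the requested options compare equal memberwise.
    static bool canReuseTargetMachine(TargetOptions &Cached,
                                      const TargetOptions &Requested) {
      return Cached == Requested;  // expands to the ARE_EQUAL chain above
    }
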
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index f31f67d58c..1bf14dbcef 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const {
// Get the definition's scheduling class descriptor from this machine model.
unsigned SchedClass = MI->getDesc().getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ return SCDesc;
#ifndef NDEBUG
unsigned NIter = 0;
@@ -240,7 +242,10 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return DefMI->isTransient() ? 0 : 1;
+  // FIXME: Automatically giving all implicit defs defaultDefLatency is
+  // undesirable. We should only do it for defs that are known to the MC
+  // desc, like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 26c5fe4dcb..7ca2beef65 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -43,11 +43,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -59,6 +59,12 @@ STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+// Temporary flag to disable rescheduling.
+static cl::opt<bool>
+EnableRescheduling("twoaddr-reschedule",
+ cl::desc("Coalesce copies by rescheduling (default=true)"),
+ cl::init(true), cl::Hidden);
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
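
The -twoaddr-reschedule flag defined above is cl::Hidden, so it stays out of the default -help listing but can still be toggled on the command line. A hedged invocation to disable the rescheduling heuristics, assuming an input file test.ll:

    llc -twoaddr-reschedule=false test.ll
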
@@ -427,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
/// isTwoAddrUse - Return true if the specified MI uses the specified register
/// as a two-address use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned NumOps = MI.isInlineAsm()
- ? MI.getNumOperands() : MCID.getNumOperands();
- for (unsigned i = 0; i != NumOps; ++i) {
+ for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
continue;
@@ -1145,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
- if (rescheduleMIBelowKill(mi, nmi, regB)) {
+ if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
++NumReSchedDowns;
return true;
}
@@ -1164,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule it before this MI if it's legal.
- if (rescheduleKillAboveMI(mi, nmi, regB)) {
+ if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
++NumReSchedUps;
return true;
}