aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp272
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--lib/Analysis/ValueTracking.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp11
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h10
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h5
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp35
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h4
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp48
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp1
-rw-r--r--lib/CodeGen/MachineScheduler.cpp2
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp29
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeOrdering.h6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp28
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp7
-rw-r--r--lib/CodeGen/SpillPlacement.cpp1
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp7
-rw-r--r--lib/CodeGen/TargetSchedule.cpp5
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.cpp32
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.h8
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp2
-rw-r--r--lib/IR/Attributes.cpp7
-rw-r--r--lib/IR/Constants.cpp10
-rw-r--r--lib/IR/DIBuilder.cpp122
-rw-r--r--lib/IR/DataLayout.cpp47
-rw-r--r--lib/IR/DebugInfo.cpp97
-rw-r--r--lib/IR/Function.cpp4
-rw-r--r--lib/IR/Instructions.cpp19
-rw-r--r--lib/Linker/LinkModules.cpp2
-rw-r--r--lib/Linker/Linker.cpp21
-rw-r--r--lib/MC/MCContext.cpp6
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp24
-rw-r--r--lib/MC/MCDwarf.cpp8
-rw-r--r--lib/MC/MCObjectFileInfo.cpp12
-rw-r--r--lib/MC/MCParser/AsmParser.cpp49
-rw-r--r--lib/MC/MCStreamer.cpp10
-rw-r--r--lib/Support/APFloat.cpp6
-rw-r--r--lib/Support/APInt.cpp8
-rw-r--r--lib/Support/FileOutputBuffer.cpp4
-rw-r--r--lib/Support/MemoryBuffer.cpp78
-rw-r--r--lib/Support/PathV2.cpp24
-rw-r--r--lib/Support/Unix/Memory.inc9
-rw-r--r--lib/Support/Unix/PathV2.inc17
-rw-r--r--lib/Support/Windows/PathV2.inc61
-rw-r--r--lib/Support/raw_ostream.cpp7
-rw-r--r--lib/TableGen/Error.cpp6
-rw-r--r--lib/TableGen/Main.cpp7
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp55
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp83
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp704
-rw-r--r--lib/Target/ARM/ARM.h1
-rw-r--r--lib/Target/ARM/ARM.td10
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp2
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp57
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp55
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp11
-rw-r--r--lib/Target/ARM/ARMSubtarget.h9
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp16
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp94
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp250
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp9
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp50
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h3
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h3
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp307
-rw-r--r--lib/Target/Mips/CMakeLists.txt4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp10
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp308
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.h51
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp689
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h80
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp670
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h93
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp893
-rw-r--r--lib/Target/Mips/MipsISelLowering.h75
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td4
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h3
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td21
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp460
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h57
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp197
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h46
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp81
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td5
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp123
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h12
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp6
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp325
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h21
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td205
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td12
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp166
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td296
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h15
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp260
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h21
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td18
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td4
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp33
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp54
-rw-r--r--lib/Target/R600/R600InstrInfo.h3
-rw-r--r--lib/Target/R600/R600Instructions.td4
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.cpp1
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h1
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp75
-rw-r--r--lib/Target/R600/R600MachineScheduler.h3
-rw-r--r--lib/Target/R600/SIISelLowering.cpp9
-rw-r--r--lib/Target/R600/SIISelLowering.h1
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp16
-rw-r--r--lib/Target/R600/SIInstrInfo.td7
-rw-r--r--lib/Target/R600/SIInstructions.td212
-rw-r--r--lib/Target/R600/SIIntrinsics.td3
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp117
-rw-r--r--lib/Target/R600/SIRegisterInfo.td14
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h3
-rw-r--r--lib/Target/TargetLibraryInfo.cpp3
-rw-r--r--lib/Target/TargetMachine.cpp26
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp450
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp27
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp24
-rw-r--r--lib/Target/X86/X86FastISel.cpp3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp437
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td187
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td9
-rw-r--r--lib/Target/X86/X86InstrCompiler.td20
-rw-r--r--lib/Target/X86/X86InstrExtension.td65
-rw-r--r--lib/Target/X86/X86InstrInfo.td90
-rw-r--r--lib/Target/X86/X86InstrSSE.td762
-rw-r--r--lib/Target/X86/X86InstrSystem.td24
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp51
-rw-r--r--lib/Target/X86/X86Schedule.td82
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp27
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp741
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp96
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp7
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp25
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp209
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp15
-rw-r--r--lib/Transforms/Scalar/GVN.cpp10
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp82
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp39
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp54
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp3
-rw-r--r--lib/Transforms/Scalar/SROA.cpp675
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp703
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp55
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp2
163 files changed, 8630 insertions, 5112 deletions
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 1faa04623e..57bce84e83 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
-// operation, what preceding memory operations it depends on. It builds on
+// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
@@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
static const int BlockScanLimit = 500;
char MemoryDependenceAnalysis::ID = 0;
-
+
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) {
/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
-static void RemoveFromReverseMap(DenseMap<Instruction*,
+static void RemoveFromReverseMap(DenseMap<Instruction*,
SmallPtrSet<KeyTy, 4> > &ReverseMap,
Instruction *Inst, KeyTy Val) {
typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
@@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (LI->isUnordered()) {
Loc = AA->getLocation(LI);
return AliasAnalysis::Ref;
- } else if (LI->getOrdering() == Monotonic) {
+ }
+ if (LI->getOrdering() == Monotonic) {
Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
@@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (SI->isUnordered()) {
Loc = AA->getLocation(SI);
return AliasAnalysis::Mod;
- } else if (SI->getOrdering() == Monotonic) {
+ }
+ if (SI->getOrdering() == Monotonic) {
Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
@@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// Walk backwards through the block, looking for dependencies
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
- // running time on extreme testcases.
+ // running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
Instruction *Inst = --ScanIt;
-
+
// If this inst is a memory op, get the pointer it accessed
AliasAnalysis::Location Loc;
AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
@@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool
+static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
@@ -289,25 +291,25 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
if (LI->getParent()->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
return 0;
-
+
// Get the base of this load.
int64_t LIOffs = 0;
- const Value *LIBase =
+ const Value *LIBase =
GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
-
+
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
if (LIBase != MemLocBase) return 0;
-
+
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// and "P+3". Check to see if increasing the size of the "LI" load up to its
// alignment (or the largest native integer type) will allow us to load all
// the bits required by MemLoc.
-
+
// If MemLoc is before LI, then no widening of LI will help us out.
if (MemLocOffs < LIOffs) return 0;
-
+
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
@@ -316,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
unsigned LoadAlign = LI->getAlignment();
int64_t MemLocEnd = MemLocOffs+MemLocSize;
-
+
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
if (LIOffs+LoadAlign < MemLocEnd) return 0;
-
+
// This is the size of the load to try. Start with the next larger power of
// two.
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
-
+
while (1) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
@@ -343,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
-
+
NewLoadByteSize <<= 1;
}
}
@@ -355,7 +357,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
/// instruction as well; this function may take advantage of the metadata
/// annotated to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB,
Instruction *QueryInst) {
@@ -382,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Debug intrinsics don't (and can't) cause dependences.
if (isa<DbgInfoIntrinsic>(II)) continue;
-
+
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
@@ -406,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
-
+
if (isLoad) {
if (R == AliasAnalysis::NoAlias) {
// If this is an over-aligned integer load (for example,
@@ -423,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
return MemDepResult::getClobber(Inst);
-
+
continue;
}
-
+
// Must aliased loads are defs of each other.
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
@@ -441,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
#endif
-
+
// Random may-alias loads don't depend on each other without a
// dependence.
continue;
@@ -458,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Stores depend on may/must aliased loads.
return MemDepResult::getDef(Inst);
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// FIXME: This is overly conservative.
@@ -474,10 +476,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
-
+
if (R == AliasAnalysis::NoAlias)
continue;
if (R == AliasAnalysis::MustAlias)
@@ -498,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
-
+
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
// Be conservative if the accessed pointer may alias the allocation.
@@ -532,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(Inst);
}
}
-
+
// No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
@@ -544,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
/// depends.
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
Instruction *ScanPos = QueryInst;
-
+
// Check for a cached result
MemDepResult &LocalCache = LocalDeps[QueryInst];
-
+
// If the cached entry is non-dirty, just return it. Note that this depends
// on MemDepResult's default constructing to 'dirty'.
if (!LocalCache.isDirty())
return LocalCache;
-
+
// Otherwise, if we have a dirty entry, we know we can start the scan at that
// instruction, which may save us some work.
if (Instruction *Inst = LocalCache.getInst()) {
ScanPos = Inst;
-
+
RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
}
-
+
BasicBlock *QueryParent = QueryInst->getParent();
-
+
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
// No dependence found. If this is the entry block of the function, it is
@@ -591,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
}
-
+
// Remember the result!
if (Instruction *I = LocalCache.getInst())
ReverseLocalDeps[I].insert(QueryInst);
-
+
return LocalCache;
}
@@ -636,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// the uncached case, this starts out as the set of predecessors we care
/// about.
SmallVector<BasicBlock*, 32> DirtyBlocks;
-
+
if (!Cache.empty()) {
// Okay, we have a cache entry. If we know it is not dirty, just return it
// with no computation.
@@ -644,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
++NumCacheNonLocal;
return Cache;
}
-
+
// If we already have a partially computed set of results, scan them to
// determine what is dirty, seeding our initial DirtyBlocks worklist.
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
I != E; ++I)
if (I->getResult().isDirty())
DirtyBlocks.push_back(I->getBB());
-
+
// Sort the cache so that we can do fast binary search lookups below.
std::sort(Cache.begin(), Cache.end());
-
+
++NumCacheDirtyNonLocal;
//cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
@@ -665,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(*PI);
++NumUncacheNonLocal;
}
-
+
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
SmallPtrSet<BasicBlock*, 64> Visited;
-
+
unsigned NumSortedEntries = Cache.size();
DEBUG(AssertSorted(Cache));
-
+
// Iterate while we still have blocks to update.
while (!DirtyBlocks.empty()) {
BasicBlock *DirtyBB = DirtyBlocks.back();
DirtyBlocks.pop_back();
-
+
// Already processed this block?
if (!Visited.insert(DirtyBB))
continue;
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
DEBUG(AssertSorted(Cache, NumSortedEntries));
- NonLocalDepInfo::iterator Entry =
+ NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
NonLocalDepEntry(DirtyBB));
if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
- if (Entry != Cache.begin()+NumSortedEntries &&
+ if (Entry != Cache.begin()+NumSortedEntries &&
Entry->getBB() == DirtyBB) {
// If we already have an entry, and if it isn't already dirty, the block
// is done.
if (!Entry->getResult().isDirty())
continue;
-
+
// Otherwise, remember this slot so we can update the value.
ExistingResult = &*Entry;
}
-
+
// If the dirty entry has a pointer, start scanning from it so we don't have
// to rescan the entire block.
BasicBlock::iterator ScanPos = DirtyBB->end();
@@ -715,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
QueryCS.getInstruction());
}
}
-
+
// Find out if this block has a local dependency for QueryInst.
MemDepResult Dep;
-
+
if (ScanPos != DirtyBB->begin()) {
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
@@ -728,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
} else {
Dep = MemDepResult::getNonFuncLocal();
}
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the association!
if (!Dep.isNonLocal()) {
@@ -744,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = Dep.getInst())
ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
} else {
-
+
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
DirtyBlocks.push_back(*PI);
}
}
-
+
return Cache;
}
@@ -769,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
-
+
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
@@ -794,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
NonLocalDepInfo::iterator Entry =
@@ -802,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
NonLocalDepEntry(BB));
if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
ExistingResult = &*Entry;
-
+
// If we have a cached entry, and it is non-dirty, use it as the value for
// this dependency.
if (ExistingResult && !ExistingResult->getResult().isDirty()) {
++NumCacheNonLocalPtr;
return ExistingResult->getResult();
- }
-
+ }
+
// Otherwise, we have to scan for the value. If we have a dirty cache
// entry, start scanning from its position, otherwise we scan from the end
// of the block.
@@ -823,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
ScanPos = ExistingResult->getResult().getInst();
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
-
+
// Scan the block for the dependency.
MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache->push_back(NonLocalDepEntry(BB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
if (!Dep.isDef() && !Dep.isClobber())
return Dep;
-
+
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
// update MemDep when we remove instructions.
Instruction *Inst = Dep.getInst();
@@ -859,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
-static void
+static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
switch (Cache.size() - NumSortedEntries) {
@@ -911,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
bool SkipFirstBlock) {
-
+
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -925,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
- std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
NonLocalPointerInfo *CacheInfo = &Pair.first->second;
@@ -987,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
if (VI == Visited.end() || VI->second == Pointer.getAddr())
continue;
-
+
// We have a pointer mismatch in a block. Just return clobber, saying
// that something was clobbered in this result. We could also do a
// non-fully cached query, but there is little point in doing this.
return true;
}
}
-
+
Value *Addr = Pointer.getAddr();
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
@@ -1005,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
++NumCacheCompleteNonLocalPtr;
return false;
}
-
+
// Otherwise, either this is a new block, a block with an invalid cache
// pointer or one that we're about to invalidate by putting more info into it
// than its valid cache info. If empty, the result will be valid cache info,
@@ -1014,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
else
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
-
+
// PredList used inside loop.
SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
@@ -1028,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// revisit blocks after we insert info for them.
unsigned NumSortedEntries = Cache->size();
DEBUG(AssertSorted(*Cache));
-
+
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
-
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1043,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DEBUG(AssertSorted(*Cache, NumSortedEntries));
MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
NumSortedEntries);
-
+
// If we got a Def or Clobber, add this to the list of results.
if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
continue;
}
}
-
+
// If 'Pointer' is an instruction defined in this block, then we need to do
// phi translation to change it into a value live in the predecessor block.
// If not, we just add the predecessors to the worklist and scan them with
@@ -1067,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NewBlocks.push_back(*PI);
continue;
}
-
+
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
@@ -1082,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
-
+
// We do need to do phi translation, if we know ahead of time we can't phi
// translate this value, don't even try.
if (!Pointer.IsPotentiallyPHITranslatable())
goto PredTranslationFailure;
-
+
// We may have added values to the cache list before this PHI translation.
// If so, we haven't done anything to ensure that the cache remains sorted.
// Sort it now (if needed) so that recursive invocations of
@@ -1110,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
-
+
// Check to see if we have already visited this pred block with another
// pointer. If so, we can't do this lookup. This failure can occur
// with PHI translation when a critical edge exists and the PHI node in
@@ -1127,7 +1129,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
continue;
-
+
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
@@ -1143,7 +1145,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Actually process results here; this need to be a separate loop to avoid
// calling getNonLocalPointerDepFromBB for blocks we don't want to return
- // any results for. (getNonLocalPointerDepFromBB will modify our
+ // any results for. (getNonLocalPointerDepFromBB will modify our
// datastructures in ways the code after the PredTranslationFailure label
// doesn't expect.)
for (unsigned i = 0; i < PredList.size(); i++) {
@@ -1186,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
continue;
}
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
-
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
@@ -1204,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// The following code is "failure"; we can't produce a sane translation
// for the given block. It assumes that we haven't modified any of
// our datastructures while processing the current block.
-
+
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
}
-
+
// Since we failed phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
// If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
@@ -1225,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// we have to bail out.
if (SkipFirstBlock)
return true;
-
+
for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
assert(I != Cache->rend() && "Didn't find current block??");
if (I->getBB() != BB)
continue;
-
+
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
@@ -1250,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
/// CachedNonLocalPointerInfo, remove it.
void MemoryDependenceAnalysis::
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
- CachedNonLocalPointerInfo::iterator It =
+ CachedNonLocalPointerInfo::iterator It =
NonLocalPointerDeps.find(P);
if (It == NonLocalPointerDeps.end()) return;
-
+
// Remove all of the entries in the BB->val map. This involves removing
// instructions from the reverse map.
NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
-
+
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
Instruction *Target = PInfo[i].getResult().getInst();
if (Target == 0) continue; // Ignore non-local dep results.
assert(Target->getParent() == PInfo[i].getBB());
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
}
-
+
// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
NonLocalPointerDeps.erase(It);
}
@@ -1321,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Remove this local dependency info.
LocalDeps.erase(LocalDepEntry);
}
-
+
// If we have any cached pointer dependencies on this instruction, remove
// them. If the instruction has non-pointer type, then it can't be a pointer
// base.
-
+
// Remove it from both the load info and the store info. The instruction
// can't be in either of these maps if it is non-pointer.
if (RemInst->getType()->isPointerTy()) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
}
-
+
// Loop over all of the things that depend on the instruction we're removing.
- //
+ //
SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
// If we find RemInst as a clobber or Def in any of the maps for other values,
@@ -1346,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
MemDepResult NewDirtyVal;
if (!RemInst->isTerminator())
NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
-
+
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
-
+
for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
E = ReverseDeps.end(); I != E; ++I) {
Instruction *InstDependingOnRemInst = *I;
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
-
+
LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
-
+
// Make sure to remember that new things depend on NewDepInst.
assert(NewDirtyVal.getInst() && "There is no way something else can have "
"a local dep on this if it is a terminator!");
- ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
InstDependingOnRemInst));
}
-
+
ReverseLocalDeps.erase(ReverseDepIt);
// Add new reverse deps after scanning the set, to avoid invalidating the
@@ -1379,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
I != E; ++I) {
assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
-
+
PerInstNLInfo &INLD = NonLocalDeps[*I];
// The information is now dirty!
INLD.second = true;
-
- for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
DE = INLD.first.end(); DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NextI = NewDirtyVal.getInst())
ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
}
@@ -1412,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
// If the instruction is in ReverseNonLocalPtrDeps then it appears as a
// value in the NonLocalPointerDeps info.
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
@@ -1420,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
E = Set.end(); I != E; ++I) {
ValueIsLoadPair P = *I;
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
-
+
NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
-
+
// The cache is not valid for any specific block anymore.
NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
-
+
// Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
}
-
+
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
std::sort(NLPDI.begin(), NLPDI.end());
}
-
+
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
-
+
while (!ReversePtrDepsToAdd.empty()) {
ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
.insert(ReversePtrDepsToAdd.back().second);
ReversePtrDepsToAdd.pop_back();
}
}
-
-
+
+
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
AA->deleteValue(RemInst);
DEBUG(verifyRemoved(RemInst));
@@ -1472,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
assert(I->second.getInst() != D &&
"Inst occurs in data structures");
}
-
+
for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
E = NonLocalPointerDeps.end(); I != E; ++I) {
assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
@@ -1481,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
II != E; ++II)
assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
}
-
+
for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
E = NonLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1490,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = INLD.first.end(); II != EE; ++II)
assert(II->getResult().getInst() != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1498,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
@@ -1507,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseNonLocalPtrDepTy::const_iterator
I = ReverseNonLocalPtrDeps.begin(),
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
E = I->second.end(); II != E; ++II)
assert(*II != ValueIsLoadPair(D, false) &&
*II != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+
}
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 72421a00c7..976cd87321 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
+ Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
return TCC_Free;
// Otherwise it's not a no-op.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 45b75df508..45dcc5e37e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, TD, Depth))
return true;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4ded2818ed..bbb0432581 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,6 +112,17 @@ DIE::~DIE() {
delete Children[i];
}
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const{
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 35d7959ac1..d087c540f2 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -152,6 +152,9 @@ namespace llvm {
const std::vector<DIE *> &getChildren() const { return Children; }
const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
@@ -232,9 +235,10 @@ namespace llvm {
///
static unsigned BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
- if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
- if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
- if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
} else {
if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 01f15e52e8..1c743c2414 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
DwarfDebug *DW, DwarfUnits *DWU)
: UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(0) {
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77bf6a9e50..2b180c6cc3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -87,6 +87,9 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
/// getLowerBoundDefault - Return the default lower bound for an array. If the
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
@@ -103,6 +106,7 @@ public:
unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -120,6 +124,7 @@ public:
return AccelTypes;
}
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 30bfa78989..b169602b0e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
// Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsSPDIE);
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
SPCU->addDie(SPDie);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -1692,15 +1697,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Compute the size and offset of all the DIEs.
void DwarfUnits::computeSizeAndOffsets() {
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset((*I)->getCUDie(), Offset);
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
}
}
@@ -1774,6 +1783,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
Asm->EmitInt32(Addr);
break;
}
@@ -1871,6 +1887,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
}
}
+/// For a given compile unit DIE, returns offset from beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 771bc362cb..81e345e628 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -274,6 +274,10 @@ public:
/// \brief Returns the address pool.
AddrPool *getAddrPool() { return &AddressPool; }
+
+ /// \brief For a given compile unit DIE, returns offset from beginning of
+ /// debug info.
+ unsigned getCUOffset(DIE *Die);
};
/// \brief Collects and handles dwarf debug information.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 3b28e6afb6..7793e96c35 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx) {
+ SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5e04f2d8a3..04321f3292 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index a777f52cb2..1af00e84a6 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 103b058c13..c872355e37 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -2182,7 +2182,7 @@ public:
/// Callback to select the highest priority node from the ready Q.
virtual SUnit *pickNode(bool &IsTopNode) {
if (ReadyQ.empty()) return NULL;
- pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32830f57bc..855a8c5f92 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -101,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 716fb93b29..61603e18f5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
DAG.getSetCC(N->getDebugLoc(),
TLI.getSetCCResultType(VT),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index e26d1656e8..b6436bf427 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
SDValue &OpEntry = PromotedIntegers[Op];
assert(OpEntry.getNode() == 0 && "Node is already promoted!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
SDValue &OpEntry = SoftenedFloats[Op];
assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = ScalarizedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
@@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
@@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
@@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = WidenedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node already widened!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
@@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index d2269f8acc..7e7b8974be 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -33,8 +33,10 @@ class SDNodeOrdering {
public:
SDNodeOrdering() {}
- void add(const SDNode *Node, unsigned O) {
- OrderMap[Node] = O;
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
}
void remove(const SDNode *Node) {
DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 35707e86ce..64244313a3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- if (ISD::isZEXTLoad(Op.getNode())) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
@@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
break;
}
- // Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
- unsigned ExtType = LD->getExtensionType();
- switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp;
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 39a1f8a3d0..eeea9e4cfc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TargetSubtargetInfo &ST =
const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -784,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() {
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
- if (ResNode)
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
ReplaceUses(Node, ResNode);
+ }
// If after the replacement this node is not used any more,
// remove this dead node.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 320128a999..c5bbba3ffc 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -29,6 +29,7 @@
#define DEBUG_TYPE "spillplacement"
#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9a9ddc9b48..3bdca4c640 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -742,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
- if (Kind.isThreadLocal())
- return ".tls$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index f31f67d58c..783bfa1c1a 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return DefMI->isTransient() ? 0 : 1;
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index 2efbfd1f92..7dff9ff49a 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
void DWARFDebugArangeSet::clear() {
Offset = -1U;
- std::memset(&Header, 0, sizeof(Header));
+ std::memset(&HeaderData, 0, sizeof(Header));
ArangeDescriptors.clear();
}
@@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// descriptor on the target system. This header is followed by a series
// of tuples. Each tuple consists of an address and a length, each in
// the size appropriate for an address on the target architecture.
- Header.Length = data.getU32(offset_ptr);
- Header.Version = data.getU16(offset_ptr);
- Header.CuOffset = data.getU32(offset_ptr);
- Header.AddrSize = data.getU8(offset_ptr);
- Header.SegSize = data.getU8(offset_ptr);
+ HeaderData.Length = data.getU32(offset_ptr);
+ HeaderData.Version = data.getU16(offset_ptr);
+ HeaderData.CuOffset = data.getU32(offset_ptr);
+ HeaderData.AddrSize = data.getU8(offset_ptr);
+ HeaderData.SegSize = data.getU8(offset_ptr);
// Perform basic validation of the header fields.
- if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) ||
- (Header.AddrSize != 4 && Header.AddrSize != 8)) {
+ if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) ||
+ (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) {
clear();
return false;
}
@@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// size of an address). The header is padded, if necessary, to the
// appropriate boundary.
const uint32_t header_size = *offset_ptr - Offset;
- const uint32_t tuple_size = Header.AddrSize * 2;
+ const uint32_t tuple_size = HeaderData.AddrSize * 2;
uint32_t first_tuple_offset = 0;
while (first_tuple_offset < header_size)
first_tuple_offset += tuple_size;
@@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
Descriptor arangeDescriptor;
assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
- assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize);
+ assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize);
while (data.isValidOffset(*offset_ptr)) {
- arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize);
- arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize);
+ arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+ arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
// Each set of tuples is terminated by a 0 for the address and 0
// for the length.
@@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
- Header.Length, Header.Version)
+ HeaderData.Length, HeaderData.Version)
<< format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
- Header.CuOffset, Header.AddrSize, Header.SegSize);
+ HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize);
- const uint32_t hex_width = Header.AddrSize * 2;
+ const uint32_t hex_width = HeaderData.AddrSize * 2;
for (DescriptorConstIter pos = ArangeDescriptors.begin(),
end = ArangeDescriptors.end(); pos != end; ++pos)
OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
@@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
std::find_if(ArangeDescriptors.begin(), end, // Range
DescriptorContainsAddress(address)); // Predicate
if (pos != end)
- return Header.CuOffset;
+ return HeaderData.CuOffset;
return -1U;
}
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index 9a2a6d0f00..d76867615a 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -48,7 +48,7 @@ private:
typedef DescriptorColl::const_iterator DescriptorConstIter;
uint32_t Offset;
- Header Header;
+ Header HeaderData;
DescriptorColl ArangeDescriptors;
public:
@@ -58,11 +58,11 @@ public:
bool extract(DataExtractor data, uint32_t *offset_ptr);
void dump(raw_ostream &OS) const;
- uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; }
- uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; }
+ uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+ uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
uint32_t findAddress(uint64_t address) const;
uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
- const struct Header &getHeader() const { return Header; }
+ const struct Header &getHeader() const { return HeaderData; }
const Descriptor *getDescriptor(uint32_t i) const {
if (i < ArangeDescriptors.size())
return &ArangeDescriptors[i];
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index b077eb5e38..f79862d606 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -186,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
Range range(address);
RangeCollIterator begin = Aranges.begin();
RangeCollIterator end = Aranges.end();
- RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan);
+ RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
return pos->Offset;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index ed2bf05e90..2d828914cd 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -649,6 +649,13 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
}
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const {
+ llvm::AttrBuilder B;
+ B.addAttribute(Kind);
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+}
+
AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
AttributeSet Attrs) const {
if (!pImpl) return Attrs;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 0c7effb5ca..1abb656435 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -47,6 +47,16 @@ bool Constant::isNegativeZeroValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->isZero() && CFP->isNegative();
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
+ // We've already handled true FP case; any other FP vectors can't represent -0.0.
+ if (getType()->isFPOrFPVectorTy())
+ return false;
+
// Otherwise, just use +0.0.
return isNullValue();
}
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 5ee36abc6b..a97b740d26 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -71,6 +71,16 @@ static MDNode *getNonCompileUnitScope(MDNode *N) {
return N;
}
+static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
+ StringRef Directory) {
+ assert(!Filename.empty() && "Unable to create file without name");
+ Value *Pair[] = {
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ };
+ return MDNode::get(VMContext, Pair);
+}
+
/// createCompileUnit - A CompileUnit provides an anchor for all debugging
/// information generated during this instance of compilation.
void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
@@ -93,13 +103,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ createFilePathPair(VMContext, Filename, Directory),
ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
MDString::get(VMContext, Producer),
- // isMain field can be removed when we remove the legacy debug info.
- ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
MDString::get(VMContext, Flags),
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
@@ -119,13 +125,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
- assert(!Filename.empty() && "Unable to create file without name");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
- NULL // TheCU
+ createFilePathPair(VMContext, Filename, Directory)
};
return DIFile(MDNode::get(VMContext, Elts));
}
@@ -148,9 +150,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) {
// ,size, alignment, offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -171,9 +173,9 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
// offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ NULL, // File/directory name
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -190,9 +192,9 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, StringRef()), // Empty name.
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -210,9 +212,9 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -227,9 +229,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base)
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
+ NULL, // Filename
NULL, //TheCU,
NULL,
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -248,9 +250,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
// References are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ NULL, // Filename
NULL, // TheCU,
NULL, // Name
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -268,9 +270,9 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
assert(Ty.Verify() && "Invalid typedef type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -288,9 +290,9 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
assert(FriendTy.Verify() && "Invalid friend type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ NULL,
Ty,
NULL, // Name
- Ty.getFile(),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -309,9 +311,9 @@ DIDerivedType DIBuilder::createInheritance(
// TAG_inheritance is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ NULL,
Ty,
NULL, // Name
- Ty.getFile(),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -330,9 +332,9 @@ DIDerivedType DIBuilder::createMemberType(
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -353,9 +355,9 @@ DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
Flags |= DIDescriptor::FlagStaticMember;
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
@@ -379,9 +381,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(File),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -406,9 +408,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(File),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -492,9 +494,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -524,9 +526,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
// TAG_structure_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -551,9 +553,9 @@ DICompositeType DIBuilder::createUnionType(
// TAG_union_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -574,8 +576,8 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- MDString::get(VMContext, ""),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDString::get(VMContext, ""),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -598,9 +600,9 @@ DICompositeType DIBuilder::createEnumerationType(
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -622,9 +624,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
// TAG_array_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
NULL, //TheCU,
MDString::get(VMContext, ""),
- NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -645,9 +647,9 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
// A vector is an array type with the FlagVector flag applied.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
NULL, //TheCU,
MDString::get(VMContext, ""),
- NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -725,29 +727,6 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
return DIDescriptor(MDNode::get(VMContext, Elts));
}
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType() {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType(DIFile F) {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = {
- GetTagConstant(VMContext, DW_TAG_base_type),
- TheCU,
- NULL,
- F
- };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
/// createForwardDecl - Create a temporary forward-declared type that
/// can be RAUW'd if the full type is seen.
DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
@@ -758,9 +737,9 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
// Create a temporary MDNode.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ F.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- F,
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -798,17 +777,18 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
return DISubrange(MDNode::get(VMContext, Elts));
}
-/// createGlobalVariable - Create a new descriptor for the specified global.
+/// \brief Create a new descriptor for the specified global.
DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val) {
+createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DIType Ty, bool isLocalToUnit,
+ Value *Val) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
NULL, // TheCU,
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
Ty,
@@ -822,6 +802,14 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
return DIGlobalVariable(Node);
}
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val) {
+ return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
+ Val);
+}
+
/// createStaticVariable - Create a new descriptor for the specified static
/// variable.
DIGlobalVariable DIBuilder::
@@ -920,12 +908,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -946,7 +933,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
// Create a named metadata so that we do not lose this mdnode.
if (isDefinition)
AllSubprograms.push_back(Node);
- return DISubprogram(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createFunction should return a valid DISubprogram");
+ return S;
}
/// createMethod - Create a new descriptor for the specified C++ method.
@@ -966,12 +955,11 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ F.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
- F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -991,7 +979,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
MDNode *Node = MDNode::get(VMContext, Elts);
if (isDefinition)
AllSubprograms.push_back(Node);
- return DISubprogram(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createMethod should return a valid DISubprogram");
+ return S;
}
/// createNameSpace - This creates new descriptor for a namespace
@@ -1000,9 +990,9 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
DINameSpace R(MDNode::get(VMContext, Elts));
@@ -1033,10 +1023,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
static unsigned int unique_id = 0;
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File,
getNonCompileUnitScope(Scope),
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt32Ty(VMContext), Col),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
};
DILexicalBlock R(MDNode::get(VMContext, Elts));
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index f09de3a731..9596674912 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -438,6 +438,12 @@ DataLayout::~DataLayout() {
delete static_cast<StructLayoutMap*>(LayoutMap);
}
+bool DataLayout::doFinalization(Module &M) {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+ LayoutMap = 0;
+ return false;
+}
+
const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
@@ -504,47 +510,6 @@ std::string DataLayout::getStringRepresentation() const {
}
-uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- case Type::LabelTyID:
- return getPointerSizeInBits(0);
- case Type::PointerTyID: {
- unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace();
- return getPointerSizeInBits(AS);
- }
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
- }
- case Type::StructTyID:
- // Get the layout annotation... which is lazily created on demand.
- return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::IntegerTyID:
- return cast<IntegerType>(Ty)->getBitWidth();
- case Type::HalfTyID:
- return 16;
- case Type::FloatTyID:
- return 32;
- case Type::DoubleTyID:
- case Type::X86_MMXTyID:
- return 64;
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- return 128;
- // In memory objects this is always aligned to a higher boundary, but
- // only 80 bits contain information.
- case Type::X86_FP80TyID:
- return 80;
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType());
- }
- default:
- llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
- }
-}
-
/*!
\param abi_or_pref Flag that determines which alignment is returned. true
returns the ABI alignment, false returns the preferred alignment.
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index e85d4adf77..91968d8fbf 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -66,18 +66,28 @@ bool DIDescriptor::Verify() const {
DITemplateValueParameter(DbgNode).Verify());
}
-StringRef
-DIDescriptor::getStringField(unsigned Elt) const {
- if (DbgNode == 0)
- return StringRef();
+static Value *getField(const MDNode *DbgNode, unsigned Elt) {
+ if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
+ return 0;
+ return DbgNode->getOperand(Elt);
+}
- if (Elt < DbgNode->getNumOperands())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
- return MDS->getString();
+static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)))
+ return R;
+ return 0;
+}
+static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
+ if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
+ return MDS->getString();
return StringRef();
}
+StringRef DIDescriptor::getStringField(unsigned Elt) const {
+ return ::getStringField(DbgNode, Elt);
+}
+
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
if (DbgNode == 0)
return 0;
@@ -407,7 +417,7 @@ bool DICompileUnit::Verify() const {
if (N.empty())
return false;
// It is possible that directory and produce string is empty.
- return DbgNode->getNumOperands() == 15;
+ return DbgNode->getNumOperands() == 12;
}
/// Verify - Verify that an ObjC property is well formed.
@@ -475,7 +485,7 @@ bool DISubprogram::Verify() const {
DICompositeType Ty = getType();
if (!Ty.Verify())
return false;
- return DbgNode->getNumOperands() == 21;
+ return DbgNode->getNumOperands() == 20;
}
/// Verify - Verify that a global variable descriptor is well formed.
@@ -529,9 +539,14 @@ bool DINameSpace::Verify() const {
return DbgNode->getNumOperands() == 5;
}
+/// \brief Retrieve the MDNode for the directory/file pair.
+MDNode *DIFile::getFileNode() const {
+ return const_cast<MDNode*>(getNodeField(DbgNode, 1));
+}
+
/// \brief Verify that the file descriptor is well formed.
bool DIFile::Verify() const {
- return isFile() && DbgNode->getNumOperands() == 4;
+ return isFile() && DbgNode->getNumOperands() == 2;
}
/// \brief Verify that the enumerator descriptor is well formed.
@@ -627,21 +642,21 @@ bool DISubprogram::describes(const Function *F) {
unsigned DISubprogram::isOptimized() const {
assert (DbgNode && "Invalid subprogram descriptor!");
- if (DbgNode->getNumOperands() == 16)
- return getUnsignedField(15);
+ if (DbgNode->getNumOperands() == 15)
+ return getUnsignedField(14);
return 0;
}
MDNode *DISubprogram::getVariablesNodes() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
return NULL;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(19));
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
}
DIArray DISubprogram::getVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
return DIArray();
- if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+ if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
return DIArray(T);
return DIArray();
}
@@ -653,17 +668,7 @@ StringRef DIScope::getFilename() const {
return DILexicalBlockFile(DbgNode).getFilename();
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getFilename();
- if (isSubprogram())
- return DISubprogram(DbgNode).getFilename();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getFilename();
- if (isNameSpace())
- return DINameSpace(DbgNode).getFilename();
- if (isType())
- return DIType(DbgNode).getFilename();
- if (isFile())
- return DIFile(DbgNode).getFilename();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 0);
}
StringRef DIScope::getDirectory() const {
@@ -673,52 +678,42 @@ StringRef DIScope::getDirectory() const {
return DILexicalBlockFile(DbgNode).getDirectory();
if (isLexicalBlock())
return DILexicalBlock(DbgNode).getDirectory();
- if (isSubprogram())
- return DISubprogram(DbgNode).getDirectory();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getDirectory();
- if (isNameSpace())
- return DINameSpace(DbgNode).getDirectory();
- if (isType())
- return DIType(DbgNode).getDirectory();
- if (isFile())
- return DIFile(DbgNode).getDirectory();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 1);
}
DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
return DIArray(N);
return DIArray();
}
@@ -1026,6 +1021,8 @@ void DIDescriptor::print(raw_ostream &OS) const {
DIVariable(DbgNode).printInternal(OS);
} else if (this->isObjCProperty()) {
DIObjCProperty(DbgNode).printInternal(OS);
+ } else if (this->isNameSpace()) {
+ DINameSpace(DbgNode).printInternal(OS);
} else if (this->isScope()) {
DIScope(DbgNode).printInternal(OS);
}
@@ -1099,6 +1096,14 @@ void DICompositeType::printInternal(raw_ostream &OS) const {
OS << " [" << A.getNumElements() << " elements]";
}
+void DINameSpace::printInternal(raw_ostream &OS) const {
+ StringRef Name = getName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
void DISubprogram::printInternal(raw_ostream &OS) const {
// TODO : Print context
OS << " [line " << getLineNumber() << ']';
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 5559a6c56e..1e72b90a13 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -211,7 +211,7 @@ Function::~Function() {
clearGC();
// Remove the intrinsicID from the Cache.
- if(getValueName() && isIntrinsic())
+ if (getValueName() && isIntrinsic())
getContext().pImpl->IntrinsicIDCache.erase(this);
}
@@ -352,7 +352,7 @@ unsigned Function::getIntrinsicID() const {
LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
getContext().pImpl->IntrinsicIDCache;
- if(!IntrinsicIDCache.count(this)) {
+ if (!IntrinsicIDCache.count(this)) {
unsigned Id = lookupIntrinsicID();
IntrinsicIDCache[this]=Id;
return Id;
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 2e3a525826..841cc5926a 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -256,6 +256,13 @@ void LandingPadInst::addClause(Value *Val) {
OperandList[OpNo] = Val;
}
+/// hasCatchAll - Return true if any clause of this landing pad is a catch
+/// clause whose value is a null pointer constant (ConstantPointerNull); such a
+/// clause is treated as a catch-all. Returns false when there are no clauses.
+bool LandingPadInst::hasCatchAll() const {
+ for (unsigned I = 0, E = getNumClauses(); I != E; ++I)
+ if (isCatch(I) && isa<ConstantPointerNull>(getClause(I)))
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// CallInst Implementation
//===----------------------------------------------------------------------===//
@@ -3000,8 +3007,8 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
uint32_t BitWidth = C.getBitWidth();
switch (pred) {
default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
- case ICmpInst::ICMP_EQ: Upper++; break;
- case ICmpInst::ICMP_NE: Lower++; break;
+ case ICmpInst::ICMP_EQ: ++Upper; break;
+ case ICmpInst::ICMP_NE: ++Lower; break;
case ICmpInst::ICMP_ULT:
Lower = APInt::getMinValue(BitWidth);
// Check for an empty-set condition.
@@ -3015,25 +3022,25 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_UGT:
- Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
// Check for an empty-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_SGT:
- Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
// Check for an empty-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_ULE:
- Lower = APInt::getMinValue(BitWidth); Upper++;
+ Lower = APInt::getMinValue(BitWidth); ++Upper;
// Check for a full-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/true);
break;
case ICmpInst::ICMP_SLE:
- Lower = APInt::getSignedMinValue(BitWidth); Upper++;
+ Lower = APInt::getSignedMinValue(BitWidth); ++Upper;
// Check for a full-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/true);
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 0acbcfadaf..74cbdadd61 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index c8ea8ff0a9..74d24f278b 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -15,7 +15,6 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
@@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname,
LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname,
Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -63,27 +60,9 @@ Linker::verbose(StringRef message) {
errs() << " " << message << "\n";
}
-void
-Linker::addPath(const sys::Path& path) {
- LibPaths.push_back(path);
-}
-
-void
-Linker::addPaths(const std::vector<std::string>& paths) {
- for (unsigned i = 0, e = paths.size(); i != e; ++i)
- LibPaths.push_back(sys::Path(paths[i]));
-}
-
-void
-Linker::addSystemPaths() {
- sys::Path::GetBitcodeLibraryPaths(LibPaths);
- LibPaths.insert(LibPaths.begin(),sys::Path("./"));
-}
-
Module*
Linker::releaseModule() {
Module* result = Composite;
- LibPaths.clear();
Error.clear();
Composite = 0;
Flags = 0;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 26d378e6c0..9adcc02b71 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -304,8 +304,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be change to be not less than zero.
- std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
- std::vector<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
// Make space for this FileNumber in the MCDwarfFiles vector if needed.
if (FileNumber >= MCDwarfFiles.size()) {
MCDwarfFiles.resize(FileNumber + 1);
@@ -366,7 +366,7 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// currently is assigned and false otherwise.
bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
- std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index d3fa906a06..4766b37476 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
// Get the assembler info needed to setup the MCContext.
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
- assert(MAI && "Unable to create target asm info!");
+ if (!MAI)
+ return 0;
const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
- assert(MII && "Unable to create target instruction info!");
+ if (!MII)
+ return 0;
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
- assert(MRI && "Unable to create target register info!");
+ if (!MRI)
+ return 0;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
FeaturesStr);
- assert(STI && "Unable to create subtarget info!");
+ if (!STI)
+ return 0;
// Set up the MCContext for creating symbols and MCExpr's.
MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
- assert(Ctx && "Unable to create MCContext!");
+ if (!Ctx)
+ return 0;
// Set up disassembler.
MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
- assert(DisAsm && "Unable to create disassembler!");
+ if (!DisAsm)
+ return 0;
DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI, *MII, *MRI, *STI);
- assert(IP && "Unable to create instruction printer!");
+ if (!IP)
+ return 0;
LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
STI, MII, Ctx, DisAsm, IP);
- assert(DC && "Allocation failure!");
+ if (!DC)
+ return 0;
return DC;
}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index c81abe952a..0f8f0741bd 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -298,7 +298,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
// Put out the directory and file tables.
// First the directory table.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs(CUID);
for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
@@ -307,7 +307,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
MCOS->EmitIntValue(0, 1); // Terminate the directory list
// Second the file table.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles(CUID);
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
@@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
MCOS->EmitBytes(MCDwarfDirs[0]);
MCOS->EmitBytes("/");
}
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
MCOS->EmitBytes(MCDwarfFiles[1]->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 2e1a045569..bafa002e3d 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -223,6 +223,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
}
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ // FIXME: Check this. Mips64el is using the base values, which is most likely
+ // incorrect.
+ if (T.getArch() != Triple::mips64el)
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
@@ -230,15 +235,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_)
+ FDEEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
@@ -261,8 +264,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
} else if (T.getArch() == Triple::aarch64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -282,7 +283,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
} else if (T.getArch() == Triple::ppc64) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 6ab49ec92c..804734cea9 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return TokError("unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
@@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// If we previously parsed a cpp hash file line comment then make sure the
// current Dwarf File is for the CppHashFilename if not then emit the
// Dwarf File table for it and adjust the line number for the .loc.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
if (CppHashFilename.size() != 0) {
if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
@@ -4105,12 +4105,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
// Immediate.
- if (Operand->isImm()) {
- if (Operand->needAsmRewrite())
- AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix,
- Operand->getStartLoc()));
+ if (Operand->isImm())
continue;
- }
// Register operand.
if (Operand->isReg() && !Operand->needAddressOf()) {
@@ -4131,11 +4127,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
continue;
bool isOutput = (i == 1) && Desc.mayStore();
- if (Operand->isMem() && Operand->needSizeDirective())
- AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
- Operand->getStartLoc(), /*Len*/0,
- Operand->getMemSize()));
-
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
@@ -4184,28 +4175,31 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
// Build the IR assembly string.
std::string AsmStringIR;
- AsmRewriteKind PrevKind = AOK_Imm;
raw_string_ostream OS(AsmStringIR);
- const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
E = AsmStrRewrites.end();
I != E; ++I) {
const char *Loc = (*I).Loc.getPointer();
- assert(Loc >= Start && "Expected Loc to be after Start!");
+ assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
unsigned AdditionalSkip = 0;
AsmRewriteKind Kind = (*I).Kind;
- // Emit everything up to the immediate/expression. If the previous rewrite
- // was a size directive, then this has already been done.
- if (PrevKind != AOK_SizeDirective)
- OS << StringRef(Start, Loc - Start);
- PrevKind = Kind;
+ // Emit everything up to the immediate/expression.
+ unsigned Len = Loc - AsmStart;
+ if (Len) {
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
+ --Len;
+ OS << StringRef(AsmStart, Len);
+ }
// Skip the original expression.
if (Kind == AOK_Skip) {
- Start = Loc + (*I).Len;
+ AsmStart = Loc + (*I).Len;
continue;
}
@@ -4254,14 +4248,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
}
// Skip the original expression.
- if (Kind != AOK_SizeDirective)
- Start = Loc + (*I).Len + AdditionalSkip;
+ AsmStart = Loc + (*I).Len + AdditionalSkip;
+
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
+ *AsmStart == ']')
+ ++AsmStart;
}
// Emit the remainder of the asm string.
- const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
- if (Start != AsmEnd)
- OS << StringRef(Start, AsmEnd - Start);
+ if (AsmStart != AsmEnd)
+ OS << StringRef(AsmStart, AsmEnd - AsmStart);
AsmString = OS.str();
return false;
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 51ef415542..d02e5535bd 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
: Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.push_back(std::make_pair(section, section));
}
@@ -40,7 +40,7 @@ void MCStreamer::reset() {
EmitDebugFrame = false;
CurrentW64UnwindInfo = 0;
LastSymbol = 0;
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.clear();
SectionStack.push_back(std::make_pair(section, section));
}
@@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
if (FrameInfos.empty())
- return NULL;
+ return 0;
return &FrameInfos.back();
}
@@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -623,5 +623,5 @@ void MCStreamer::Finish() {
MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
report_fatal_error("Not supported!");
- return *(static_cast<MCSymbolData*> (NULL));
+ return *(static_cast<MCSymbolData*>(0));
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 5b68fbb270..6182e34150 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3311,10 +3311,8 @@ namespace {
significand = significand.udiv(divisor);
- // Truncate the significand down to its active bit count, but
- // don't try to drop below 32.
- unsigned newPrecision = std::max(32U, significand.getActiveBits());
- significand = significand.trunc(newPrecision);
+ // Truncate the significand down to its active bit count.
+ significand = significand.trunc(significand.getActiveBits());
}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 07cb057b48..e8534753b4 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const {
if (lhsNeg) {
// Sign bit is set so perform two's complement to make it positive
lhs.flipAllBits();
- lhs++;
+ ++lhs;
}
if (rhsNeg) {
// Sign bit is set so perform two's complement to make it positive
rhs.flipAllBits();
- rhs++;
+ ++rhs;
}
// Now we have unsigned values to compare so do the comparison if necessary
@@ -2116,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
// If its negative, put it in two's complement form
if (isNeg) {
- (*this)--;
+ --(*this);
this->flipAllBits();
}
}
@@ -2197,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// Flip the bits and add one to turn it into the equivalent positive
// value and put a '-' in the result.
Tmp.flipAllBits();
- Tmp++;
+ ++Tmp;
Str.push_back('-');
}
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index cd430f218b..1ee69b6023 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath,
if (EC)
return EC;
- OwningPtr<mapped_file_region> MappedFile(
- new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC));
+ OwningPtr<mapped_file_region> MappedFile(new mapped_file_region(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC));
if (EC)
return EC;
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4c558b37cf..8042237253 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -72,13 +72,15 @@ static void CopyStringRef(char *Memory, StringRef Data) {
Memory[Data.size()] = 0; // Null terminate string.
}
-/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
-template <typename T>
-static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
- bool RequiresNullTerminator) {
- char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
- CopyStringRef(Mem + sizeof(T), Name);
- return new (Mem) T(Buffer, RequiresNullTerminator);
+/// NamedBufferAlloc - Tag argument for the placement operator new overload
+/// that takes a NamedBufferAlloc; it carries the buffer name to be copied
+/// into the allocation.
+struct NamedBufferAlloc {
+ StringRef Name;
+ NamedBufferAlloc(StringRef Name) : Name(Name) {}
+};
+
+/// Placement operator new that reserves room for a name after the object.
+///
+/// Allocates \p N bytes for the object plus Alloc.Name.size() + 1 bytes, and
+/// copies the name (null-terminated by CopyStringRef) immediately after the
+/// first \p N bytes. Returns the start of the allocation.
+void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
+ char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
+ CopyStringRef(Mem + N, Alloc.Name);
+ return Mem;
}
namespace {
@@ -105,8 +107,8 @@ public:
MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
StringRef BufferName,
bool RequiresNullTerminator) {
- return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
- RequiresNullTerminator);
+ return new (NamedBufferAlloc(BufferName))
+ MemoryBufferMem(InputData, RequiresNullTerminator);
}
/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
@@ -183,24 +185,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
//===----------------------------------------------------------------------===//
namespace {
-/// MemoryBufferMMapFile - This represents a file that was mapped in with the
-/// sys::Path::MapInFilePages method. When destroyed, it calls the
-/// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBufferMem {
-public:
- MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
- : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
+///
+/// This handles converting the offset into a legal offset on the platform.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ sys::fs::mapped_file_region MFR;
+
+ static uint64_t getLegalMapOffset(uint64_t Offset) {
+ return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
+ }
+
+ static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
+ return Len + (Offset - getLegalMapOffset(Offset));
+ }
- ~MemoryBufferMMapFile() {
- static int PageSize = sys::process::get_self()->page_size();
+ const char *getStart(uint64_t Len, uint64_t Offset) {
+ return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
+ }
- uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
- size_t Size = getBufferSize();
- uintptr_t RealStart = Start & ~(PageSize - 1);
- size_t RealSize = Size + (Start - RealStart);
+public:
+ /// Map \p Len bytes of \p FD starting at \p Offset read-only. The mapping
+ /// itself starts at the legal (alignment-rounded) offset and is enlarged by
+ /// the rounding delta; the buffer is initialized to the requested range.
+ /// The descriptor is not closed by the mapping (closefd is false).
+ ///
+ /// \param EC receives the mapping error, if any. It is taken by reference so
+ /// that the caller can observe a failure; passing it by value would leave
+ /// the caller's error_code untouched and make every mapping look
+ /// successful. When \p EC is set, init() has not been called and the buffer
+ /// must not be used.
+ MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ uint64_t Offset, error_code &EC)
+ : MFR(FD, false, sys::fs::mapped_file_region::readonly,
+ getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
+ // Only publish the buffer range if the mapping succeeded.
+ if (!EC) {
+ const char *Start = getStart(Len, Offset);
+ init(Start, Start + Len, RequiresNullTerminator);
+ }
+ }
- sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
- RealSize);
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char *>(this + 1);
}
virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
@@ -344,17 +360,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize)) {
- off_t RealMapOffset = Offset & ~(PageSize - 1);
- off_t Delta = Offset - RealMapOffset;
- size_t RealMapSize = MapSize + Delta;
-
- if (const char *Pages = sys::Path::MapInFilePages(FD,
- RealMapSize,
- RealMapOffset)) {
- result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
- StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ error_code EC;
+ result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
+ if (!EC)
return error_code::success();
- }
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 41add96194..58a6ea720e 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -18,6 +18,9 @@
#include <cctype>
#include <cstdio>
#include <cstring>
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
namespace {
using llvm::StringRef;
@@ -493,6 +496,27 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
+#ifdef __APPLE__
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+ int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, 0, 0);
+ if (ConfLen > 0) {
+ do {
+ result.resize(ConfLen);
+ ConfLen = confstr(ConfName, result.data(), result.size());
+ } while (ConfLen > 0 && ConfLen != result.size());
+
+ if (ConfLen > 0) {
+ assert(result.back() == 0);
+ result.pop_back();
+ return;
+ }
+
+ result.clear();
+ }
+#endif
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index e00394ec6a..e9b26bdb80 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr,
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
+# if defined(ANDROID)
+ // The declaration of "cacheflush" in Android bionic:
+ // extern int cacheflush(long start, long end, long flags);
+ const char *End = Start + Len;
+ long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
+ long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
+ cacheflush(LStart, LEnd, BCACHE);
+# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
+# endif
# endif
#endif // end apple
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 44b31b3202..a3dfd4b0a3 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -475,12 +475,14 @@ rety_open_create:
return error_code::success();
}
-error_code mapped_file_region::init(int fd, uint64_t offset) {
- AutoFD FD(fd);
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ AutoFD ScopedFD(FD);
+ if (!CloseFD)
+ ScopedFD.take();
// Figure out how large the file is.
struct stat FileInfo;
- if (fstat(fd, &FileInfo) == -1)
+ if (fstat(FD, &FileInfo) == -1)
return error_code(errno, system_category());
uint64_t FileSize = FileInfo.st_size;
@@ -488,7 +490,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
Size = FileSize;
else if (FileSize < Size) {
// We need to grow the file.
- if (ftruncate(fd, Size) == -1)
+ if (ftruncate(FD, Size) == -1)
return error_code(errno, system_category());
}
@@ -497,7 +499,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+ Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
@@ -526,12 +528,13 @@ mapped_file_region::mapped_file_region(const Twine &path,
return;
}
- ec = init(ofd, offset);
+ ec = init(ofd, true, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -545,7 +548,7 @@ mapped_file_region::mapped_file_region(int fd,
return;
}
- ec = init(fd, offset);
+ ec = init(fd, closefd, offset);
if (ec)
Mapping = 0;
}
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 2e6cc96e7f..23f3d14f91 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -593,6 +593,10 @@ retry_random_path:
random_path_utf16.push_back(0);
random_path_utf16.pop_back();
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to create + open the path.
retry_create_file:
HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
@@ -610,7 +614,9 @@ retry_create_file:
if (ec == windows_error::file_exists)
goto retry_random_path;
// Check for non-existing parent directories.
- if (ec == windows_error::path_not_found) {
+ if (ec == windows_error::path_not_found && !TriedToCreateParent) {
+ TriedToCreateParent = true;
+
// Create the directories using result_path as temp storage.
if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
random_path_utf16.size(), result_path))
@@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
-error_code mapped_file_region::init(int FD, uint64_t Offset) {
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return make_error_code(errc::invalid_argument);
}
@@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
if (Mapping == NULL) {
error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
Size = mbi.RegionSize;
}
+
+ // Close all the handles except for the view. It will keep the other handles
+ // alive.
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor); // Also closes FileHandle.
+ } else
+ ::CloseHandle(FileHandle);
return error_code::success();
}
@@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
FileDescriptor = 0;
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, true, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd,
, FileMappingHandle() {
FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
if (FileHandle == INVALID_HANDLE_VALUE) {
- _close(FileDescriptor);
+ if (closefd)
+ _close(FileDescriptor);
FileDescriptor = 0;
ec = make_error_code(errc::bad_file_descriptor);
return;
}
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, closefd, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd,
mapped_file_region::~mapped_file_region() {
if (Mapping)
::UnmapViewOfFile(Mapping);
- if (FileMappingHandle)
- ::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else if (FileHandle != INVALID_HANDLE_VALUE)
- ::CloseHandle(FileHandle);
}
#if LLVM_HAS_RVALUE_REFERENCES
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f71abd3b24..da26a371a7 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
- copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ size_t BytesRemaining = Size - BytesToWrite;
+ if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
+ // Too much left over to copy into our buffer.
+ return write(Ptr + BytesToWrite, BytesRemaining);
+ }
+ copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index ec84a72454..928b1203cd 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -20,9 +20,15 @@
namespace llvm {
SourceMgr SrcMgr;
+unsigned ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
const Twine &Msg) {
+ // Count the total number of errors printed.
+ // This is used to exit with an error code if there were any errors.
+ if (Kind == SourceMgr::DK_Error)
+ ++ErrorsPrinted;
+
SMLoc NullLoc;
if (Loc.empty())
Loc = NullLoc;
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index e1cd623783..dc4167b305 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
if (MainFn(Out.os(), Records))
return 1;
+ if (ErrorsPrinted > 0) {
+ errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
+ return 1;
+ }
+
// Declare success.
Out.keep();
return 0;
-
- return 1;
}
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12e16..572617cc07 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
return TopOfFrameOffset - FrameRegPos;
}
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- // FIXME: Make generic? Really consider after upstreaming. This code is now
- // shared between PEI, ARM *and* here.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-save register for this purpose or allocate an extra spill slot.
bool BigStack =
- (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695dacb4..69bb80a485 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
SMLoc StartLoc, EndLoc;
+ struct ImmWithLSLOp { // Payload type of the ImmWithLSL member of the operand union.
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp { // Payload type of the CondCode member of the operand union.
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp { // Payload type of the FPImm member of the operand union.
+ double Val;
+ };
+
+ struct ImmOp { // Payload type of the Imm member of the operand union.
+ const MCExpr *Val;
+ };
+
+ struct RegOp { // Payload type of the Reg member of the operand union.
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp { // Payload type of the ShiftExtend member of the operand union.
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp { // Payload type of the SysReg member of the operand union.
+ const char *Data; // presumably a non-owning pointer paired with Length - TODO confirm
+ unsigned Length;
+ };
+
+ struct TokOp { // Payload type of the Tok member of the operand union.
+ const char *Data; // presumably a non-owning pointer paired with Length - TODO confirm
+ unsigned Length;
+ };
+
union {
- struct {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
- } ImmWithLSL;
-
- struct {
- A64CC::CondCodes Code;
- } CondCode;
-
- struct {
- double Val;
- } FPImm;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
- } ShiftExtend;
-
- struct {
- const char *Data;
- unsigned Length;
- } SysReg;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
};
AArch64Operand(KindTy K, SMLoc S, SMLoc E)
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 0000000000..f0d4dbe2bf
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability which can provide higher performance than other, potentially more
+// direct patterns, specifically around when one micro-op reads a D-register
+// operand that has recently been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers which
+// are going to be used by DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <map>
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass { // Machine function pass reported as "ARM A15 S->D optimizer".
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII; // Assigned at the start of runOnMachineFunction.
+ const TargetRegisterInfo *TRI; // Assigned at the start of runOnMachineFunction.
+ MachineRegisterInfo *MRI; // Assigned at the start of runOnMachineFunction.
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+ // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements; // Instruction -> register returned by optimizeSDPattern for it.
+ std::set<MachineInstr *> DeadInstr; // Instructions erased at the end of runOnMachineFunction.
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if MO is a register operand whose register belongs to TRC:
+// for a virtual register its register class must have TRC as a super-class
+// (or be TRC itself); a physical register must be contained in TRC.
+// Non-register operands always yield false.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+                                  const TargetRegisterClass *TRC) {
+  if (MO.isReg()) {
+    const unsigned R = MO.getReg();
+    return TargetRegisterInfo::isVirtualRegister(R)
+               ? MRI->getRegClass(R)->hasSuperClassEq(TRC)
+               : TRC->contains(R);
+  }
+  return false;
+}
+
+// Returns ARM::ssub_1 if SReg is the ssub_1 sub-register of some register in
+// the DPR class, and ARM::ssub_0 otherwise.
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+  const bool IsSSub1 =
+      TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass) !=
+      ARM::NoRegister;
+  if (IsSSub1)
+    return ARM::ssub_1;
+  return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+//
+// A physical SReg is mapped with getDPRLaneFromSPR. For a virtual SReg the
+// defining instruction is inspected: if it is a COPY from an SPR register the
+// copy source is considered instead; a virtual source yields the sub-register
+// index on the def operand (ssub_1, otherwise ssub_0) and a physical source
+// is mapped with getDPRLaneFromSPR. ssub_0 is returned when no defining
+// instruction or def operand can be found.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+  if (!TRI->isVirtualRegister(SReg))
+    return getDPRLaneFromSPR(SReg);
+
+  MachineInstr *MI = MRI->getVRegDef(SReg);
+  if (!MI) return ARM::ssub_0;
+  MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+  // The null check has to come before the assert: the assert dereferences MO.
+  if (!MO) return ARM::ssub_0;
+  assert(MO->isReg() && "Non register operand found!");
+
+  if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+                                   &ARM::SPRRegClass)) {
+    SReg = MI->getOperand(1).getReg();
+  }
+
+  if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+    if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+    return ARM::ssub_0;
+  }
+  return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Record it in DeadInstr, then figure out what
+// instructions are also made dead by this and mark them for removal too.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) { // NOTE(review): nothing below pushes onto Front, so this body runs once - confirm intended.
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg); // NOTE(review): looks for a def of Reg on MI itself, not on Reg's defining instruction - confirm intended.
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) { // A def of a non-virtual register keeps Def alive.
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), // NOTE(review): walks the uses of Reg, not DefReg - confirm intended.
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+//
+// MI must be a COPY, an INSERT_SUBREG, or a REG_SEQUENCE whose operand 1 is in
+// the SPR class; anything else trips the assertion at the end. Returns the
+// register that holds the optimized value (0 only on the asserting path).
+//
+//  * COPY:          rewrite the copy source.
+//  * INSERT_SUBREG: if the base register is (a full copy of) an IMPLICIT_DEF
+//                   and the inserted value is an ssub_0 sub-register copy out
+//                   of a register of a compatible class, reuse that register
+//                   and mark MI dead. If only the IMPLICIT_DEF part holds,
+//                   rewrite the inserted register. Otherwise rewrite MI's def.
+//  * REG_SEQUENCE:  if all inputs but one are IMPLICIT_DEFs, rewrite that one
+//                   input; otherwise rewrite MI's def.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+  if (MI->isCopy()) {
+    return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+  }
+
+  if (MI->isInsertSubreg()) {
+    unsigned DPRReg = MI->getOperand(1).getReg();
+    unsigned SPRReg = MI->getOperand(2).getReg();
+
+    if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+      MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+      MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+      if (DPRMI && SPRMI) {
+        // See if the first operand of this insert_subreg is IMPLICIT_DEF
+        MachineInstr *ECDef = elideCopies(DPRMI);
+        if (ECDef != 0 && ECDef->isImplicitDef()) {
+          // Another corner case - if we're inserting something that is purely
+          // a subreg copy of a DPR, just use that DPR.
+
+          MachineInstr *EC = elideCopies(SPRMI);
+          // Is it a subreg copy of ssub_0?
+          if (EC && EC->isCopy() &&
+              EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+            DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+            // Find the thing we're subreg copying out of - is it of the same
+            // regclass as DPRMI? (i.e. a DPR or QPR).
+            unsigned FullReg = SPRMI->getOperand(1).getReg();
+            const TargetRegisterClass *TRC =
+              MRI->getRegClass(MI->getOperand(1).getReg());
+            if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+              DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+              DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+              eraseInstrWithNoUses(MI);
+              return FullReg;
+            }
+          }
+
+          return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+        }
+      }
+    }
+    return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+  }
+
+  if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+                                          &ARM::SPRRegClass)) {
+    // See if all bar one of the operands are IMPLICIT_DEF and insert the
+    // optimizer pattern accordingly.
+    unsigned NumImplicit = 0, NumTotal = 0;
+    unsigned NonImplicitReg = ~0U;
+
+    for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+      if (!MI->getOperand(I).isReg())
+        continue;
+      ++NumTotal;
+      unsigned OpReg = MI->getOperand(I).getReg();
+
+      if (!TRI->isVirtualRegister(OpReg))
+        break;
+
+      MachineInstr *Def = MRI->getVRegDef(OpReg);
+      if (!Def)
+        break;
+      if (Def->isImplicitDef())
+        ++NumImplicit;
+      else
+        NonImplicitReg = MI->getOperand(I).getReg();
+    }
+
+    // The scan above can stop early (non-virtual register or missing def)
+    // with the counters still satisfying the "all but one" test while
+    // NonImplicitReg is still the ~0U sentinel. The sentinel is not a real
+    // register, so fall back on MI's def in that case.
+    if (NumImplicit == NumTotal - 1 && NonImplicitReg != ~0U)
+      return optimizeAllLanesPattern(MI, NonImplicitReg);
+    else
+      return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+  }
+
+  assert(0 && "Unhandled update pattern!");
+  return 0;
+}
+
+// Returns true if MI is one of the pseudos that can insert a scalar (SPR)
+// value into a D or Q register, and the relevant source operand is in the SPR
+// register class. Only COPY, INSERT_SUBREG and REG_SEQUENCE qualify.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+  const TargetRegisterClass *SPR = &ARM::SPRRegClass;
+
+  // COPY and REG_SEQUENCE are judged by operand 1, INSERT_SUBREG by operand 2.
+  return (MI->isCopy() && usesRegClass(MI->getOperand(1), SPR)) ||
+         (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), SPR)) ||
+         (MI->isRegSequence() && usesRegClass(MI->getOperand(1), SPR));
+}
+
+// Follows a chain of full copies starting at MI and returns the first
+// instruction that is not a full copy. Returns NULL if the chain runs into a
+// copy from a non-virtual register or a virtual register with no definition.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+  MachineInstr *Cur = MI;
+  while (Cur->isFullCopy()) {
+    const unsigned SrcReg = Cur->getOperand(1).getReg();
+    if (!TRI->isVirtualRegister(SrcReg))
+      return NULL;
+
+    Cur = MRI->getVRegDef(SrcReg);
+    if (!Cur)
+      return NULL;
+  }
+  return Cur;
+}
+
+// Walks backwards from MI through PHIs and full copies and appends to Outs
+// every instruction reached that is neither a PHI nor a full copy. Sources
+// that are not virtual registers, or that have no definition, are ignored.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+                                        SmallVectorImpl<MachineInstr*> &Outs) {
+  // PHIs can form cycles, so remember which instructions were expanded.
+  std::set<MachineInstr *> Visited;
+  SmallVector<MachineInstr *, 8> Worklist;
+  Worklist.push_back(MI);
+
+  while (!Worklist.empty()) {
+    MachineInstr *Cur = Worklist.back();
+    Worklist.pop_back();
+
+    // insert() reports whether Cur is new; skip anything seen before.
+    if (!Visited.insert(Cur).second)
+      continue;
+
+    if (Cur->isPHI()) {
+      // Only the odd operand indices are inspected for incoming registers.
+      for (unsigned Idx = 1, NumOps = Cur->getNumOperands(); Idx != NumOps;
+           Idx += 2) {
+        const unsigned InReg = Cur->getOperand(Idx).getReg();
+        if (!TRI->isVirtualRegister(InReg))
+          continue;
+        if (MachineInstr *InDef = MRI->getVRegDef(InReg))
+          Worklist.push_back(InDef);
+      }
+      continue;
+    }
+
+    if (Cur->isFullCopy()) {
+      const unsigned SrcReg = Cur->getOperand(1).getReg();
+      if (TRI->isVirtualRegister(SrcReg)) {
+        if (MachineInstr *SrcDef = MRI->getVRegDef(SrcReg))
+          Worklist.push_back(SrcDef);
+      }
+      continue;
+    }
+
+    DEBUG(dbgs() << "Found partial copy" << *Cur <<"\n");
+    Outs.push_back(Cur);
+  }
+}
+
+// Collects the registers of all use operands of MI that belong to the DPR or
+// QPR register class (as decided by usesRegClass). Copy-like instructions,
+// INSERT_SUBREG, REG_SEQUENCE and KILL yield an empty list.
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+  SmallVector<unsigned, 8> Reads;
+
+  const bool IsIgnoredPseudo = MI->isCopyLike() || MI->isInsertSubreg() ||
+                               MI->isRegSequence() || MI->isKill();
+  if (IsIgnoredPseudo)
+    return Reads;
+
+  for (unsigned Idx = 0; Idx < MI->getNumOperands(); ++Idx) {
+    MachineOperand &Op = MI->getOperand(Idx);
+    const bool IsRegUse = Op.isReg() && Op.isUse();
+    if (IsRegUse && (usesRegClass(Op, &ARM::DPRRegClass) ||
+                     usesRegClass(Op, &ARM::QPRRegClass)))
+      Reads.push_back(Op.getReg());
+  }
+  return Reads;
+}
+
+// Builds a VDUPLN32d (VDUPLN32q when QPR is true) with operands Reg and Lane
+// plus the default predicate in front of InsertBefore. Returns the freshly
+// created DPR (or QPR) virtual register that the instruction defines.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator InsertBefore,
+                              DebugLoc DL,
+                              unsigned Reg, unsigned Lane, bool QPR) {
+  const TargetRegisterClass *ResultRC =
+      QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass;
+  const unsigned DupOpc = QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d;
+
+  unsigned Result = MRI->createVirtualRegister(ResultRC);
+  AddDefaultPred(BuildMI(MBB, InsertBefore, DL, TII->get(DupOpc), Result)
+                     .addReg(Reg)
+                     .addImm(Lane));
+  return Result;
+}
+
+// Builds a COPY of sub-register index Lane of DReg in front of InsertBefore.
+// The destination is a new virtual register of class TRC, which is returned.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator InsertBefore,
+                                    DebugLoc DL,
+                                    unsigned DReg, unsigned Lane,
+                                    const TargetRegisterClass *TRC) {
+  unsigned Result = MRI->createVirtualRegister(TRC);
+  MachineInstrBuilder Copy =
+      BuildMI(MBB, InsertBefore, DL, TII->get(TargetOpcode::COPY), Result);
+  // Flags are 0; Lane is passed as the sub-register index of the source.
+  Copy.addReg(DReg, 0, Lane);
+  return Result;
+}
+
+// Builds a REG_SEQUENCE in front of InsertBefore that places Reg1 at dsub_0
+// and Reg2 at dsub_1 of a new QPR virtual register, and returns that register.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator InsertBefore,
+                                  DebugLoc DL,
+                                  unsigned Reg1, unsigned Reg2) {
+  unsigned Result = MRI->createVirtualRegister(&ARM::QPRRegClass);
+  MachineInstrBuilder Seq = BuildMI(MBB, InsertBefore, DL,
+                                    TII->get(TargetOpcode::REG_SEQUENCE),
+                                    Result);
+  Seq.addReg(Reg1).addImm(ARM::dsub_0);
+  Seq.addReg(Reg2).addImm(ARM::dsub_1);
+  return Result;
+}
+
+// Builds a VEXTd32 with operands Ssub0, Ssub1 and immediate 1 (plus the
+// default predicate) in front of InsertBefore. Returns the new DPR virtual
+// register defined by it.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator InsertBefore,
+                           DebugLoc DL,
+                           unsigned Ssub0, unsigned Ssub1) {
+  unsigned Result = MRI->createVirtualRegister(&ARM::DPRRegClass);
+  MachineInstrBuilder Ext =
+      BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Result);
+  Ext.addReg(Ssub0).addReg(Ssub1).addImm(1);
+  AddDefaultPred(Ext);
+  return Result;
+}
+
+// Builds an INSERT_SUBREG in front of InsertBefore with operands DReg,
+// ToInsert and sub-register index Lane. Returns the new DPR_VFP2 virtual
+// register defined by it.
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator InsertBefore,
+                                   DebugLoc DL, unsigned DReg, unsigned Lane,
+                                   unsigned ToInsert) {
+  unsigned Result = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+  MachineInstrBuilder Insert = BuildMI(MBB, InsertBefore, DL,
+                                       TII->get(TargetOpcode::INSERT_SUBREG),
+                                       Result);
+  Insert.addReg(DReg).addReg(ToInsert).addImm(Lane);
+  return Result;
+}
+
+// Builds an IMPLICIT_DEF of a new DPR virtual register in front of
+// InsertBefore and returns that register.
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator InsertBefore,
+                                  DebugLoc DL) {
+  unsigned Undef = MRI->createVirtualRegister(&ARM::DPRRegClass);
+  BuildMI(MBB, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
+  return Undef;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++; // All new instructions are inserted right after MI.
+ unsigned Out; // Register returned to the caller.
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { // Q register: rebuild each D half, then join them.
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); // Out2 is reused for the rebuilt dsub_1 half.
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { // D register: dup both lanes and recombine.
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else { // S register: insert it into an undefined D register and dup that lane.
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI); // Only this branch marks MI as dead.
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+ // merge two SPR values to form a DPR register. In order to avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false; // Set once any use operand has been rewritten.
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II; // Shadows the parameter MI for the rest of this loop body.
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), // I and E shadow the outer loop's iterators.
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg; // Recorded even when NewReg is 0, so MI is not analyzed again.
+ }
+ }
+ return Modified;
+}
+
+// Pass entry point. Caches the instruction info, register info and machine
+// register info for Fn, resets the per-function bookkeeping, runs
+// runOnInstruction over every instruction of every block, and finally erases
+// all instructions collected in DeadInstr. Returns true if anything changed.
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+  TRI = Fn.getTarget().getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+
+  DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+  DeadInstr.clear();
+  Replacements.clear();
+
+  bool Changed = false;
+  for (MachineFunction::iterator BB = Fn.begin(), BBEnd = Fn.end();
+       BB != BBEnd; ++BB) {
+    MachineBasicBlock::iterator Cur = BB->begin();
+    MachineBasicBlock::iterator Last = BB->end();
+    while (Cur != Last) {
+      // The iterator is advanced before the instruction is processed.
+      Changed |= runOnInstruction(Cur++);
+    }
+  }
+
+  // Erasure is deferred until the whole function has been scanned.
+  for (std::set<MachineInstr *>::iterator Dead = DeadInstr.begin(),
+                                          DeadEnd = DeadInstr.end();
+       Dead != DeadEnd; ++Dead)
+    (*Dead)->eraseFromParent();
+
+  return Changed;
+}
+
+// Public factory (declared in ARM.h): allocates a new A15SDOptimizer pass and
+// returns it as a FunctionPass.
+FunctionPass *llvm::createA15SDOptimizerPass() {
+  FunctionPass *Pass = new A15SDOptimizer();
+  return Pass;
+}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 5faf8c320c..80e5f37eb0 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46915eecf6..68380847a0 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -143,14 +143,12 @@ include "ARMSchedule.td"
// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 58c779830e..13ec208793 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
.addReg(ARM::PC)
- .addImm(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed001ea24a..ed8b9cd9a1 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0ca6450e2b..3b12408768 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 514971f01e..40d2e8d265 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -3433,6 +3443,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
+/// and size(DestVec) > 128-bits.
+/// This is achieved by first extending SrcVec to twice its element width,
+/// splitting the result in half, extending each half to the destination
+/// element width, and then concatenating the halves into the destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -5621,6 +5672,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e11314d4fc..739300e4ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
@@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() {
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ UnsafeFPMath = false;
}
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
@@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+ // NEON f32 ops are not IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8ce22e1de2..5b5ee6aeb8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,6 +26,7 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
@@ -159,6 +160,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+ /// Target machine allowed unsafe FP math (such as use of NEON fp)
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -175,6 +179,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -189,7 +196,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 774521852a..b0f9e56db7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
@@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
- if (getARMSubtarget().hasNEON())
+ if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
+ getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 01c04b48cf..1019b972e9 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
@@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return NEONFloatConversionTbl[Idx].Cost;
}
-
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
@@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
-
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
@@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 6c678fdbd7..ed7b7ec9d2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "b r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
// Fall though for the Identifier case that is not a register or a
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 586834cf73..b832508a08 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 4c0f93c6cd..96a252e1b8 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1949,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+ // We used to assert if the offset was not properly aligned; however,
+ // there are cases where a misaligned pointer recast can cause this
+ // problem, and we need to allow for it. The front end warns of such
+ // misalignments with respect to load size.
switch(Opcode) {
@@ -1958,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
@@ -1968,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STrid:
case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -2002,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMw_SUBr_MEM_V4 :
case Hexagon::MEMw_ANDr_MEM_V4 :
case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
return (0 <= Offset && Offset <= 255);
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
@@ -2017,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMh_SUBr_MEM_V4 :
case Hexagon::MEMh_ANDr_MEM_V4 :
case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
return (0 <= Offset && Offset <= 127);
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index ad495ff306..dda6e247ac 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index d0fd7dcec1..bd83afc1cc 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 99a2fac95c..497f3866c9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index ae2e55617d..e504011dfd 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index a077dd7351..c673f59b5e 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -50,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ade6084752..c403f216b0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy
parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterClass);
+
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
@@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions);
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
@@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetAssignment();
+
bool parseDirectiveWord(unsigned Size, SMLoc L);
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -211,25 +222,30 @@ private:
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- RegisterKind Kind;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
@@ -385,6 +401,56 @@ public:
};
}
+namespace llvm {
+// Array of Mips instruction descriptions. Only declared here; the definition
+// lives elsewhere.
+extern const MCInstrDesc MipsInsts[];
+} // end namespace llvm
+
+/// Return the entry of MipsInsts at index \p Opcode.
+/// No range check is performed on \p Opcode.
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+  const MCInstrDesc *const Table = MipsInsts;
+  return Table[Opcode];
+}
+
+/// Post-process a matched instruction and append the resulting instruction(s)
+/// to \p Instructions.
+///
+/// For instructions that may load or store, every operand whose type is
+/// OPERAND_MEMORY or OPERAND_UNKNOWN is inspected: an immediate outside
+/// [-32768, 32767] or a symbol reference of kind VK_None sends the whole
+/// instruction to expandMemInst(). Otherwise the instruction goes through
+/// expandInstruction() when needsExpansion() asks for it, and is appended
+/// unchanged when it does not.
+///
+/// \returns false on every path.
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                       SmallVectorImpl<MCInst> &Instructions) {
+  const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode());
+  Inst.setLoc(IDLoc);
+
+  // Operands are only scanned for instructions that touch memory.
+  const bool AccessesMemory = Desc.mayLoad() || Desc.mayStore();
+  const unsigned NumOperands = AccessesMemory ? Desc.getNumOperands() : 0;
+
+  for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+    const MCOperandInfo &Info = Desc.OpInfo[OpIdx];
+    if (Info.OperandType != MCOI::OPERAND_MEMORY &&
+        Info.OperandType != MCOI::OPERAND_UNKNOWN)
+      continue;
+
+    MCOperand &Candidate = Inst.getOperand(OpIdx);
+    if (Candidate.isImm()) {
+      int Offset = Candidate.getImm();
+      const bool FitsInSigned16 = Offset >= -32768 && Offset <= 32767;
+      if (FitsInSigned16)
+        continue;
+      // Offset can't exceed a 16-bit value, so the access must be expanded.
+      expandMemInst(Inst, IDLoc, Instructions, Desc.mayLoad(), true);
+      return false;
+    }
+
+    if (!Candidate.isExpr())
+      continue;
+    const MCExpr *OffsetExpr = Candidate.getExpr();
+    if (OffsetExpr->getKind() != MCExpr::SymbolRef)
+      continue;
+    const MCSymbolRefExpr *SymRef =
+      static_cast<const MCSymbolRefExpr*>(OffsetExpr);
+    if (SymRef->getKind() != MCSymbolRefExpr::VK_None)
+      continue;
+    // A symbol without a relocation variant: expand it.
+    expandMemInst(Inst, IDLoc, Instructions, Desc.mayLoad(), false);
+    return false;
+  }
+
+  if (!needsExpansion(Inst))
+    Instructions.push_back(Inst);
+  else
+    expandInstruction(Inst, IDLoc, Instructions);
+
+  return false;
+}
+
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch(Inst.getOpcode()) {
@@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
}
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, // Expand one load/store into three instructions:
+ SmallVectorImpl<MCInst> &Instructions, // LUi, ADDu and the original opcode, each appended here.
+ bool isLoad,bool isImmOpnd) { // isImmOpnd: operand 2 is an immediate; otherwise it is read as an expression.
+ const MCSymbolRefExpr *SR; // assigned only when the offset expression is a SymbolRef
+ MCInst TempInst; // scratch instruction reused for all three emitted instructions
+ unsigned ImmOffset,HiOffset,LoOffset; // assigned only on the isImmOpnd path
+ const MCExpr *ExprOffset; // assigned only on the expression path
+ unsigned TmpRegNum; // register that receives the LUi result and becomes the new base
+ unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
+ Mips::CPURegsRegClassID,
+ getATReg());
+ // 1st operand is either source or dst register
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ unsigned RegOpNum = Inst.getOperand(0).getReg();
+ // 2nd operand is base register
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ unsigned BaseRegNum = Inst.getOperand(1).getReg();
+ // 3rd operand is either immediate or expression
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm(); // getImm() result is stored in an unsigned here
+ LoOffset = ImmOffset & 0x0000ffff; // low 16 bits of the offset
+ HiOffset = (ImmOffset & 0xffff0000) >> 16; // high 16 bits of the offset
+ // If the msb of LoOffset is 1 (negative number) we must increment HiOffset
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions will have the same location
+ TempInst.setLoc(IDLoc);
+ // 1st instruction in expansion is LUi. For load instruction we can use
+ // the dst register as a temporary if base and dst are different,
+ // but for stores we must use $at
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) { // NOTE(review): any other expression kind leaves LUi without its second operand
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+ // Add the instruction to the list
+ Instructions.push_back(TempInst);
+ // and prepare TempInst for next instruction
+ TempInst.clear();
+ // which is add temp register to base
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); // destination
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); // LUi result
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); // original base register
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ // and finally, create original instruction with low part
+ // of offset and new base
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) { // same test as the one guarding the SR assignment above
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
+
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success: {
- if (needsExpansion(Inst)) {
- SmallVector<MCInst, 4> Instructions;
- expandInstruction(Inst, IDLoc, Instructions);
- for(unsigned i =0; i < Instructions.size(); i++){
- Out.EmitInstruction(Instructions[i]);
- }
- } else {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
- }
+ if (processInstruction(Inst,IDLoc,Instructions))
+ return true;
+ for(unsigned i =0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
return false;
}
case Match_MissingFeature:
@@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Identifier:
+ // Look for an existing symbol; we should check if
+ // we need to assign the proper RegisterKind
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_None))
+ return false;
+ //else drop to expression parsing
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
// Check the type of the expression
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // it's a constant, evaluate lo or hi value
- int Val = MCE->getValue();
+ // It's a constant, evaluate lo or hi value
if (Str == "lo") {
- Val = Val & 0xffff;
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
} else if (Str == "hi") {
+ int Val = MCE->getValue();
int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
- //lower part is treated as signed int, so if it is negative
- //we must add 1 to hi part to compensate
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate
if (LoSign)
Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
}
- Res = MCConstantExpr::Create(Val, getContext());
return false;
}
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // it's a symbol, create symbolic expression from symbol
+ // It's a symbol, create symbolic expression from symbol
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
@@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
switch(getLexer().getKind()) {
default:
return true;
+ case AsmToken::Identifier:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!isMips64())
return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
}
+/// Try to resolve the identifier at the current token as a symbol alias and,
+/// on success, consume the token and push the matching operand.
+///
+/// The identifier must name an existing variable symbol. If its value is a
+/// reference to a symbol whose name starts with '$' and names a register, a
+/// register operand with kind \p RegisterKind is pushed. If its value is a
+/// constant expression, an immediate operand is pushed.
+///
+/// \returns true when an operand was pushed; false otherwise, in which case
+/// no token has been consumed.
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                  unsigned RegisterKind) {
+
+  MCSymbol *Alias =
+    getContext().LookupSymbol(Parser.getTok().getIdentifier());
+  if (!Alias)
+    return false;
+
+  SMLoc AliasLoc = Parser.getTok().getLoc();
+  if (!Alias->isVariable())
+    return false;
+
+  const MCExpr *AliasValue = Alias->getVariableValue();
+  switch (AliasValue->getKind()) {
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr *Target =
+      static_cast<const MCSymbolRefExpr*>(AliasValue);
+    const StringRef TargetName = Target->getSymbol().getName();
+    if (!TargetName.startswith("$"))
+      return false;
+    // Look up the register with the corresponding name.
+    int RegNum = matchRegisterName(TargetName.substr(1), isMips64());
+    if (RegNum <= -1)
+      return false;
+    Parser.Lex();
+    MipsOperand *RegOperand =
+      MipsOperand::CreateReg(RegNum, AliasLoc, Parser.getTok().getLoc());
+    RegOperand->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+    Operands.push_back(RegOperand);
+    return true;
+  }
+  case MCExpr::Constant: {
+    Parser.Lex();
+    const MCConstantExpr *ConstValue =
+      static_cast<const MCConstantExpr*>(AliasValue);
+    MipsOperand *ImmOperand =
+      MipsOperand::CreateImm(ConstValue, AliasLoc, Parser.getTok().getLoc());
+    Operands.push_back(ImmOperand);
+    return true;
+  }
+  default:
+    return false;
+  }
+}
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // line should look like:
+ // Line should look like:
// .set noat
// set at reg to 0
Options.setATReg(0);
// eat noat
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); //eat '='
+ getParser().Lex(); // eat '='
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // eat '$'
+ Parser.Lex(); // Eat '$'
const AsmToken &Reg = Parser.getTok();
if (Reg.is(AsmToken::Identifier)) {
AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); //eat reg
+ getParser().Lex(); // Eat reg
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
@@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
}
+
+/// Parse the "<name>, <expression>" form of a .set directive and bind the
+/// expression to a newly created symbol called <name>.
+///
+/// An error is reported when the identifier, the comma or the expression
+/// cannot be parsed, and when a symbol with that name already exists.
+///
+/// \returns false on success, otherwise the result of reportParseError().
+bool MipsAsmParser::parseSetAssignment() {
+  StringRef Name;
+  const MCExpr *Value = 0;
+
+  // Stop at the first failure. Carrying on after a failed parse would report
+  // follow-on errors and could bind an unset Value to the new symbol.
+  if (Parser.parseIdentifier(Name))
+    return reportParseError("expected identifier after .set");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return reportParseError("unexpected token in .set directive");
+  Lex(); // Eat the comma.
+
+  if (Parser.parseExpression(Value))
+    return reportParseError("expected valid expression after comma");
+
+  // Check if the Name already exists as a symbol.
+  if (getContext().LookupSymbol(Name))
+    return reportParseError("symbol already defined");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+  Sym->setVariableValue(Value);
+
+  return false;
+}
bool MipsAsmParser::parseDirectiveSet() {
// get next token
@@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() {
// ignore this directive for now
Parser.eatToEndOfStatement();
return false;
+ } else {
+ // it is just an identifier, look for assignment
+ parseSetAssignment();
+ return false;
}
return true;
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 58aa1be34d..cf8bb189e4 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
@@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 96f93a0789..e198a7c983 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
@@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm()>>2;
+
assert(MO.isExpr() &&
"getJumpTargetOpValue expects only expressions or an immediate");
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 0000000000..00b3449300
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select a multiply plus the moves that read back its results.
+///
+/// A machine node with opcode \p Opc is built from the first two operands of
+/// \p N and produces glue. When \p HasLo is set, an Mflo16 node consumes that
+/// glue and produces new glue; when \p HasHi is set, an Mfhi16 node consumes
+/// whichever glue value is current.
+///
+/// \returns the (Lo, Hi) node pair; an entry is null if it was not requested.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+                               bool HasLo, bool HasHi) {
+  SDNode *MultNode = CurDAG->getMachineNode(Opc, DL, MVT::Glue,
+                                            N->getOperand(0),
+                                            N->getOperand(1));
+  SDValue GlueVal = SDValue(MultNode, 0);
+
+  SDNode *LoNode = 0;
+  if (HasLo) {
+    const unsigned MfloOpc = Mips::Mflo16;
+    LoNode = CurDAG->getMachineNode(MfloOpc, DL, Ty, MVT::Glue, GlueVal);
+    // Result 1 of the Mflo16 node is its glue output.
+    GlueVal = SDValue(LoNode, 1);
+  }
+
+  SDNode *HiNode = 0;
+  if (HasHi) {
+    const unsigned MfhiOpc = Mips::Mfhi16;
+    HiNode = CurDAG->getMachineNode(MfhiOpc, DL, Ty, GlueVal);
+  }
+
+  return std::make_pair(LoNode, HiNode);
+}
+
+/// Insert, at the start of the function's first basic block, the code that
+/// computes the global base register. Does nothing unless MipsFunctionInfo
+/// reports that the global base register has been set.
+///
+/// Instructions built, in this order:
+///   HiReg        = LiRxImmX16      _gp_disp (MO_ABS_HI)
+///   PcRelReg     = AddiuRxPcImmX16 _gp_disp (MO_ABS_LO)
+///   ShiftedHiReg = SllX16          HiReg, 16
+///   GlobalBase   = AdduRxRyRz16    PcRelReg, ShiftedHiReg
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+  MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+  if (!FuncInfo->globalBaseRegSet())
+    return;
+
+  MachineBasicBlock &EntryMBB = MF.front();
+  MachineBasicBlock::iterator InsertPt = EntryMBB.begin();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // Reuse the debug location of the first instruction when the block has one.
+  DebugLoc DL = InsertPt != EntryMBB.end() ? InsertPt->getDebugLoc()
+                                           : DebugLoc();
+
+  unsigned GlobalBaseReg = FuncInfo->getGlobalBaseReg();
+  const TargetRegisterClass *RC =
+    (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+  unsigned HiReg = MRI.createVirtualRegister(RC);
+  unsigned PcRelReg = MRI.createVirtualRegister(RC);
+  unsigned ShiftedHiReg = MRI.createVirtualRegister(RC);
+
+  BuildMI(EntryMBB, InsertPt, DL, TII.get(Mips::LiRxImmX16), HiReg)
+    .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+  BuildMI(EntryMBB, InsertPt, DL, TII.get(Mips::AddiuRxPcImmX16), PcRelReg)
+    .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+  BuildMI(EntryMBB, InsertPt, DL, TII.get(Mips::SllX16), ShiftedHiReg)
+    .addReg(HiReg).addImm(16);
+  BuildMI(EntryMBB, InsertPt, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+    .addReg(PcRelReg).addReg(ShiftedHiReg);
+}
+
+// Copy SP into the Mips16 SP alias register with a MoveR3216 placed at the
+// start of the first MBB of the function. Nothing is emitted unless
+// MipsFunctionInfo reports that the alias register has been set.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+  MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
+  if (!FuncInfo->mips16SPAliasRegSet())
+    return;
+
+  MachineBasicBlock &EntryMBB = MF.front();
+  MachineBasicBlock::iterator InsertPt = EntryMBB.begin();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // Reuse the debug location of the first instruction when the block has one.
+  DebugLoc DL = InsertPt != EntryMBB.end() ? InsertPt->getDebugLoc()
+                                           : DebugLoc();
+  unsigned SPAliasReg = FuncInfo->getMips16SPAliasReg();
+
+  BuildMI(EntryMBB, InsertPt, DL, TII.get(Mips::MoveR3216), SPAliasReg)
+    .addReg(Mips::SP);
+}
+
+/// Post-selection hook: set up the global base register first, then the
+/// Mips16 SP alias register. Each helper returns early when its register is
+/// not marked as set.
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+  MachineFunction &Fn = MF;
+  initGlobalBaseReg(Fn);
+  initMips16SPAliasReg(Fn);
+}
+
+/// getMips16SPAliasReg - Return a register SDValue, of pointer type, for the
+/// register that MipsFunctionInfo records as the Mips16 alias of SP.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+  unsigned SPAliasReg = MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+  SDValue AliasRegValue = CurDAG->getRegister(SPAliasReg, TLI.getPointerTy());
+  return AliasRegValue;
+}
+
+/// Choose the register through which a stack access made by \p Parent is
+/// addressed and store it in \p AliasReg.
+///
+/// For an ISD::LOAD or ISD::STORE whose memory type is 8 or 16 bits wide the
+/// result is S0 when the function has a frame pointer and the Mips16 SP alias
+/// register otherwise. In every other case (including a null \p Parent) the
+/// result is SP itself.
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+  SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+
+  // Width in bits of the accessed memory; stays 0 when Parent is neither a
+  // load nor a store. The opcode fixes the node class, so cast<> is used
+  // rather than an unchecked dyn_cast<>.
+  unsigned MemBits = 0;
+  if (Parent) {
+    switch (Parent->getOpcode()) {
+    case ISD::LOAD:
+      MemBits = cast<LoadSDNode>(Parent)->getMemoryVT().getSizeInBits();
+      break;
+    case ISD::STORE:
+      MemBits = cast<StoreSDNode>(Parent)->getMemoryVT().getSizeInBits();
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (MemBits == 8 || MemBits == 16) {
+    AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+               AliasFPReg : getMips16SPAliasReg();
+    return;
+  }
+
+  AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+}
+
+bool Mips16DAGToDAGISel::selectAddr16( // Split Addr into Base + Offset (+ Alias register for frame indexes).
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, // Parent is the node using Addr; it may be null.
+ SDValue &Alias) { // Returns true when Base/Offset/Alias were filled in, false to reject the match.
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy); // default: constant 0, replaced only on the frame-index paths
+
+ // If Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // on PIC code Load GA
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) { // non-PIC: reject direct symbol/global addresses
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); // NOTE(review): dereferenced unchecked; presumably guaranteed by isBaseWithConstantOffset
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); // range check above used getSExtValue(); the constant is built from getZExtValue()
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); // NOTE(review): Parent is not null-checked here although getMips16SPRefReg tolerates a null Parent
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr; // fallback: the whole address is the base, with a zero offset
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions.
+/// Returns (true, node) when the node was handled here and (false, null) otherwise.
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS; // operand 2 is the incoming carry/borrow flag
+ unsigned Opc = InFlag.getOpcode(); (void)Opc; // (void) silences the unused warning when the assert is compiled out
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0); // ADDE compares against result 0 of the flag producer
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0); // SUBE compares against operand 0 of the flag producer
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); // Carry = SltuRxRyRz16(CmpLHS, flag producer's operand 1)
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, // AddCarry = AdduRxRyRz16(Carry, RHS)
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, // Node is morphed into MOp(LHS, AddCarry)
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty()) // result 0 is replaced by the Lo node
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty()) // result 1 is replaced by the Hi node
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL); // handled; uses were rewired, so no node is returned
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; // only the Hi half is requested
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL); // not handled in this function
+}
+
+/// Allocate a new Mips16DAGToDAGISel for \p TM and return it as a
+/// FunctionPass. The object is created with new; nothing here frees it.
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+  FunctionPass *ISelPass = new Mips16DAGToDAGISel(TM);
+  return ISelPass;
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 0000000000..baa85877d9
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel { // Mips16 specialization of the Mips DAG-to-DAG instruction selector
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} // only forwards TM to the base class
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, // Select a multiply; returns the (Lo, Hi) nodes,
+ EVT Ty, bool HasLo, bool HasHi); // producing only the halves requested by HasLo/HasHi.
+
+ SDValue getMips16SPAliasReg(); // Register value for the Mips16 alias of SP
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); // Sets AliasReg to the register used to address the stack for Parent
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, // Splits address N into Base, Offset and Alias;
+ SDValue &Offset, SDValue &Alias); // returns false when the address is rejected.
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); // first: whether Node was handled; second: resulting node, may be null
+
+ virtual void processFunctionAfterISel(MachineFunction &MF); // Runs initGlobalBaseReg, then initMips16SPAliasReg
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF); // Inserts the copy of SP into the Mips16 SP alias register in the first MBB
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); // Creates a new Mips16DAGToDAGISel pass for TM
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 0000000000..23eb5375ac
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.cpp - Mips16 DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Dont expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // Set up as if mips32 and then revert (clearRegisterClasses) so we can test
+ // the mechanism for switching.
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the Mips16 register classes (only CPU16Regs, for i32).
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ break;
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ break;
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, per the ABI,
+// has either arguments or returned values placed in floating point registers,
+// we use a set of helper functions. (This includes functions which return type
+// complex, which on Mips is returned in a pair of floating point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
+// The helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// This suffix can then be calculated as follows:
+// for a given argument Arg:
+// Arg1x, Arg2x = 1 : Arg is sf
+// 2 : Arg is df
+// 0: Arg is neither sf nor df
+// So this stub is the string for number Arg1x + Arg2x*4.
+// However not all numbers between 0 and 10 are possible; we check anyway and
+// assert if the impossible exists.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >=2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type.
+// return type: float sf_
+// double df_
+// single complex sc_
+// double complex dc_
+// others NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return type dependent prefix + stub number
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There is a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries. Ones with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for the one for return types other than
+// sf, df, sc, dc, in which we only care about entries which have sf or df as
+// a first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char* Mips16TargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy ->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // check if it's complex
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // Currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier, so we assume that we don't know what kind it is and
+ // generate the helper (unless the callee is listed in NoHelperNeeded).
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper) Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 (V0 when going through a Mips16 helper stub) should contain the address
+ // of the callee if -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the register to branch on (operand 3), and the
+ // true/false values to select between; Opc is the branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // (no separate compare here: Opc tests MI operand 3 directly)
+ // Opc cond, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add copy0MBB (fallthrough) and sinkMBB (branch target) as successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the two registers fed to Opc2 (operands 3, 4),
+ // and the true/false values to select between; Opc1 is the branch opcode.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // Opc2 r3, r4 (registers from MI operands 3 and 4)
+ // Opc1 sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add copy0MBB (fallthrough) and sinkMBB (branch target) as successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the register and immediate fed to Opc2
+ // (operands 3, 4), and the true/false values; Opc1 is the branch opcode.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // Opc2 r3, imm (register from MI operand 3, immediate from operand 4)
+ // Opc1 sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add copy0MBB (fallthrough) and sinkMBB (branch target) as successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock
+ *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 0000000000..b23e2a1f37
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Mips16ISELLOWERING_H
+#define Mips16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const;
+ };
+}
+
+#endif // Mips16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index a9e9c52716..629382965b 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 6dff548505..77b08cb11e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -45,270 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- /// (reg + imm).
- bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Fall back on this function if all else fails.
- bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Match integer address pattern.
- bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
@@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- }
-
+ llvm_unreachable("Unimplemented function.");
return false;
}
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- return selectAddrRegImm(Addr, Base, Offset) ||
- selectAddrDefault(Addr, Base, Offset);
-}
-
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
-}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 0000000000..cf0f9c58aa
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c452dee8d0..4bf43f42e3 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-lower"
-#include <set>
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -42,26 +41,9 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
-static cl::opt<bool>
-Mips16HardFloat("mips16-hard-float", cl::NotHidden,
- cl::desc("MIPS: mips16 hard float enable."),
- cl::init(false));
-
-static cl::opt<bool> DontExpandCondPseudos16(
- "mips16-dont-expand-cond-pseudo",
- cl::init(false),
- cl::desc("Dont expand conditional move related "
- "pseudos for Mips 16"),
- cl::Hidden);
-
-
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
-static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
@@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
@@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
@@ -211,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-namespace {
- struct LTStr {
- bool operator()(const char *S1, const char *S2) const
- {
- return strcmp(S1, S2) < 0;
- }
- };
-
- std::set<const char*, LTStr> NoHelperNeeded;
-}
-
-void MipsTargetLowering::setMips16LibcallName
- (RTLIB::Libcall L, const char *Name) {
- setLibcallName(L, Name);
- NoHelperNeeded.insert(Name);
-}
-
-void MipsTargetLowering::setMips16HardFloatLibCalls() {
- setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
- setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
- setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
- setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
- setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
- setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
- setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
- setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
- setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
- setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
- setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
- setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
- setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
- setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
- setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
- setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
- setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
- setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
- setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
- setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
- setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
- setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
- setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
- setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
- setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
- setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
-}
-
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (Mips16HardFloat)
- setMips16HardFloatLibCalls();
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
// Load extented operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -472,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -523,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
@@ -531,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
MaxStoresPerMemcpy = 16;
}
-bool
-MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- if (Fast)
- *Fast = true;
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -1099,347 +952,6 @@ static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) {
}
MachineBasicBlock *
-MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
-MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
- .addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-MachineBasicBlock *MipsTargetLowering::emitSelT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addReg(MI->getOperand(4).getReg());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitSeliT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addImm(MI->getOperand(4).getImm());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock
- *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- unsigned regY = MI->getOperand(1).getReg();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- int64_t imm = MI->getOperand(1).getImm();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- unsigned CmpOpc;
- if (isUInt<8>(imm))
- CmpOpc = CmpiOpc;
- else if (isUInt<16>(imm))
- CmpOpc = CmpiXOpc;
- else
- llvm_unreachable("immediate field not usable");
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-static unsigned Mips16WhichOp8uOr16simm
- (unsigned shortOp, unsigned longOp, int64_t Imm) {
- if (isUInt<8>(Imm))
- return shortOp;
- else if (isInt<16>(Imm))
- return longOp;
- else
- llvm_unreachable("immediate field not usable");
-}
-
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- unsigned regY = MI->getOperand(2).getReg();
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- int64_t Imm = MI->getOperand(2).getImm();
- unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addImm(Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
@@ -1548,77 +1060,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
return emitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return emitBPOSGE32(MI, BB);
- case Mips::SelBeqZ:
- return emitSel16(Mips::BeqzRxImm16, MI, BB);
- case Mips::SelBneZ:
- return emitSel16(Mips::BnezRxImm16, MI, BB);
- case Mips::SelTBteqZCmpi:
- return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBteqZSlti:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBteqZSltiu:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBtneZCmpi:
- return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBtneZSlti:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBtneZSltiu:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBteqZCmp:
- return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBteqZSlt:
- return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBteqZSltu:
- return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::SelTBtneZCmp:
- return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBtneZSlt:
- return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBtneZSltu:
- return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BteqzT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::BteqzT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BtnezT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BtnezT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::BtnezT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- break;
- case Mips::SltCCRxRy16:
- return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
- break;
- case Mips::SltiCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SltiuCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SltuCCRxRy16:
- return emitFEXT_CCRX16_ins
- (Mips::SltuRxRy16, MI, BB);
}
}
@@ -3074,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// isEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -3114,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
-//
-// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
-// cleaner way to do all of this but it will have to wait until the traditional
-// gcc mechanism is completed.
-//
-// For Pic, in order for Mips16 code to call Mips32 code which according the abi
-// have either arguments or returned values placed in floating point registers,
-// we use a set of helper functions. (This includes functions which return type
-// complex which on Mips are returned in a pair of floating point registers).
-//
-// This is an encoding that we inherited from gcc.
-// In Mips traditional O32, N32 ABI, floating point numbers are passed in
-// floating point argument registers 1,2 only when the first and optionally
-// the second arguments are float (sf) or double (df).
-// For Mips16 we are only concerned with the situations where floating point
-// arguments are being passed in floating point registers by the ABI, because
-// Mips16 mode code cannot execute floating point instructions to load those
-// values and hence helper functions are needed.
-// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
-// the helper function suffixs for these are:
-// 0, 1, 5, 9, 2, 6, 10
-// this suffix can then be calculated as follows:
-// for a given argument Arg:
-// Arg1x, Arg2x = 1 : Arg is sf
-// 2 : Arg is df
-// 0: Arg is neither sf or df
-// So this stub is the string for number Arg1x + Arg2x*4.
-// However not all numbers between 0 and 10 are possible, we check anyway and
-// assert if the impossible exists.
-//
-
-unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
- (ArgListTy &Args) const {
- unsigned int resultNum = 0;
- if (Args.size() >= 1) {
- Type *t = Args[0].Ty;
- if (t->isFloatTy()) {
- resultNum = 1;
- }
- else if (t->isDoubleTy()) {
- resultNum = 2;
- }
- }
- if (resultNum) {
- if (Args.size() >=2) {
- Type *t = Args[1].Ty;
- if (t->isFloatTy()) {
- resultNum += 4;
- }
- else if (t->isDoubleTy()) {
- resultNum += 8;
- }
- }
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
- return resultNum;
-}
-//
-// prefixs are attached to stub numbers depending on the return type .
-// return type: float sf_
-// double df_
-// single complex sc_
-// double complext dc_
-// others NO PREFIX
-//
-//
-// The full name of a helper function is__mips16_call_stub +
-// return type dependent prefix + stub number
-//
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently but my main purpose is to not waste runtime
-// on something that we can enumerate in the source. Another possibility is
-// to have a python script to generate these mapping tables. This will do
-// for now. There are a whole series of helper function mapping arrays, one
-// for each return type class as outlined above. There there are 11 possible
-// entries. Ones with 0 are ones which should never be selected
-//
-// All the arrays are similar except for ones which return neither
-// sf, df, sc, dc, in which only care about ones which have sf or df as a
-// first parameter.
-//
-#define P_ "__mips16_call_stub_"
-#define MAX_STUB_NUMBER 10
-#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
-#define T P "0" , T1
-#define P P_
-static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
-#undef P
-#define P P_ "sf_"
-static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "df_"
-static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "sc_"
-static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "dc_"
-static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#undef P_
-
-
-const char* MipsTargetLowering::
- getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
- const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
-#ifndef NDEBUG
- const unsigned int maxStubNum = 10;
- assert(stubNum <= maxStubNum);
- const bool validStubNum[maxStubNum+1] =
- {true, true, true, false, false, true, true, false, false, true, true};
- assert(validStubNum[stubNum]);
-#endif
- const char *result;
- if (RetTy->isFloatTy()) {
- result = sfMips16Helper[stubNum];
- }
- else if (RetTy ->isDoubleTy()) {
- result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
- // check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
- result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
- result = dcMips16Helper[stubNum];
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- if (stubNum == 0) {
- needHelper = false;
- return "";
- }
- result = vMips16Helper[stubNum];
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
- needHelper = true;
- return result;
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
}
/// LowerCall - functions arguments are copied from virtual regs to
@@ -3287,26 +2593,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
- const char* Mips16HelperFunction = 0;
- bool NeedMips16Helper = false;
-
- if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat &&
- Mips16HardFloat) {
- //
- // currently we don't have symbols tagged with the mips16 or mips32
- // qualifier so we will assume that we don't know what kind it is.
- // and generate the helper
- //
- bool LookupHelper = true;
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
- LookupHelper = false;
- }
- }
- if (LookupHelper) Mips16HelperFunction =
- getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
-
- }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
@@ -3466,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GlobalOrExternal = true;
}
- SDValue JumpTarget = Callee;
-
- // T9 should contain the address of the callee function if
- // -reloction-model=pic or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- unsigned V0Reg = Mips::V0;
- if (NeedMips16Helper) {
- RegsToPass.push_front(std::make_pair(V0Reg, Callee));
- JumpTarget = DAG.getExternalSymbol(
- Mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
- }
- else {
- RegsToPass.push_front(std::make_pair(T9Reg, Callee));
-
- if (!Subtarget->inMips16Mode())
- JumpTarget = SDValue();
- }
- }
-
- // Insert node "GP copy globalreg" before call to function.
- //
- // R_MIPS_CALL* operators (emitted when non-internal functions are called
- // in PIC mode) allow symbols to be resolved via lazy binding.
- // The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
- // stuck together.
- SDValue InFlag;
-
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops(1, Chain);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (JumpTarget.getNode())
- Ops.push_back(JumpTarget);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
if (IsTailCall)
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -4124,14 +3347,14 @@ static bool isF128SoftLibCall(const char *CallSym) {
const char * const *End = LibCalls + array_lengthof(LibCalls);
// Check that LibCalls is sorted alphabetically.
-#ifndef NDEBUG
- LTStr Comp;
+ MipsTargetLowering::LTStr Comp;
+#ifndef NDEBUG
for (const char * const *I = LibCalls; I < End - 1; ++I)
assert(Comp(*I, *(I + 1)));
#endif
- return std::binary_search(LibCalls, End, CallSym, LTStr());
+ return std::binary_search(LibCalls, End, CallSym, Comp);
}
/// This function returns true if Ty is fp128 or i128 which was originally a
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index de925e16ab..71977d7f1c 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -152,9 +152,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -177,17 +177,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
- void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- void setMips16HardFloatLibCalls();
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
- unsigned int
- getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
- const char *getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -283,6 +300,7 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -321,9 +339,10 @@ namespace llvm {
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -377,10 +396,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -419,8 +434,6 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
@@ -430,29 +443,11 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const;
-
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
}
#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 25b5d240be..022987f12e 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -256,6 +256,7 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem64 : Operand<i64> {
@@ -263,18 +264,21 @@ def mem64 : Operand<i64> {
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea_64 : Operand<i64> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
// size operand of ext instruction
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 13b2a6ac17..3c210e71bb 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,7 +58,8 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f93dd86c17..6d76e8a54a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,6 +18,10 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
class MipsReg<bits<16> Enc, string n> : Register<n> {
let HWEncoding = Enc;
@@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>,
+ Unallocatable;
def CPURegsAsmOperand : AsmOperandClass {
let Name = "CPURegsAsm";
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 0000000000..e22c3c8b3f
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,460 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // GNU linker requires that the first two instructions appear at the beginning
+ // of a function and no instructions be inserted before or between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // In PIC code the address is a Wrapper node; use its operands as base/offset.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 0000000000..6137ab040b
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 0000000000..287e2ede0e
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,197 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 0000000000..04a28ce54c
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,46 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index fd930f0335..33363580ab 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index c4928c21eb..7e5f192264 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
const DataLayout DL; // Calculates type size & alignment
OwningPtr<const MipsInstrInfo> InstrInfo;
OwningPtr<const MipsFrameLowering> FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
@@ -63,7 +63,7 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 7917f7736e..709daa4e4b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 4a420929d0..38a7420d97 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 972e13852e..b0680fbb8c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -17,6 +17,10 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index b98cc489f6..81a54d7015 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index caeb1796f7..2a680661d3 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -136,3 +136,8 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0a396e6693..0108f7256e 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -281,6 +299,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -311,13 +354,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -780,7 +822,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
@@ -822,30 +864,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
-
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -853,6 +880,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -1031,6 +1059,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
}
bool
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d09e47fafd..6f5f9368c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -32,7 +32,9 @@ public:
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -41,10 +43,13 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -144,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 17bea8a6a6..90a3a5c6a4 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -164,6 +164,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13cb358fc0..c7d454692e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -208,6 +212,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, etc. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -564,6 +576,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -1028,7 +1042,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1077,7 +1091,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1140,7 +1154,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1180,13 +1194,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
} else
return false;
@@ -1205,6 +1221,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -4555,6 +4575,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -5551,6 +5586,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
@@ -5744,7 +5782,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5863,9 +5901,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare a register to hold the IP.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -6133,7 +6400,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6622,6 +6889,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6806,13 +7076,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be deferred until PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6851,6 +7124,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3931384d89..1b0427e021 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -119,6 +120,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -321,6 +328,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -395,6 +403,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -449,6 +463,10 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -604,6 +622,9 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 01201304f7..d2b1d6d392 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,12 +30,12 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
+ let MIOperandInfo = (ops i64imm:$imm);
}
def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64
let PrintMethod = "printMemRegImm";
let EncoderMethod = "getMemRIXEncoding";
- let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo64:$off, ptr_rc_nor0:$reg);
}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
@@ -273,6 +273,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -393,15 +406,16 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, s16imm64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>;
+def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA,
+ imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
@@ -536,7 +550,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -555,7 +569,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set G8RC:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
@@ -567,25 +582,26 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
@@ -612,30 +628,30 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -648,7 +664,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
"ld $rD, $src", LdStLD,
[]>, isPPC64;
@@ -684,12 +700,12 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -798,82 +814,11 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
[(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
-}
-
-let PPC970_Unit = 2 in {
-
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked, isPPC64;
-
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
"std $rT, $dst", LdStSTD,
@@ -884,6 +829,60 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
PPC970_DGroup_Cracked;
}
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STBU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STHU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STWU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(aligned4pre_store G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STDU G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+
+def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STBUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STHUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STWUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STDUX G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
//===----------------------------------------------------------------------===//
@@ -979,3 +978,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
(ADDIS8 G8RC:$in, tjumptable:$g)>;
def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS8 G8RC:$in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst),
+ (STDX G8RC:$rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 0cf28ae4b5..0ed7ff2cc4 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
(VRFIZ VRRC:$vA)>;
def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
(VRFIN VRRC:$vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21..16dbee5d29 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a0517a80a9..cf39386c67 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,11 +33,6 @@
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
@@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +522,20 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +552,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,7 +578,8 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
@@ -642,37 +605,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +638,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +667,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea43..5d4ae915b4 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 460e94342d..84cdb1f39d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -158,6 +158,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -278,6 +286,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries, then the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -314,9 +354,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -344,21 +381,30 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$offreg, ptr_rc:$ptrreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
@@ -372,6 +418,10 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
@@ -473,6 +523,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
+// The direct BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
+ }
+}
+
// Darwin ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
@@ -551,6 +609,23 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -708,69 +783,69 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -835,44 +910,39 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in { // Primary opcodes: stwu 37, stbu 39, sthu 45, stfsu 53, stfdu 55.
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>, // Was 37 (stwu's opcode); stfsu is 53.
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>, // Was 37 (stwu's opcode); stfdu is 55.
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STBU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STHU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store F4RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STFSU F4RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store F8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STFDU F8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
@@ -887,48 +957,6 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
@@ -950,6 +978,44 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
[(store F8RC:$frS, xaddr:$dst)]>;
}
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STBUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STHUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STWUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STFSUX F4RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STFDUX F8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -959,12 +1025,12 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, s16imm:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>;
+def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
@@ -974,12 +1040,13 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA,
+ imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
@@ -1213,6 +1280,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1435,7 +1525,7 @@ let Uses = [RM] in {
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 045b375dd8..b1636a20b8 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
@@ -78,6 +85,8 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
LRStoreRequired(false),
MinReservedArea(0),
@@ -109,6 +118,12 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index df245cc655..75e20e4a26 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,26 +46,8 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
@@ -89,17 +71,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -123,11 +105,33 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // NOTE: the mask names read backwards. CSR_NoRegs_Altivec_RegMask is the one
+ // returned when the subtarget has NO Altivec: it has the Altivec registers
+ // masked so that they are not saved and restored around users of this mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ return CSR_NoRegs_RegMask;
+}
+
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R0);
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
@@ -139,35 +143,22 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code with be in the general form
///
@@ -271,28 +236,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -302,17 +255,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -369,8 +315,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -412,8 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -438,6 +382,65 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Expand the SPILL_VRSAVE pseudo at II into a move-from-VRSAVE plus a store.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // The replacement instructions are inserted into the pseudo's own block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ unsigned Reg = PPC::R0; // Fixed scratch register; see the FIXME above.
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill), // Scratch copy is dead after the store.
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Expand the RESTORE_VRSAVE pseudo at II into a load plus a move-to-VRSAVE.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // The replacement instructions are inserted into the pseudo's own block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ unsigned Reg = PPC::R0; // Fixed scratch register; see the FIXME above.
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill); // Last use of the reloaded scratch value.
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
@@ -498,15 +501,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex, SPAdj, RS);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -529,7 +536,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
isIXAddr = true;
break;
}
-
+
+ bool noImmForm = false;
+ switch (OpC) {
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVX:
+ case PPC::LVXL:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVX:
+ case PPC::STVXL:
+ noImmForm = true;
+ break;
+ }
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -553,7 +578,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -563,13 +589,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC);
// Insert a set of rA with the full offset value before the ld, st, or add
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
@@ -584,7 +606,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9840666242..ba0fb48b85 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -44,16 +44,22 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
void lowerDynamicAlloc(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const;
@@ -61,6 +67,11 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 8ee9b1ec9f..55d36bbfc9 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -86,6 +86,14 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "r0">;
+def ZERO8 : GP8<ZERO, "r0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
def CR0GT : CRBIT< 1, "1">;
@@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, LR, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, LR8, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index a59c775272..e740348717 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 0c7880d232..fa8f62de9c 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
- if (i == 2)
- break;
SDValue CstOffset;
- if (!Operand.getValueType().isVector() &&
- SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+ // Gather the other constant values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
}
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
}
- break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index be3318a0b4..08650980fd 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+/// Check whether a set of constant reads can be issued together.
+///
+/// Each entry of \p Consts is a constant selector. Bit 0 of a selector is
+/// ignored, so selectors that differ only in bit 0 name the same "half
+/// constant" (the selector value with bit 0 cleared). The set fits when it
+/// touches at most two distinct half constants.
+///
+/// \returns true if at most two distinct half constants are read.
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+    const {
+  assert (Consts.size() <= 12 && "Too many operands in instructions group");
+  // Track slot occupancy explicitly: selectors 0 and 1 yield half constant 0,
+  // so the value 0 must not double as the "slot is empty" marker.
+  unsigned Pair1 = 0, Pair2 = 0;
+  bool HasPair1 = false, HasPair2 = false;
+  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+    unsigned ReadConstHalf = Consts[i] & 2;
+    unsigned ReadConstIndex = Consts[i] & (~3);
+    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+    if (!HasPair1) {
+      Pair1 = ReadHalfConst;
+      HasPair1 = true;
+      continue;
+    }
+    if (Pair1 == ReadHalfConst)
+      continue;
+    if (!HasPair2) {
+      Pair2 = ReadHalfConst;
+      HasPair2 = true;
+      continue;
+    }
+    // A third distinct half constant does not fit.
+    if (Pair2 != ReadHalfConst)
+      return false;
+  }
+  return true;
+}
+
+/// Decide whether the instructions in \p MIs may be grouped together as far
+/// as constant reads are concerned.
+///
+/// Collects the SRCn_SEL immediate of every source operand that reads
+/// AMDGPU::ALU_CONST, across all ALU instructions in \p MIs, and asks
+/// fitsConstReadLimitations() whether the combined set is acceptable.
+/// Non-ALU instructions contribute no constants.
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+  // Source operand / selector operand pairs. The table is the same for every
+  // instruction, so build it once instead of on each loop iteration.
+  static const R600Operands::Ops OpTable[3][2] = {
+    {R600Operands::SRC0, R600Operands::SRC0_SEL},
+    {R600Operands::SRC1, R600Operands::SRC1_SEL},
+    {R600Operands::SRC2, R600Operands::SRC2_SEL},
+  };
+
+  std::vector<unsigned> Consts;
+  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+    const MachineInstr *MI = MIs[i];
+    const unsigned Opcode = MI->getOpcode();
+    if (!isALUInstr(Opcode))
+      continue;
+
+    for (unsigned j = 0; j < 3; j++) {
+      int SrcIdx = getOperandIdx(Opcode, OpTable[j][0]);
+      // A negative index ends the scan of this instruction's sources.
+      if (SrcIdx < 0)
+        break;
+      if (MI->getOperand(SrcIdx).getReg() != AMDGPU::ALU_CONST)
+        continue;
+      unsigned Const = MI->getOperand(
+          getOperandIdx(Opcode, OpTable[j][1])).getImm();
+      Consts.push_back(Const);
+    }
+  }
+  return fitsConstReadLimitations(Consts);
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index efe721c00c..bf9569e659 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -53,6 +53,9 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
/// \breif Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index c5fa3347dc..8c50d54cf5 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1979,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 40aec833ea..b07a585f08 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -14,5 +14,4 @@ using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo() {
- memset(Outputs, 0, sizeof(Outputs));
}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 4b901f4bbc..13a46b8eec 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -26,7 +26,6 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
std::vector<unsigned> IndirectRegs;
- SDNode *Outputs[16];
};
} // End llvm namespace
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 19baef94c7..9074364bb3 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
@@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
-class ConstPairs {
-private:
- unsigned XYPair;
- unsigned ZWPair;
-public:
- ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
- for (unsigned i = 0; i < 3; i++) {
- unsigned ReadConstChan = ReadConst[i] & 3;
- unsigned ReadConstIndex = ReadConst[i] & (~3);
- if (ReadConstChan < 2) {
- if (!XYPair) {
- XYPair = ReadConstIndex;
- }
- } else {
- if (!ZWPair) {
- ZWPair = ReadConstIndex;
- }
- }
- }
- }
-
- bool isCompatibleWith(const ConstPairs& CP) const {
- return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
- (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
- }
-};
-
-static
-const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
- unsigned ReadConsts[3] = {0, 0, 0};
- R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
- if (!TII->isALUInstr(MI.getOpcode()))
- return ConstPairs(ReadConsts);
-
- for (unsigned i = 0; i < 3; i++) {
- int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
- if (SrcIdx < 0)
- break;
- if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
- ReadConsts[i] =MI.getOperand(
- TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
- }
- return ConstPairs(ReadConsts);
-}
-
-bool
-R600SchedStrategy::isBundleable(const MachineInstr& MI) {
- const ConstPairs &MIPair = getPairs(TII, MI);
- for (unsigned i = 0; i < 4; i++) {
- if (!InstructionsGroupCandidate[i])
- continue;
- const ConstPairs &IGPair = getPairs(TII,
- *InstructionsGroupCandidate[i]->getInstr());
- if (!IGPair.isCompatibleWith(MIPair))
- return false;
- }
- return true;
-}
-
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
if (Q.empty())
return NULL;
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
It != E; ++It) {
SUnit *SU = *It;
- if (isBundleable(*SU->getInstr())) {
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
Q.erase(It);
return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
}
}
return NULL;
@@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+ InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
- InstructionsGroupCandidate[Chan] = SU;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
}
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index d74ff1e076..3d0367fd8e 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -98,7 +98,7 @@ public:
virtual void releaseBottomNode(SUnit *SU);
private:
- SUnit *InstructionsGroupCandidate[4];
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
@@ -112,7 +112,6 @@ private:
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
SUnit* pickOther(int QID);
- bool isBundleable(const MachineInstr& MI);
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
};
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 063f5faa63..93f8c38a3a 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
@@ -229,6 +234,10 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32; // Always i32; the VT argument is not consulted.
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 0411565ee3..d65622526f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -48,6 +48,7 @@ public:
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 67fbdf7be1..98bd3dbb66 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) {
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
-bool SIInsertWaits::unorderedDefines(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- if (TSFlags & SIInstrFlags::LGKM_CNT)
- return true;
-
- if (TSFlags & SIInstrFlags::EXP_CNT)
- return ExpInstrTypesSeen == 3;
-
- return false;
-}
-
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
- bool UnorderedDefines = unorderedDefines(MI);
Counters Result = ZeroCounts;
// For each register affected by this
@@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
- if (UnorderedDefines)
- increaseCounters(Result, DefinedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
}
if (Op.isUse())
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 260c651dd4..2f10c388ad 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
opName#" $dst, $src0", pattern
>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
- (ins SReg_64:$sbase, i32imm:$offset),
+ (ins baseClass:$sbase, i32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
def _SGPR : SMRD <
op, 0, (outs dstClass:$dst),
- (ins SReg_64:$sbase, SReg_32:$soff),
+ (ins baseClass:$sbase, SReg_32:$soff),
asm#" $dst, $sbase, $soff", []
>;
}
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 0ab9e4ec0c..05b04a92b3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT
//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
@@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
-//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
-//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
} // mayLoad = 1
@@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
let isCommutable = 1 in {
@@ -1133,6 +1149,31 @@ def SI_KILL : InstSI <
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
@@ -1241,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>;
defm : SamplePatterns<VReg_256, v8i32>;
defm : SamplePatterns<VReg_512, v16i32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+// Two-element vectors only have elements 0 and 1 (sub0/sub1). The range is
+// inclusive, like the 0-3, 0-7 and 0-15 ranges used for the wider vectors.
+foreach Index = 0-1 in {
+  def Extract_Element_v2i32_#Index : Extract_Element <
+    i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v2i32_#Index : Insert_Element <
+    i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v2f32_#Index : Extract_Element <
+    f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v2f32_#Index : Insert_Element <
+    f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1389,6 +1491,24 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
>;
+// 1. Offset as 8bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded into a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
@@ -1426,4 +1546,48 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 7c23d1706b..33bb8157b4 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -16,8 +16,7 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- /* XXX: We may need a seperate intrinsic here for loading integer values */
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
def int_SI_wqm : Intrinsic <[], [], []>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index b215aa22db..9a027e77eb 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -66,6 +66,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
+ const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -84,9 +85,14 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveKill = false;
@@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_BRANCH:
Branch(MI);
break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
}
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 3dcad506d2..4f14931a9c 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or an register
@@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
-def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
-def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 357879bf6c..b53a1ed095 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index c6cafe59eb..ee88ce77c0 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -610,6 +610,9 @@ struct StringComparator {
// Provided for compatibility with MSVC's debug mode.
bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
};
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 7d8b49cdf2..e7282519d5 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ee5c2b2bfd..d5568e08d3 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -62,6 +62,9 @@ private:
X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
SmallString<64> &Err);
@@ -170,30 +173,33 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc OffsetOfLoc;
bool AddressOf;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -231,11 +237,6 @@ struct X86Operand : public MCParsedAsmOperand {
return Imm.Val;
}
- bool needAsmRewrite() const {
- assert(Kind == Immediate && "Invalid access!");
- return Imm.NeedAsmRewrite;
- }
-
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -332,11 +333,6 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
- unsigned getMemSize() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.Size;
- }
-
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
@@ -345,11 +341,6 @@ struct X86Operand : public MCParsedAsmOperand {
return AddressOf;
}
- bool needSizeDirective() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.NeedSizeDir;
- }
-
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -485,17 +476,15 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
- bool NeedRewrite = true){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
- Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -503,7 +492,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -512,7 +500,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -527,7 +515,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -890,6 +877,45 @@ public:
}
};
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
const AsmToken &Tok = Parser.getTok();
@@ -914,7 +940,12 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
// Adjust the EndLoc due to the ']'.
End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
}
}
@@ -1036,40 +1067,9 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
if (getParser().parseExpression(Disp, End))
return 0;
- bool NeedSizeDir = false;
- bool IsVarDecl = false;
- if (isParsingInlineAsm()) {
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- unsigned tLength, tSize, tType;
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
- tSize, tType, IsVarDecl);
- if (!Size)
- Size = tType * 8; // Size is in terms of bits in this context.
- NeedSizeDir = Size > 0;
- }
- }
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
- }
-
- // When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size, NeedSizeDir);
- }
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
}
/// Parse the '.' operator.
@@ -1197,7 +1197,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
- return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+ return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
@@ -1221,6 +1221,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
if (!getParser().parseExpression(Val, End)) {
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
return X86Operand::CreateImm(Val, Start, End);
}
}
@@ -1734,242 +1736,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204ae75..5fbefaec5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode externsion equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M)
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ece38aa346..2518e02e2a 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode externsion equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b5c3270065..85155f55e0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (Subtarget->isTargetWindows())
+ return false;
+
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 960870dc60..23cfd6d72f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasInt256()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- }
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
@@ -1186,14 +1179,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
-
setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
@@ -1210,15 +1195,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
+ }
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
- }
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -7834,7 +7821,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
// Need to generate someting similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7854,18 +7841,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
+ // use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
@@ -11490,16 +11478,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- if (!Subtarget->hasSSE2())
- return SDValue();
-
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
@@ -11610,6 +11595,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
@@ -11826,8 +12029,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
@@ -12262,7 +12480,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -15918,124 +16137,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
- // On X86 with SSE2 support, we can transform this to a vector shift if
- // all elements are shifted by the same amount. We can't do this in legalize
- // because the a constant vector is typically transformed to a constant pool
- // so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
- return SDValue();
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasInt256() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue ShAmtOp = N->getOperand(1);
- EVT EltVT = VT.getVectorElementType();
- DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt = SDValue();
- if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- // Handle the case where the build_vector is all undef
- // FIXME: Should DAG allow this?
- if (i == NumElts)
- return SDValue();
-
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != BaseShAmt) {
- return SDValue();
- }
- }
- } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- SDValue InVec = ShAmtOp.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
- if (C->getZExtValue() == SplatIdx)
- BaseShAmt = InVec.getOperand(1);
- }
- }
- if (BaseShAmt.getNode() == 0) {
- // Don't create instructions with illegal types after legalize
- // types has run.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
- !DCI.isBeforeLegalize())
- return SDValue();
-
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
- }
- } else
- return SDValue();
-
- // The shift amount is an i32.
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
- // The shift amount is identical so we can do a vector shift.
- SDValue ValOp = N->getOperand(0);
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- }
+ return SDValue();
}
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
@@ -16346,13 +16453,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- if (Mask.getOpcode() != X86ISD::VSRAI)
- return SDValue();
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(1);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
if ((SraAmt + 1) != EltBits)
return SDValue();
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d86a4065a7..19bdb96f05 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -36,41 +36,52 @@ let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for instruction that loads one value and gets the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
} // hasSideEffects = 0
@@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
@@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -532,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []> {
+ mnemonic, operands, []>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
@@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", [], IIC_BIN_NONMEM>;
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
- IIC_BIN_NONMEM>;
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in {
// ADCX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize;
@@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adcx{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8, OpSize;
@@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
// ADOX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS;
@@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adox{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8XS;
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 8f2d0a1aae..a967a4da5c 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,7 +16,7 @@
// SetCC instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1 in {
+ isCommutable = 1, SchedRW = [WriteALU] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
IIC_CMOV32_RR>, TB;
}
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
- IIC_SET_R>, TB;
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)],
- IIC_SET_M>, TB;
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
} // Uses = [EFLAGS]
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 734e5982b2..2b27bc5bc5 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)], IIC_RET>;
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)], IIC_RET>;
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)],
- IIC_ALU_NONMEM>;
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
@@ -990,9 +991,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
// Direct PC relative function call for small code model. 32-bit displacement
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded2..5ef0c3c13b 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,24 +112,28 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB;
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>;
+ IIC_MOVZX>, Sched<[WriteALULd]>;
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d989ec7bb0..39165e24a8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -786,7 +786,7 @@ def LEAVE64 : I<0xC9, RawFrm,
//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize;
def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -803,9 +803,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -832,29 +832,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
-}
+} // mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>;
def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>;
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -871,17 +872,18 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
Requires<[In64BitMode]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad=1, neverHasSideEffects=1 in {
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore=1, neverHasSideEffects=1 in {
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
-let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -890,7 +892,7 @@ def BSWAP32r : I<0xC8, AddRegFrm,
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
// Bit scan instructions.
let Defs = [EFLAGS] in {
@@ -976,7 +978,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -987,6 +989,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -1004,7 +1007,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
+} // SchedRW
+let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1017,9 +1022,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1038,6 +1045,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
// FIXME: These definitions are utterly broken
// Just leave them commented out for now because they're useless outside
@@ -1055,7 +1063,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1066,7 +1074,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1081,6 +1089,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
[(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
+let SchedRW = [WriteStore] in {
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1093,6 +1102,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1101,34 +1111,37 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
}
// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1139,13 +1152,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
@@ -1166,6 +1180,7 @@ let mayLoad = 1, hasSideEffects = 0 in {
>, TB;
}
+let SchedRW = [WriteALU] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1178,10 +1193,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
IIC_BT_RI>, TB;
+} // SchedRW
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1194,8 +1211,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1203,8 +1222,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1214,6 +1234,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1221,8 +1242,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1232,6 +1254,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1239,6 +1262,7 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; // itinerary matches BTR16rr/BTR32rr
+} // SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { // read-modify-write on memory, same as the BTS*mr group
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
@@ -1250,6 +1274,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1257,8 +1282,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1268,6 +1294,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1275,8 +1302,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1286,6 +1314,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1293,8 +1322,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1483,6 +1513,7 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
// Table lookup instructions
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1512,7 +1543,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[In32BitMode]>;
+} // SchedRW
+let SchedRW = [WriteSystem] in {
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1528,11 +1561,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1545,6 +1580,8 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
T8;
+ }
+ let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1557,6 +1594,7 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
T8;
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0979752757..4d43ee1f15 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
// scalar
+let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
+}
def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM // rm itinerary is the F32 scalar one, pairing like SSE_ALU_F32S
>;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM // rm itinerary is the F32 scalar one, pairing like SSE_ALU_F32S
>;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
>;
// parallel
+let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
def SSE_ALU_F64P : OpndItins<
IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
+}
def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM // rm itinerary is the F32 packed one, pairing like SSE_ALU_F32P
>;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM // rm itinerary is the F32 packed one, pairing like SSE_ALU_F32P
>;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>;
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
+ pat_rm, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -345,7 +370,7 @@ let Predicates = [HasAVX] in {
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
@@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX] in {
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
@@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
@@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
// For the disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
- [], IIC_SSE_MOV_S_RR>;
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- VEX, VEX_LIG;
+ VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
@@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
@@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in {
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>, VEX;
@@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
@@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
[(set VR128:$dst,
(psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
@@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}
@@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
@@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))],
- IIC_SSE_CVT_Scalar_RR>;
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem, // $src2 is a memory operand (sdmem), so the ModRM form is SrcMem
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem, // $src2 is a memory operand (sdmem), so the ModRM form is SrcMem
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}", // two-address form ($src1 = $dst), printed like Int_CVTSS2SDrm
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert scalar single to scalar double
@@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f64 (fextend FR32:$src)),
@@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[UseSSE2]>;
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
// Convert Packed Double FP to Packed DW Integers
@@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- VEX;
+ VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
@@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in {
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB;
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB;
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX;
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
}
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
@@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
// AVX 256-bit register conversion intrinsics
@@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RR>;
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RM>;
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Defs = [EFLAGS] in {
@@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
@@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L;
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L;
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
@@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
let Constraints = "$src1 = $dst" in {
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
+ [], itins.rr>, Sched<[itins.Sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int VR256:$src))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR64:$src1, FR64:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1,f64mem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
// Square root.
@@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
@@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
IIC_SSE_PREFETCH>, TB;
}
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
TB, Requires<[HasSSE2]>;
+} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
@@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For Disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
@@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
@@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
@@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX;
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
@@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L;
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
@@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
}
}
} // ExeDomain = SSEPackedInt
@@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffle]>; // reg-reg form (MRMSrcReg): no folded load, same as VPEXTRWri
// Insert
let Predicates = [HasAVX] in {
@@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteStore]>; // MRMDestMem store pattern: a store, not a load
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>; // MRMDestMem store pattern: a store, not a load
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} //SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
@@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX;
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
+let SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
+} // SchedRW
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))],
IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
@@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
+
+let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
+let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
+let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
+} // SchedRW
+let SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let AddedComplexity = 20 in {
let Predicates = [HasAVX] in {
@@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in {
}
// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let SchedRW = [WriteMove] in {
let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>;
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (X86Movddup
(scalar_to_vector (loadf64 addr:$src)))))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>;
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
IIC_SSE_LDDQU>;
+}
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
@@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst,
(IntId128
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
- OpSize;
+ OpSize, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (IntId256 VR256:$src))]>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
}
let Predicates = [HasAVX] in {
@@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
@@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
+}
+let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
+let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;
@@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
+} // SchedRW
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 3caa1b538c..053417ccde 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
def : Pat<(debugtrap),
(INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
+let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
+let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
"in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
+let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
+let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// Moves to and from segment registers.
//
+let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
+let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
@@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
+let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
@@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-
+} // SchedRW
+
//===----------------------------------------------------------------------===//
// Specialized register support
+let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
+let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
+let SchedRW = [WriteSystem] in {
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
@@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in {
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 3af1b3e06b..a8a9fd8acc 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -407,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended, but the other isn't.
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d99d085298..bcdd0eb56d 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,6 +7,88 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody uses.
+def WriteMicrocoded : SchedWrite;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
def IIC_DEFAULT : InstrItinClass;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index be2a997b8e..3e3b86edbb 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+ // Shifts on v4i64/v8i32 are legal on AVX2 even though we declare that we
+ // customize them in order to detect the cases where the shift amount is a scalar.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
static const CostTblEntry<MVT> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
@@ -257,8 +280,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
};
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index a75212a386..bc5109b4d4 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,4 +1,4 @@
-//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
// to the function does not create any copies of the pointer value that
// outlive the call. This more or less means that the pointer is only
// dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
// This pass is implemented as a bottom-up traversal of the call-graph.
//
//===----------------------------------------------------------------------===//
@@ -32,12 +34,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
@@ -62,14 +66,63 @@ namespace {
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+ // Utility methods used by inferPrototypeAttributes to add attributes
+ // and maintain annotation statistics.
+
+ void setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ }
+ }
+
+ // inferPrototypeAttributes - Analyze the name and prototype of the
+ // given function and set any applicable attributes. Returns true
+ // if any attributes were set and false otherwise.
+ bool inferPrototypeAttributes(Function &F);
+
+ // annotateLibraryCalls - Adds attributes to well-known standard library
+ // call declarations.
+ bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
private:
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
};
}
@@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes. Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+ return false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strcmp:
+ case LibFunc::strspn:
+ case LibFunc::strncmp:
+ case LibFunc::strcspn:
+ case LibFunc::strcoll:
+ case LibFunc::strcasecmp:
+ case LibFunc::strncasecmp:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::scanf:
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat:
+ case LibFunc::sscanf:
+ case LibFunc::sprintf:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::mkdir:
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::rmdir:
+ case LibFunc::rewind:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rename:
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ break;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ break; // Do not fall through into the 4-parameter fread/fwrite check.
+ case LibFunc::fread:
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ break; // Do not fall through into the fputs/fscanf/... prototype check.
+ case LibFunc::fputs:
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::uname:
+ case LibFunc::unlink:
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::pread:
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vsscanf:
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ break;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ case LibFunc::dunder_isoc99_sscanf:
+ // Parameters 0 and 1 are both inspected below, so at least two are needed.
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ default:
+ // Didn't mark any attributes.
+ return false;
+ }
+
+ return true;
+}
+
+/// annotateLibraryCalls - Runs inferPrototypeAttributes() on every function
+/// declaration in \p SCC. Returns true if any of those calls returned true.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+ bool MadeChange = false;
+
+ // Visit each call graph node of the SCC in turn; only nodes that carry a
+ // function which is a declaration are handed to inferPrototypeAttributes().
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F != 0 && F->isDeclaration()) // Skip null functions and non-declarations.
+ MadeChange |= inferPrototypeAttributes(*F);
+ }
+
+ return MadeChange;
+}
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
- bool Changed = AddReadAttrs(SCC);
+ bool Changed = annotateLibraryCalls(SCC);
+ Changed |= AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
return Changed;
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6f00..3c5781ca73 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -150,7 +150,9 @@ namespace {
typedef SmallVector<const FAddend*, 4> AddendVect;
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -159,6 +161,7 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
@@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to factor a common operand out of the two operands of the FAdd/FSub
+// instruction I. Returns the simplified expression on success; otherwise 0.
+//
+//   Instruction "I"            is simplified into
+// -------------------------------------------------------
+//   (x * y) +/- (x * z)   ->   x * (y +/- z)
+//   (y / x) +/- (z / x)   ->   (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0; // Operands are not two instructions with the same opcode.
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0; // Only FMul and FDiv operands are handled.
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ //  Input Instr I      | Factor | AddSub0 | AddSub1
+ // --------------------+--------+---------+---------
+ //  (x*y) +/- (x*z)    |   x    |    y    |    z
+ //  (y/x) +/- (z/x)    |   x    |    y    |    z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) { // The shared operand may sit on either side of each multiply.
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) { // The remaining operand of each multiply feeds the add/sub.
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) { // Division: only a shared divisor counts.
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0; // No common factor found.
+
+ // Create expression "NewAddSub = AddSub0 +/- AddSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { // Got a constant.
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0; // Give up unless the constant is a normal, non-denormal value.
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+ // step 6: Try factorization as the last resort.
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { // Opnd0 / Opnd1
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V)) // V may not be an Instruction.
+ createInstPostProc(I);
return V;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bad46b4dab..32fdb9b708 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
// This enables to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6877475b1d..92b42ee64b 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v1";
+static const char *kAsanInitName = "__asan_init_v2";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = false,
+ AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
StringRef BlacklistFile = StringRef(),
@@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = false,
+ AddressSanitizerModule(bool CheckInitOrder = true,
StringRef BlacklistFile = StringRef(),
bool ZeroBaseShadow = false)
: ModulePass(ID),
@@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst,
kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
}
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
@@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
@@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// this TU. Used in initialization order checking.
Value *FirstDynamic = 0, *LastDynamic = 0;
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
@@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index a79873cbf6..3310ed5e2b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/InstIterator.h"
@@ -39,35 +40,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() { // Builds the default option set.
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) { // Exactly 4 characters are copied below.
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); // No NUL is copied.
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true),
- UseExtraChecksum(false), NoRedZone(false),
- NoFunctionNamesInData(false) {
- memcpy(Version, DefaultGCovVersion, 4);
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, const char (&Version)[4],
- bool UseExtraChecksum, bool NoRedZone,
- bool NoFunctionNamesInData)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- UseExtraChecksum(UseExtraChecksum), NoRedZone(NoRedZone),
- NoFunctionNamesInData(NoFunctionNamesInData) {
- memcpy(this->Version, Version, 4);
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -78,6 +101,8 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteWriteoutFunctionListFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -88,23 +113,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- char Version[4];
- bool UseExtraChecksum;
- bool NoRedZone;
- bool NoFunctionNamesInData;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -115,13 +139,14 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- const char (&Version)[4],
- bool UseExtraChecksum,
- bool NoRedZone,
- bool NoFunctionNamesInData) {
- return new GCOVProfiler(EmitNotes, EmitData, Version, UseExtraChecksum,
- NoRedZone, NoFunctionNamesInData);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
+}
+
+static std::string getFunctionName(DISubprogram SP) { // Name written for SP.
+ if (!SP.getLinkageName().empty()) // Prefer the linkage name when present.
+ return SP.getLinkageName();
+ return SP.getName(); // Otherwise fall back to the plain name.
}
namespace {
@@ -260,7 +285,7 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
- bool UseExtraChecksum) {
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -272,16 +297,16 @@ namespace {
ReturnBlock = new GCOVBlock(i++, os);
writeBytes(FunctionTag, 4);
- uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
- writeGCOVString(SP.getName());
+ writeGCOVString(getFunctionName(SP));
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
}
@@ -363,12 +388,12 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -382,7 +407,7 @@ void GCOVProfiler::emitGCNO() {
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
out.write("oncg", 4);
- out.write(Version, 4);
+ out.write(ReversedVersion, 4);
out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
@@ -392,7 +417,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, i, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -522,8 +547,38 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout"
+ // function to be executed at exit and the "__llvm_gcov_flush" function to be
+ // executed when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+ // Initialize the environment and register the local writeout and flush
+ // functions.
+ Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
if (InsertIndCounterIncrCode)
@@ -619,6 +674,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -637,7 +702,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -646,7 +711,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
@@ -664,15 +729,15 @@ void GCOVProfiler::insertCounterWriteout(
std::string FilenameGcda = mangleName(CU, "gcda");
Builder.CreateCall2(StartFile,
Builder.CreateGlobalStringPtr(FilenameGcda),
- Builder.CreateGlobalStringPtr(Version));
+ Builder.CreateGlobalStringPtr(ReversedVersion));
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
DISubprogram SP(CountersBySP[j].second);
- Builder.CreateCall3(EmitFunction,
- Builder.getInt32(j),
- NoFunctionNamesInData ?
- Constant::getNullValue(Builder.getInt8PtrTy()) :
- Builder.CreateGlobalStringPtr(SP.getName()),
- Builder.getInt8(UseExtraChecksum));
+ Builder.CreateCall3(
+ EmitFunction, Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
@@ -684,29 +749,9 @@ void GCOVProfiler::insertCounterWriteout(
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Builder.getInt32Ty(),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -715,7 +760,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
Fn->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
Fn->addFnAttr(Attribute::NoRedZone);
// Create basic blocks for function.
@@ -760,18 +805,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
FlushF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -796,8 +841,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fce6513a97..4e75904ded 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
cl::desc("poison uninitialized stack variables with the given patter"),
cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
@@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Clean shadow (all zeroes) means all bits of the value are defined
/// (initialized).
- Value *getCleanShadow(Value *V) {
+ Constant *getCleanShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
return 0;
@@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ConstantStruct::get(ST, Vals);
}
+ /// \brief Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return getPoisonedShadow(ShadowTy);
+ }
+
/// \brief Create a clean (zero) origin.
Value *getCleanOrigin() {
return Constant::getNullValue(MS.OriginTy);
@@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return Shadow;
}
if (UndefValue *U = dyn_cast<UndefValue>(V)) {
- Value *AllOnes = getPoisonedShadow(getShadowTy(V));
+ Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
(void)U;
return AllOnes;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447f1c..129af8d45d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d646..5d02c68a7a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -64,10 +65,16 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +85,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that must not be merged away
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -87,6 +111,7 @@ namespace {
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
const char *getPassName() const {
return "Merge internal globals";
@@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
@@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
return new GlobalMerge(tli);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 97fff7e782..8e76c78f5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
+ // Computing the value outside of the loop brings no benefit if:
+ // - it is definitely used inside the loop in a way which can not be
+ // optimized away.
+ // - no use outside of the loop can take advantage of hoisting the
+ // computation out of the loop
+ if (ExitValue->getSCEVType()>=scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+ IB!=IE && NumUses<=6 ; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB=UseInstr->use_begin(),
+ PE=UseInstr->use_end();
+ PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9c67e327e2..0b62050b17 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -34,13 +34,9 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop* L, LPPassManager& LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
};
}
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// isLoopDead - Determine if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
- if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock* preheader = L->getLoopPreheader();
+ BasicBlock *preheader = L->getLoopPreheader();
if (!preheader)
return false;
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst* TI = preheader->getTerminator();
+ TerminatorInst *TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock *exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
int j = P->getBasicBlockIndex(exitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4e4cb86464..9562cf8d5d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -2716,6 +2716,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 810a553c74..25306c2681 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,15 @@
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -69,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
+
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and include a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
- /// \brief A particular use of a partition of the alloca.
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +515,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +533,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting
+ // over-eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +555,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +566,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +873,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +889,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
splitAndMergePartitions();
}
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+ MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
// Now build up the user lists for each of these disjoint partitions by
// re-walking the recursive users of the alloca.
Uses.resize(Partitions.size());
@@ -1084,22 +1149,31 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
PtrI = UB.visitPtr(AI);
assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+ MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser()))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser()))
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
else
return 0; // Bail if we have weird uses.
@@ -1139,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1374,11 +1449,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1472,7 +1547,7 @@ private:
assert(!Loads.empty());
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
+ IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
@@ -1495,7 +1570,7 @@ private:
TerminatorInst *TI = Pred->getTerminator();
Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
+ IRBuilderTy PredBuilder(TI);
LoadInst *Load
= PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1522,8 +1597,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1576,10 +1651,10 @@ private:
if (!isSafeSelectToSpeculate(SI, Loads))
return;
- IRBuilder<> IRB(&SI);
+ IRBuilderTy IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1590,7 +1665,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1622,8 +1697,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1640,9 +1715,8 @@ private:
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
if (Indices.empty())
return BasePtr;
@@ -1651,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1663,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1695,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
}
/// \brief Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1728,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, Prefix);
+ Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1741,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1760,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1773,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1795,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1813,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
- Value *Ptr, APInt Offset, Type *PointerTy,
- const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1849,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
- Indices, Prefix)) {
+ Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -1884,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- Prefix + ".raw_cast");
+ "raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- Prefix + ".raw_idx");
+ "raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
return Ptr;
}
@@ -1910,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1932,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *Ty) {
assert(canConvertValue(DL, V->getType(), Ty) &&
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -1976,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1996,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2062,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2073,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2088,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2104,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2124,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
@@ -2147,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2178,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
-static Value *extractVector(IRBuilder<> &IRB, Value *V,
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
unsigned BeginIndex, unsigned EndIndex,
const Twine &Name) {
VectorType *VecTy = cast<VectorType>(V->getType());
@@ -2206,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
@@ -2296,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // The name prefix to use when rewriting instructions for this alloca.
- std::string NamePrefix;
+ // Utility IR builder, whose name prefix is set up for each visited use, and
+ // the insertion point is set to point to the user.
+ IRBuilderTy IRB;
public:
AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2313,7 +2395,8 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2335,14 +2418,21 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
- NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2364,14 +2454,10 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Twine getName(const Twine &Suffix) {
- return NamePrefix + Suffix;
- }
-
- Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2421,27 +2507,27 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
+ Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || EndOffset < NewAllocaEndOffset)
V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
+ "extract");
return V;
}
@@ -2451,56 +2537,37 @@ private:
assert(OldOp == OldPtr);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
- IRBuilder<> IRB(&LI);
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(IRB, LI);
+ V = rewriteIntegerLoad(LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
canConvertValue(TD, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2510,7 +2577,7 @@ private:
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
- getName(".insert"));
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
@@ -2524,7 +2591,7 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ bool rewriteVectorizedStoreInst(Value *V,
StoreInst &SI, Value *OldOp) {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
@@ -2539,8 +2606,8 @@ private:
// Mix in the existing elements.
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2550,17 +2617,17 @@ private:
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
- getName(".insert"));
+ "insert");
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2574,7 +2641,6 @@ private:
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&SI);
Value *V = SI.getValueOperand();
@@ -2587,23 +2653,21 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
- getName(".extract"));
+ "extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(IRB, V, SI);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
@@ -2634,7 +2698,7 @@ private:
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
- Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+ Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
@@ -2642,26 +2706,25 @@ private:
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
ConstantExpr::getUDiv(
Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
- getName(".isplat"));
+ "isplat");
return V;
}
/// \brief Compute a vector splat for a given element value.
- Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
- V = IRB.CreateVectorSplat(NumElements, V, NamePrefix);
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawDest() == OldPtr);
// If the memset has a variable size, it cannot be split, just adjust the
@@ -2718,31 +2781,31 @@ private:
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- Value *Splat = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ElementTy)/8);
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
Splat = convertValue(TD, IRB, Splat, ElementTy);
if (NumElements > 1)
- Splat = getVectorSplat(IRB, Splat, NumElements);
+ Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
// set integer.
assert(!II.isVolatile());
uint64_t Size = EndOffset - BeginOffset;
- V = getIntegerSplat(IRB, II.getValue(), Size);
+ V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaBeginOffset)) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
@@ -2753,10 +2816,9 @@ private:
assert(BeginOffset == NewAllocaBeginOffset);
assert(EndOffset == NewAllocaEndOffset);
- V = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ScalarTy)/8);
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
- V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
V = convertValue(TD, IRB, V, AllocaTy);
}
@@ -2773,7 +2835,6 @@ private:
// them into two categories: split intrinsics and unsplit intrinsics.
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
@@ -2857,8 +2918,7 @@ private:
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
@@ -2901,8 +2961,7 @@ private:
OtherPtrTy = SubIntTy->getPointerTo();
}
- Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
@@ -2910,31 +2969,31 @@ private:
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
Src = convertValue(TD, IRB, Src, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
- getName(".copyload"));
+ "copyload");
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
@@ -2949,7 +3008,6 @@ private:
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
@@ -2977,7 +3035,9 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
@@ -2990,7 +3050,6 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// Find the operand we need to rewrite here.
bool IsTrueVal = SI.getTrueValue() == OldPtr;
@@ -3065,7 +3124,7 @@ private:
class OpSplitter {
protected:
/// The builder used to form new instructions.
- IRBuilder<> IRB;
+ IRBuilderTy IRB;
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
@@ -3380,7 +3439,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 916b37d4a8..3514e6c2aa 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
@@ -35,7 +34,6 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
//===----------------------------------------------------------------------===//
// Optimizer Base Class
@@ -91,8 +89,6 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
-
- bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
SimplifyLibCalls() : FunctionPass(ID) {
@@ -104,14 +100,6 @@ namespace {
void InitOptimizations();
bool runOnFunction(Function &F);
- void setDoesNotAccessMemory(Function &F);
- void setOnlyReadsMemory(Function &F);
- void setDoesNotThrow(Function &F);
- void setDoesNotCapture(Function &F, unsigned n);
- void setDoesNotAlias(Function &F, unsigned n);
- bool doInitialization(Module &M);
-
- void inferPrototypeAttributes(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
}
@@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
return Changed;
}
-// Utility methods for doInitialization.
-
-void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-
-
-void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- FunctionType *FTy = F.getFunctionType();
-
- StringRef Name = F.getName();
- switch (Name[0]) {
- case 's':
- if (Name == "strlen") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strchr" ||
- Name == "strrchr") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "strcpy" ||
- Name == "stpcpy" ||
- Name == "strcat" ||
- Name == "strtol" ||
- Name == "strtod" ||
- Name == "strtof" ||
- Name == "strtoul" ||
- Name == "strtoll" ||
- Name == "strtold" ||
- Name == "strncat" ||
- Name == "strncpy" ||
- Name == "stpncpy" ||
- Name == "strtoull") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strxfrm") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strcmp" ||
- Name == "strspn" ||
- Name == "strncmp" ||
- Name == "strcspn" ||
- Name == "strcoll" ||
- Name == "strcasecmp" ||
- Name == "strncasecmp") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strstr" ||
- Name == "strpbrk") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strtok" ||
- Name == "strtok_r") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "scanf" ||
- Name == "setbuf" ||
- Name == "setvbuf") {
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strdup" ||
- Name == "strndup") {
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "stat" ||
- Name == "sscanf" ||
- Name == "sprintf" ||
- Name == "statvfs") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "snprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- } else if (Name == "setitimer") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- } else if (Name == "system") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- case 'm':
- if (Name == "malloc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "memcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "memchr" ||
- Name == "memrchr") {
- if (FTy->getNumParams() != 3)
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "modf" ||
- Name == "modff" ||
- Name == "modfl" ||
- Name == "memcpy" ||
- Name == "memccpy" ||
- Name == "memmove") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "memalign") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotAlias(F, 0);
- } else if (Name == "mkdir" ||
- Name == "mktime") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'r':
- if (Name == "realloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "read") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- } else if (Name == "rmdir" ||
- Name == "rewind" ||
- Name == "remove" ||
- Name == "realpath") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "rename" ||
- Name == "readlink") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'w':
- if (Name == "write") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- }
- break;
- case 'b':
- if (Name == "bcopy") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bzero") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'c':
- if (Name == "calloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "chmod" ||
- Name == "chown" ||
- Name == "ctermid" ||
- Name == "clearerr" ||
- Name == "closedir") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'a':
- if (Name == "atoi" ||
- Name == "atol" ||
- Name == "atof" ||
- Name == "atoll") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "access") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'f':
- if (Name == "fopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "fdopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- } else if (Name == "feof" ||
- Name == "free" ||
- Name == "fseek" ||
- Name == "ftell" ||
- Name == "fgetc" ||
- Name == "fseeko" ||
- Name == "ftello" ||
- Name == "fileno" ||
- Name == "fflush" ||
- Name == "fclose" ||
- Name == "fsetpos" ||
- Name == "flockfile" ||
- Name == "funlockfile" ||
- Name == "ftrylockfile") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "ferror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- } else if (Name == "fputc" ||
- Name == "fstat" ||
- Name == "frexp" ||
- Name == "frexpf" ||
- Name == "frexpl" ||
- Name == "fstatvfs") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "fgets") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- } else if (Name == "fread" ||
- Name == "fwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- } else if (Name == "fputs" ||
- Name == "fscanf" ||
- Name == "fprintf" ||
- Name == "fgetpos") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'g':
- if (Name == "getc" ||
- Name == "getlogin_r" ||
- Name == "getc_unlocked") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "getenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "gets" ||
- Name == "getchar") {
- setDoesNotThrow(F);
- } else if (Name == "getitimer") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "getpwnam") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'u':
- if (Name == "ungetc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "uname" ||
- Name == "unlink" ||
- Name == "unsetenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "utime" ||
- Name == "utimes") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'p':
- if (Name == "putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "puts" ||
- Name == "printf" ||
- Name == "perror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "pread" ||
- Name == "pwrite") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; these are valid pthread cancellation points.
- setDoesNotCapture(F, 2);
- } else if (Name == "putchar") {
- setDoesNotThrow(F);
- } else if (Name == "popen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "pclose") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'v':
- if (Name == "vscanf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vsscanf" ||
- Name == "vfscanf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "valloc") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "vprintf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vfprintf" ||
- Name == "vsprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "vsnprintf") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- }
- break;
- case 'o':
- if (Name == "open") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- } else if (Name == "opendir") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- }
- break;
- case 't':
- if (Name == "tmpfile") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "times") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'h':
- if (Name == "htonl" ||
- Name == "htons") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'n':
- if (Name == "ntohl" ||
- Name == "ntohs") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'l':
- if (Name == "lstat") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "lchown") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'q':
- if (Name == "qsort") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- }
- break;
- case '_':
- if (Name == "__strdup" ||
- Name == "__strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "__strtok_r") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "_IO_getc") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "_IO_putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- }
- break;
- case 1:
- if (Name == "\1__isoc99_scanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1stat64" ||
- Name == "\1lstat64" ||
- Name == "\1statvfs64" ||
- Name == "\1__isoc99_sscanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fopen64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fseeko64" ||
- Name == "\1ftello64") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1tmpfile64") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "\1fstat64" ||
- Name == "\1fstatvfs64") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1open64") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- }
-}
-
-/// doInitialization - Add attributes to well-known functions.
-///
-bool SimplifyLibCalls::doInitialization(Module &M) {
- Modified = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.isDeclaration() && F.hasName())
- inferPrototypeAttributes(F);
- }
- return Modified;
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a221..3e1022ef8c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,7 @@ namespace {
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
- void forwardResume(ResumeInst *RI);
+ void forwardResume(ResumeInst *RI, BasicBlock *FirstNewBlock);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -140,8 +140,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ BasicBlock *FirstNewBlock) {
BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -152,6 +154,36 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ Function *Caller = FirstNewBlock->getParent();
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ LandingPadInst *LPI = II->getLandingPadInst();
+ if (!LPI->hasCatchAll())
+ InlinedLPads.insert(LPI);
+ }
+
+ // Merge the catch clauses from the outer landing pad instruction into the
+ // inlined landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx) {
+ bool hasClause = false;
+ if (OuterLPad->isFilter(OuterIdx)) continue;
+ Value *OuterClause = OuterLPad->getClause(OuterIdx);
+ for (unsigned Idx = 0, N = InlinedLPad->getNumClauses(); Idx != N; ++Idx)
+ if (OuterClause == InlinedLPad->getClause(Idx)) {
+ hasClause = true;
+ break;
+ }
+ if (!hasClause)
+ InlinedLPad->addClause(OuterClause);
+ }
+ }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +261,9 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite. If the code doesn't have calls or unwinds, we know there is
- // nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls) {
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
- return;
- }
-
+ // rewrite.
InvokeInliningInfo Invoke(II);
-
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +272,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
continue;
}
+ // Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI);
+ Invoke.forwardResume(RI, FirstNewBlock);
}
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
+ // invoke instruction. Eliminate these entries (which might even delete the
// PHI node) now.
InvokeDest->removePredecessor(II->getParent());
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 07dd453424..930d9c412f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
+ if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);