diff options
Diffstat (limited to 'lib')
163 files changed, 8630 insertions, 5112 deletions
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 1faa04623e..57bce84e83 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements an analysis that determines, for a given memory -// operation, what preceding memory operations it depends on. It builds on +// operation, what preceding memory operations it depends on. It builds on // alias analysis information, and tries to provide a lazy, caching interface to // a common kind of alias information query. // @@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, static const int BlockScanLimit = 500; char MemoryDependenceAnalysis::ID = 0; - + // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { /// RemoveFromReverseMap - This is a helper function that removes Val from /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. template <typename KeyTy> -static void RemoveFromReverseMap(DenseMap<Instruction*, +static void RemoveFromReverseMap(DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> > &ReverseMap, Instruction *Inst, KeyTy Val) { typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator @@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (LI->isUnordered()) { Loc = AA->getLocation(LI); return AliasAnalysis::Ref; - } else if (LI->getOrdering() == Monotonic) { + } + if (LI->getOrdering() == Monotonic) { Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } @@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (SI->isUnordered()) { Loc = AA->getLocation(SI); return AliasAnalysis::Mod; - } else if (SI->getOrdering() == Monotonic) { + } + if (SI->getOrdering() == Monotonic) { Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } @@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic - // running time on extreme testcases. + // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); Instruction *Inst = --ScanIt; - + // If this inst is a memory op, get the pointer it accessed AliasAnalysis::Location Loc; AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); @@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. -static bool +static bool isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, @@ -289,25 +291,25 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, if (LI->getParent()->getParent()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) return 0; - + // Get the base of this load. int64_t LIOffs = 0; - const Value *LIBase = + const Value *LIBase = GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); - + // If the two pointers are not based on the same pointer, we can't tell that // they are related. if (LIBase != MemLocBase) return 0; - + // Okay, the two values are based on the same pointer, but returned as // no-alias. This happens when we have things like two byte loads at "P+1" // and "P+3". Check to see if increasing the size of the "LI" load up to its // alignment (or the largest native integer type) will allow us to load all // the bits required by MemLoc. - + // If MemLoc is before LI, then no widening of LI will help us out. if (MemLocOffs < LIOffs) return 0; - + // Get the alignment of the load in bytes. We assume that it is safe to load // any legal integer up to this size without a problem. For example, if we're // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can @@ -316,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned LoadAlign = LI->getAlignment(); int64_t MemLocEnd = MemLocOffs+MemLocSize; - + // If no amount of rounding up will let MemLoc fit into LI, then bail out. if (LIOffs+LoadAlign < MemLocEnd) return 0; - + // This is the size of the load to try. Start with the next larger power of // two. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - + while (1) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. @@ -343,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; - + NewLoadByteSize <<= 1; } } @@ -355,7 +357,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// instruction as well; this function may take advantage of the metadata /// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { @@ -382,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa<DbgInfoIntrinsic>(II)) continue; - + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { @@ -406,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - + if (isLoad) { if (R == AliasAnalysis::NoAlias) { // If this is an over-aligned integer load (for example, @@ -423,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) return MemDepResult::getClobber(Inst); - + continue; } - + // Must aliased loads are defs of each other. if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); @@ -441,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (R == AliasAnalysis::PartialAlias) return MemDepResult::getClobber(Inst); #endif - + // Random may-alias loads don't depend on each other without a // dependence. continue; @@ -458,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Atomic stores have complications involved. // FIXME: This is overly conservative. @@ -474,10 +476,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. AliasAnalysis::Location StoreLoc = AA->getLocation(SI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); - + if (R == AliasAnalysis::NoAlias) continue; if (R == AliasAnalysis::MustAlias) @@ -498,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); // Be conservative if the accessed pointer may alias the allocation. @@ -532,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(Inst); } } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -544,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, /// depends. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; - + // Check for a cached result MemDepResult &LocalCache = LocalDeps[QueryInst]; - + // If the cached entry is non-dirty, just return it. Note that this depends // on MemDepResult's default constructing to 'dirty'. if (!LocalCache.isDirty()) return LocalCache; - + // Otherwise, if we have a dirty entry, we know we can start the scan at that // instruction, which may save us some work. if (Instruction *Inst = LocalCache.getInst()) { ScanPos = Inst; - + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); } - + BasicBlock *QueryParent = QueryInst->getParent(); - + // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is @@ -591,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); } - + // Remember the result! if (Instruction *I = LocalCache.getInst()) ReverseLocalDeps[I].insert(QueryInst); - + return LocalCache; } @@ -636,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// the uncached case, this starts out as the set of predecessors we care /// about. SmallVector<BasicBlock*, 32> DirtyBlocks; - + if (!Cache.empty()) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. @@ -644,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { ++NumCacheNonLocal; return Cache; } - + // If we already have a partially computed set of results, scan them to // determine what is dirty, seeding our initial DirtyBlocks worklist. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) if (I->getResult().isDirty()) DirtyBlocks.push_back(I->getBB()); - + // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); - + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; @@ -665,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(*PI); ++NumUncacheNonLocal; } - + // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); SmallPtrSet<BasicBlock*, 64> Visited; - + unsigned NumSortedEntries = Cache.size(); DEBUG(AssertSorted(Cache)); - + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - + // Already processed this block? if (!Visited.insert(DirtyBB)) continue; - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. DEBUG(AssertSorted(Cache, NumSortedEntries)); - NonLocalDepInfo::iterator Entry = + NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; - if (Entry != Cache.begin()+NumSortedEntries && + if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. if (!Entry->getResult().isDirty()) continue; - + // Otherwise, remember this slot so we can update the value. ExistingResult = &*Entry; } - + // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); @@ -715,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { QueryCS.getInstruction()); } } - + // Find out if this block has a local dependency for QueryInst. MemDepResult Dep; - + if (ScanPos != DirtyBB->begin()) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { @@ -728,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { Dep = MemDepResult::getNonFuncLocal(); } - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! if (!Dep.isNonLocal()) { @@ -744,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); } else { - + // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) DirtyBlocks.push_back(*PI); } } - + return Cache; } @@ -769,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); - + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI @@ -794,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis:: GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = @@ -802,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, NonLocalDepEntry(BB)); if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; - + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; return ExistingResult->getResult(); - } - + } + // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. @@ -823,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; ScanPos = ExistingResult->getResult().getInst(); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } - + // Scan the block for the dependency. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache->push_back(NonLocalDepEntry(BB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! if (!Dep.isDef() && !Dep.isClobber()) return Dep; - + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); @@ -859,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. -static void +static void SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { switch (Cache.size() - NumSortedEntries) { @@ -911,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { - + // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -925,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. - std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); NonLocalPointerInfo *CacheInfo = &Pair.first->second; @@ -987,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); if (VI == Visited.end() || VI->second == Pointer.getAddr()) continue; - + // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a // non-fully cached query, but there is little point in doing this. return true; } } - + Value *Addr = Pointer.getAddr(); for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { @@ -1005,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, ++NumCacheCompleteNonLocalPtr; return false; } - + // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, @@ -1014,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else CacheInfo->Pair = BBSkipFirstBlockPair(); - + SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); - + // PredList used inside loop. SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; @@ -1028,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); DEBUG(AssertSorted(*Cache)); - + while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have @@ -1043,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); - + // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } } - + // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. // If not, we just add the predecessors to the worklist and scan them with @@ -1067,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NewBlocks.push_back(*PI); continue; } - + // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. @@ -1082,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } - + // We do need to do phi translation, if we know ahead of time we can't phi // translate this value, don't even try. if (!Pointer.IsPotentiallyPHITranslatable()) goto PredTranslationFailure; - + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. // Sort it now (if needed) so that recursive invocations of @@ -1110,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); - + // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur // with PHI translation when a critical edge exists and the PHI node in @@ -1127,7 +1129,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; - + // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. @@ -1143,7 +1145,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Actually process results here; this need to be a separate loop to avoid // calling getNonLocalPointerDepFromBB for blocks we don't want to return - // any results for. (getNonLocalPointerDepFromBB will modify our + // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) for (unsigned i = 0; i < PredList.size(); i++) { @@ -1186,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; } } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -1204,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // The following code is "failure"; we can't produce a sane translation // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - + if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); } - + // Since we failed phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole @@ -1225,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // we have to bail out. if (SkipFirstBlock) return true; - + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); if (I->getBB() != BB) continue; - + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); @@ -1250,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, /// CachedNonLocalPointerInfo, remove it. void MemoryDependenceAnalysis:: RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { - CachedNonLocalPointerInfo::iterator It = + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; - + // Remove all of the entries in the BB->val map. This involves removing // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); } - + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). NonLocalPointerDeps.erase(It); } @@ -1321,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove this local dependency info. LocalDeps.erase(LocalDepEntry); } - + // If we have any cached pointer dependencies on this instruction, remove // them. If the instruction has non-pointer type, then it can't be a pointer // base. - + // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } - + // Loop over all of the things that depend on the instruction we're removing. - // + // SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1346,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); - + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && "Nothing can locally depend on a terminator"); - + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), E = ReverseDeps.end(); I != E; ++I) { Instruction *InstDependingOnRemInst = *I; assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); - + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; - + // Make sure to remember that new things depend on NewDepInst. assert(NewDirtyVal.getInst() && "There is no way something else can have " "a local dep on this if it is a terminator!"); - ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); } - + ReverseLocalDeps.erase(ReverseDepIt); // Add new reverse deps after scanning the set, to avoid invalidating the @@ -1379,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); - + PerInstNLInfo &INLD = NonLocalDeps[*I]; // The information is now dirty! INLD.second = true; - - for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); } @@ -1412,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a // value in the NonLocalPointerDeps info. ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = @@ -1420,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; - + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { ValueIsLoadPair P = *I; assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); - + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; - + // The cache is not valid for any specific block anymore. NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); - + // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); } - + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. std::sort(NLPDI.begin(), NLPDI.end()); } - + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); - + while (!ReversePtrDepsToAdd.empty()) { ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] .insert(ReversePtrDepsToAdd.back().second); ReversePtrDepsToAdd.pop_back(); } } - - + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); @@ -1472,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { assert(I->second.getInst() != D && "Inst occurs in data structures"); } - + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); @@ -1481,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } - + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1490,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = INLD.first.end(); II != EE; ++II) assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1498,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { @@ -1507,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseNonLocalPtrDepTy::const_iterator I = ReverseNonLocalPtrDeps.begin(), E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), E = I->second.end(); II != E; ++II) assert(*II != ValueIsLoadPair(D, false) && *II != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - + } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 72421a00c7..976cd87321 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { case Instruction::PtrToInt: // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. - if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && - OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) return TCC_Free; // Otherwise it's not a no-op. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 45b75df508..45dcc5e37e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // Check for pointer simplifications. if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, TD, Depth)) return true; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 4ded2818ed..bbb0432581 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,6 +112,17 @@ DIE::~DIE() { delete Children[i]; } +/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. +DIE *DIE::getCompileUnit() const{ + DIE *p = getParent(); + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit) + return p; + p = p->getParent(); + } + llvm_unreachable("We should not have orphaned DIEs."); +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 35d7959ac1..d087c540f2 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -152,6 +152,9 @@ namespace llvm { const std::vector<DIE *> &getChildren() const { return Children; } const SmallVector<DIEValue*, 32> &getValues() const { return Values; } DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile unit DIE this DIE belongs + /// to. + DIE *getCompileUnit() const; void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -232,9 +235,10 @@ namespace llvm { /// static unsigned BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { - if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; - if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; - if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; } else { if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 01f15e52e8..1c743c2414 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -35,7 +35,7 @@ using namespace llvm; CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0) { + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 77bf6a9e50..2b180c6cc3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -87,6 +87,9 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; + /// getLowerBoundDefault - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; @@ -103,6 +106,7 @@ public: unsigned getUniqueID() const { return UniqueID; } unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } @@ -120,6 +124,7 @@ public: return AccelTypes; } + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 30bfa78989..b169602b0e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); + if (AbsSPDIE) { + bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); + // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of + // DW_FORM_ref4. SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + AbsSPDIE); SPCU->addDie(SPDie); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -1692,15 +1697,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Compute the size and offset of all the DIEs. void DwarfUnits::computeSizeAndOffsets() { + // Offset from the beginning of debug info section. + unsigned AccuOffset = 0; for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { + (*I)->setDebugInfoOffset(AccuOffset); unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - computeSizeAndOffset((*I)->getCUDie(), Offset); + unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + AccuOffset += EndOffset; } } @@ -1774,6 +1783,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); + if (Form == dwarf::DW_FORM_ref_addr) { + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Origin->getOffset() returns the offset from start of the + // compile unit. + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Addr += Holder.getCUOffset(Origin->getCompileUnit()); + } Asm->EmitInt32(Addr); break; } @@ -1871,6 +1887,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } +/// For a given compile unit DIE, returns offset from beginning of debug info. +unsigned DwarfUnits::getCUOffset(DIE *Die) { + assert(Die->getTag() == dwarf::DW_TAG_compile_unit && + "Input DIE should be compile unit in getCUOffset."); + for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + if (TheCU->getCUDie() == Die) + return TheCU->getDebugInfoOffset(); + } + llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); +} + // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 771bc362cb..81e345e628 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -274,6 +274,10 @@ public: /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } + + /// \brief for a given compile unit DIE, returns offset from beginning of + /// debug info. + unsigned getCUOffset(DIE *Die); }; /// \brief Collects and handles dwarf debug information. diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3b28e6afb6..7793e96c35 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx) { + SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5e04f2d8a3..04321f3292 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index a777f52cb2..1af00e84a6 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 103b058c13..c872355e37 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -2182,7 +2182,7 @@ public: /// Callback to select the highest priority node from the ready Q. virtual SUnit *pickNode(bool &IsTopNode) { if (ReadyQ.empty()) return NULL; - pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 32830f57bc..855a8c5f92 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -101,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 716fb93b29..61603e18f5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, DAG.getSetCC(N->getDebugLoc(), TLI.getSetCCResultType(VT), diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index e26d1656e8..b6436bf427 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) + for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); + // Propagate node ordering + DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); + } return true; } diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index d2269f8acc..7e7b8974be 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -33,8 +33,10 @@ class SDNodeOrdering { public: SDNodeOrdering() {} - void add(const SDNode *Node, unsigned O) { - OrderMap[Node] = O; + void add(const SDNode *Node, unsigned NewOrder) { + unsigned &OldOrder = OrderMap[Node]; + if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) + OldOrder = NewOrder; } void remove(const SDNode *Node) { DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 35707e86ce..64244313a3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); - if (ISD::isZEXTLoad(Op.getNode())) { + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); @@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ break; } - // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { - unsigned ExtType = LD->getExtensionType(); - switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp; + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 39a1f8a3d0..eeea9e4cfc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); ST.resetSubtargetFeatures(MF); + TM.resetTargetOptions(MF); DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -784,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() { if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. - if (ResNode) + if (ResNode) { + // Propagate ordering + CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); + ReplaceUses(Node, ResNode); + } // If after the replacement this node is not used any more, // remove this dead node. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 320128a999..c5bbba3ffc 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -29,6 +29,7 @@ #define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9a9ddc9b48..3bdca4c640 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -742,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; - if (Kind.isThreadLocal()) - return ".tls$"; + if (Kind.isThreadLocal()) { + // 'LLVM' is just an arbitary string to ensure that the section name gets + // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. + return ".tls$LLVM"; + } if (Kind.isWriteable()) return ".data$"; return ".rdata$"; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index f31f67d58c..783bfa1c1a 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency( report_fatal_error(ss.str()); } #endif - return DefMI->isTransient() ? 0 : 1; + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp index 2efbfd1f92..7dff9ff49a 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -16,7 +16,7 @@ using namespace llvm; void DWARFDebugArangeSet::clear() { Offset = -1U; - std::memset(&Header, 0, sizeof(Header)); + std::memset(&HeaderData, 0, sizeof(Header)); ArangeDescriptors.clear(); } @@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // descriptor on the target system. This header is followed by a series // of tuples. Each tuple consists of an address and a length, each in // the size appropriate for an address on the target architecture. - Header.Length = data.getU32(offset_ptr); - Header.Version = data.getU16(offset_ptr); - Header.CuOffset = data.getU32(offset_ptr); - Header.AddrSize = data.getU8(offset_ptr); - Header.SegSize = data.getU8(offset_ptr); + HeaderData.Length = data.getU32(offset_ptr); + HeaderData.Version = data.getU16(offset_ptr); + HeaderData.CuOffset = data.getU32(offset_ptr); + HeaderData.AddrSize = data.getU8(offset_ptr); + HeaderData.SegSize = data.getU8(offset_ptr); // Perform basic validation of the header fields. - if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) || - (Header.AddrSize != 4 && Header.AddrSize != 8)) { + if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) || + (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) { clear(); return false; } @@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // size of an address). The header is padded, if necessary, to the // appropriate boundary. const uint32_t header_size = *offset_ptr - Offset; - const uint32_t tuple_size = Header.AddrSize * 2; + const uint32_t tuple_size = HeaderData.AddrSize * 2; uint32_t first_tuple_offset = 0; while (first_tuple_offset < header_size) first_tuple_offset += tuple_size; @@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { Descriptor arangeDescriptor; assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length)); - assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize); + assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize); while (data.isValidOffset(*offset_ptr)) { - arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize); - arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize); + arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); + arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); // Each set of tuples is terminated by a 0 for the address and 0 // for the length. @@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { void DWARFDebugArangeSet::dump(raw_ostream &OS) const { OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ", - Header.Length, Header.Version) + HeaderData.Length, HeaderData.Version) << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", - Header.CuOffset, Header.AddrSize, Header.SegSize); + HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize); - const uint32_t hex_width = Header.AddrSize * 2; + const uint32_t hex_width = HeaderData.AddrSize * 2; for (DescriptorConstIter pos = ArangeDescriptors.begin(), end = ArangeDescriptors.end(); pos != end; ++pos) OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address) @@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const { std::find_if(ArangeDescriptors.begin(), end, // Range DescriptorContainsAddress(address)); // Predicate if (pos != end) - return Header.CuOffset; + return HeaderData.CuOffset; return -1U; } diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h index 9a2a6d0f00..d76867615a 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/lib/DebugInfo/DWARFDebugArangeSet.h @@ -48,7 +48,7 @@ private: typedef DescriptorColl::const_iterator DescriptorConstIter; uint32_t Offset; - Header Header; + Header HeaderData; DescriptorColl ArangeDescriptors; public: @@ -58,11 +58,11 @@ public: bool extract(DataExtractor data, uint32_t *offset_ptr); void dump(raw_ostream &OS) const; - uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; } - uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; } + uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } + uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; } uint32_t findAddress(uint64_t address) const; uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } - const struct Header &getHeader() const { return Header; } + const struct Header &getHeader() const { return HeaderData; } const Descriptor *getDescriptor(uint32_t i) const { if (i < ArangeDescriptors.size()) return &ArangeDescriptors[i]; diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index b077eb5e38..f79862d606 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -186,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const { Range range(address); RangeCollIterator begin = Aranges.begin(); RangeCollIterator end = Aranges.end(); - RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan); + RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan); if (pos != end && pos->LoPC <= address && address < pos->HiPC()) { return pos->Offset; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index ed2bf05e90..2d828914cd 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -649,6 +649,13 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, + StringRef Kind) const { + llvm::AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Idx, AttributeSet::get(C, Idx, B)); +} + AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { if (!pImpl) return Attrs; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 0c7effb5ca..1abb656435 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -47,6 +47,16 @@ bool Constant::isNegativeZeroValue() const { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) return CFP->isZero() && CFP->isNegative(); + // Equivalent for a vector of -0.0's. + if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + return true; + + // We've already handled true FP case; any other FP vectors can't represent -0.0. + if (getType()->isFPOrFPVectorTy()) + return false; + // Otherwise, just use +0.0. return isNullValue(); } diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 5ee36abc6b..a97b740d26 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -71,6 +71,16 @@ static MDNode *getNonCompileUnitScope(MDNode *N) { return N; } +static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename, + StringRef Directory) { + assert(!Filename.empty() && "Unable to create file without name"); + Value *Pair[] = { + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + }; + return MDNode::get(VMContext, Pair); +} + /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, @@ -93,13 +103,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + createFilePathPair(VMContext, Filename, Directory), ConstantInt::get(Type::getInt32Ty(VMContext), Lang), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), MDString::get(VMContext, Producer), - // isMain field can be removed when we remove the legacy debug info. - ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), @@ -119,13 +125,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, /// createFile - Create a file descriptor to hold debugging information /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { - assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); - assert(!Filename.empty() && "Unable to create file without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - NULL // TheCU + createFilePathPair(VMContext, Filename, Directory) }; return DIFile(MDNode::get(VMContext, Elts)); } @@ -148,9 +150,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) { // ,size, alignment, offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -171,9 +173,9 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, // offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), + NULL, // File/directory name NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -190,9 +192,9 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, StringRef()), // Empty name. - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -210,9 +212,9 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -227,9 +229,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), + NULL, // Filename NULL, //TheCU, NULL, - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -248,9 +250,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), + NULL, // Filename NULL, // TheCU, NULL, // Name - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -268,9 +270,9 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -288,9 +290,9 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { assert(FriendTy.Verify() && "Invalid friend type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_friend), + NULL, Ty, NULL, // Name - Ty.getFile(), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -309,9 +311,9 @@ DIDerivedType DIBuilder::createInheritance( // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), + NULL, Ty, NULL, // Name - Ty.getFile(), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -330,9 +332,9 @@ DIDerivedType DIBuilder::createMemberType( // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -353,9 +355,9 @@ DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name, Flags |= DIDescriptor::FlagStaticMember; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/), ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/), @@ -379,9 +381,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name, // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(File), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -406,9 +408,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name, // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(File), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -492,9 +494,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -524,9 +526,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, // TAG_structure_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -551,9 +553,9 @@ DICompositeType DIBuilder::createUnionType( // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -574,8 +576,8 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), Constant::getNullValue(Type::getInt32Ty(VMContext)), - MDString::get(VMContext, ""), Constant::getNullValue(Type::getInt32Ty(VMContext)), + MDString::get(VMContext, ""), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -598,9 +600,9 @@ DICompositeType DIBuilder::createEnumerationType( // TAG_enumeration_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -622,9 +624,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + NULL, // Filename/Directory, NULL, //TheCU, MDString::get(VMContext, ""), - NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -645,9 +647,9 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, // A vector is an array type with the FlagVector flag applied. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + NULL, // Filename/Directory, NULL, //TheCU, MDString::get(VMContext, ""), - NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -725,29 +727,6 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { return DIDescriptor(MDNode::get(VMContext, Elts)); } -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType() { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType(DIFile F) { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { - GetTagConstant(VMContext, DW_TAG_base_type), - TheCU, - NULL, - F - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - /// createForwardDecl - Create a temporary forward-declared type that /// can be RAUW'd if the full type is seen. DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, @@ -758,9 +737,9 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, // Create a temporary MDNode. Value *Elts[] = { GetTagConstant(VMContext, Tag), + F.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - F, ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -798,17 +777,18 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { return DISubrange(MDNode::get(VMContext, Elts)); } -/// createGlobalVariable - Create a new descriptor for the specified global. +/// \brief Create a new descriptor for the specified global. DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, Value *Val) { +createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F, + unsigned LineNumber, DIType Ty, bool isLocalToUnit, + Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), NULL, // TheCU, MDString::get(VMContext, Name), MDString::get(VMContext, Name), - MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), Ty, @@ -822,6 +802,14 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, return DIGlobalVariable(Node); } +/// \brief Create a new descriptor for the specified global. +DIGlobalVariable DIBuilder:: +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, Value *Val) { + return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit, + Val); +} + /// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: @@ -920,12 +908,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), @@ -946,7 +933,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, // Create a named metadata so that we do not lose this mdnode. if (isDefinition) AllSubprograms.push_back(Node); - return DISubprogram(Node); + DISubprogram S(Node); + assert(S.Verify() && "createFunction should return a valid DISubprogram"); + return S; } /// createMethod - Create a new descriptor for the specified C++ method. @@ -966,12 +955,11 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + F.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), - F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), @@ -991,7 +979,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, MDNode *Node = MDNode::get(VMContext, Elts); if (isDefinition) AllSubprograms.push_back(Node); - return DISubprogram(Node); + DISubprogram S(Node); + assert(S.Verify() && "createMethod should return a valid DISubprogram"); + return S; } /// createNameSpace - This creates new descriptor for a namespace @@ -1000,9 +990,9 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; DINameSpace R(MDNode::get(VMContext, Elts)); @@ -1033,10 +1023,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + File, getNonCompileUnitScope(Scope), ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt32Ty(VMContext), Col), - File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; DILexicalBlock R(MDNode::get(VMContext, Elts)); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index f09de3a731..9596674912 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -438,6 +438,12 @@ DataLayout::~DataLayout() { delete static_cast<StructLayoutMap*>(LayoutMap); } +bool DataLayout::doFinalization(Module &M) { + delete static_cast<StructLayoutMap*>(LayoutMap); + LayoutMap = 0; + return false; +} + const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); @@ -504,47 +510,6 @@ std::string DataLayout::getStringRepresentation() const { } -uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { - assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); - switch (Ty->getTypeID()) { - case Type::LabelTyID: - return getPointerSizeInBits(0); - case Type::PointerTyID: { - unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace(); - return getPointerSizeInBits(AS); - } - case Type::ArrayTyID: { - ArrayType *ATy = cast<ArrayType>(Ty); - return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); - } - case Type::StructTyID: - // Get the layout annotation... which is lazily created on demand. - return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); - case Type::IntegerTyID: - return cast<IntegerType>(Ty)->getBitWidth(); - case Type::HalfTyID: - return 16; - case Type::FloatTyID: - return 32; - case Type::DoubleTyID: - case Type::X86_MMXTyID: - return 64; - case Type::PPC_FP128TyID: - case Type::FP128TyID: - return 128; - // In memory objects this is always aligned to a higher boundary, but - // only 80 bits contain information. - case Type::X86_FP80TyID: - return 80; - case Type::VectorTyID: { - VectorType *VTy = cast<VectorType>(Ty); - return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType()); - } - default: - llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); - } -} - /*! \param abi_or_pref Flag that determines which alignment is returned. true returns the ABI alignment, false returns the preferred alignment. diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index e85d4adf77..91968d8fbf 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -66,18 +66,28 @@ bool DIDescriptor::Verify() const { DITemplateValueParameter(DbgNode).Verify()); } -StringRef -DIDescriptor::getStringField(unsigned Elt) const { - if (DbgNode == 0) - return StringRef(); +static Value *getField(const MDNode *DbgNode, unsigned Elt) { + if (DbgNode == 0 || Elt >= DbgNode->getNumOperands()) + return 0; + return DbgNode->getOperand(Elt); +} - if (Elt < DbgNode->getNumOperands()) - if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt))) - return MDS->getString(); +static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { + if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt))) + return R; + return 0; +} +static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { + if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt))) + return MDS->getString(); return StringRef(); } +StringRef DIDescriptor::getStringField(unsigned Elt) const { + return ::getStringField(DbgNode, Elt); +} + uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { if (DbgNode == 0) return 0; @@ -407,7 +417,7 @@ bool DICompileUnit::Verify() const { if (N.empty()) return false; // It is possible that directory and produce string is empty. - return DbgNode->getNumOperands() == 15; + return DbgNode->getNumOperands() == 12; } /// Verify - Verify that an ObjC property is well formed. @@ -475,7 +485,7 @@ bool DISubprogram::Verify() const { DICompositeType Ty = getType(); if (!Ty.Verify()) return false; - return DbgNode->getNumOperands() == 21; + return DbgNode->getNumOperands() == 20; } /// Verify - Verify that a global variable descriptor is well formed. @@ -529,9 +539,14 @@ bool DINameSpace::Verify() const { return DbgNode->getNumOperands() == 5; } +/// \brief Retrieve the MDNode for the directory/file pair. +MDNode *DIFile::getFileNode() const { + return const_cast<MDNode*>(getNodeField(DbgNode, 1)); +} + /// \brief Verify that the file descriptor is well formed. bool DIFile::Verify() const { - return isFile() && DbgNode->getNumOperands() == 4; + return isFile() && DbgNode->getNumOperands() == 2; } /// \brief Verify that the enumerator descriptor is well formed. @@ -627,21 +642,21 @@ bool DISubprogram::describes(const Function *F) { unsigned DISubprogram::isOptimized() const { assert (DbgNode && "Invalid subprogram descriptor!"); - if (DbgNode->getNumOperands() == 16) - return getUnsignedField(15); + if (DbgNode->getNumOperands() == 15) + return getUnsignedField(14); return 0; } MDNode *DISubprogram::getVariablesNodes() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) + if (!DbgNode || DbgNode->getNumOperands() <= 18) return NULL; - return dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)); + return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)); } DIArray DISubprogram::getVariables() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) + if (!DbgNode || DbgNode->getNumOperands() <= 18) return DIArray(); - if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18))) return DIArray(T); return DIArray(); } @@ -653,17 +668,7 @@ StringRef DIScope::getFilename() const { return DILexicalBlockFile(DbgNode).getFilename(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getFilename(); - if (isSubprogram()) - return DISubprogram(DbgNode).getFilename(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getFilename(); - if (isNameSpace()) - return DINameSpace(DbgNode).getFilename(); - if (isType()) - return DIType(DbgNode).getFilename(); - if (isFile()) - return DIFile(DbgNode).getFilename(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 0); } StringRef DIScope::getDirectory() const { @@ -673,52 +678,42 @@ StringRef DIScope::getDirectory() const { return DILexicalBlockFile(DbgNode).getDirectory(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getDirectory(); - if (isSubprogram()) - return DISubprogram(DbgNode).getDirectory(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getDirectory(); - if (isNameSpace()) - return DINameSpace(DbgNode).getDirectory(); - if (isType()) - return DIType(DbgNode).getDirectory(); - if (isFile()) - return DIFile(DbgNode).getDirectory(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 1); } DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) return DIArray(N); return DIArray(); } @@ -1026,6 +1021,8 @@ void DIDescriptor::print(raw_ostream &OS) const { DIVariable(DbgNode).printInternal(OS); } else if (this->isObjCProperty()) { DIObjCProperty(DbgNode).printInternal(OS); + } else if (this->isNameSpace()) { + DINameSpace(DbgNode).printInternal(OS); } else if (this->isScope()) { DIScope(DbgNode).printInternal(OS); } @@ -1099,6 +1096,14 @@ void DICompositeType::printInternal(raw_ostream &OS) const { OS << " [" << A.getNumElements() << " elements]"; } +void DINameSpace::printInternal(raw_ostream &OS) const { + StringRef Name = getName(); + if (!Name.empty()) + OS << " [" << Name << ']'; + + OS << " [line " << getLineNumber() << ']'; +} + void DISubprogram::printInternal(raw_ostream &OS) const { // TODO : Print context OS << " [line " << getLineNumber() << ']'; diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 5559a6c56e..1e72b90a13 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -211,7 +211,7 @@ Function::~Function() { clearGC(); // Remove the intrinsicID from the Cache. - if(getValueName() && isIntrinsic()) + if (getValueName() && isIntrinsic()) getContext().pImpl->IntrinsicIDCache.erase(this); } @@ -352,7 +352,7 @@ unsigned Function::getIntrinsicID() const { LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache = getContext().pImpl->IntrinsicIDCache; - if(!IntrinsicIDCache.count(this)) { + if (!IntrinsicIDCache.count(this)) { unsigned Id = lookupIntrinsicID(); IntrinsicIDCache[this]=Id; return Id; diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 2e3a525826..841cc5926a 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -256,6 +256,13 @@ void LandingPadInst::addClause(Value *Val) { OperandList[OpNo] = Val; } +bool LandingPadInst::hasCatchAll() const { + for (unsigned I = 0, E = getNumClauses(); I != E; ++I) + if (isCatch(I) && isa<ConstantPointerNull>(getClause(I))) + return true; + return false; +} + //===----------------------------------------------------------------------===// // CallInst Implementation //===----------------------------------------------------------------------===// @@ -3000,8 +3007,8 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { uint32_t BitWidth = C.getBitWidth(); switch (pred) { default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!"); - case ICmpInst::ICMP_EQ: Upper++; break; - case ICmpInst::ICMP_NE: Lower++; break; + case ICmpInst::ICMP_EQ: ++Upper; break; + case ICmpInst::ICMP_NE: ++Lower; break; case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); // Check for an empty-set condition. @@ -3015,25 +3022,25 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_UGT: - Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) // Check for an empty-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_SGT: - Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) // Check for an empty-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_ULE: - Lower = APInt::getMinValue(BitWidth); Upper++; + Lower = APInt::getMinValue(BitWidth); ++Upper; // Check for a full-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SLE: - Lower = APInt::getSignedMinValue(BitWidth); Upper++; + Lower = APInt::getSignedMinValue(BitWidth); ++Upper; // Check for a full-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/true); diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 0acbcfadaf..74cbdadd61 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index c8ea8ff0a9..74d24f278b 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -15,7 +15,6 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" using namespace llvm; @@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname, LLVMContext& C, unsigned flags): Context(C), Composite(new Module(modname, C)), - LibPaths(), Flags(flags), Error(), ProgramName(progname) { } @@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname, Linker::Linker(StringRef progname, Module* aModule, unsigned flags) : Context(aModule->getContext()), Composite(aModule), - LibPaths(), Flags(flags), Error(), ProgramName(progname) { } @@ -63,27 +60,9 @@ Linker::verbose(StringRef message) { errs() << " " << message << "\n"; } -void -Linker::addPath(const sys::Path& path) { - LibPaths.push_back(path); -} - -void -Linker::addPaths(const std::vector<std::string>& paths) { - for (unsigned i = 0, e = paths.size(); i != e; ++i) - LibPaths.push_back(sys::Path(paths[i])); -} - -void -Linker::addSystemPaths() { - sys::Path::GetBitcodeLibraryPaths(LibPaths); - LibPaths.insert(LibPaths.begin(),sys::Path("./")); -} - Module* Linker::releaseModule() { Module* result = Composite; - LibPaths.clear(); Error.clear(); Composite = 0; Flags = 0; diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 26d378e6c0..9adcc02b71 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -304,8 +304,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked // to not be less than one. This needs to be change to be not less than zero. - std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; - std::vector<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID]; + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; + SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID]; // Make space for this FileNumber in the MCDwarfFiles vector if needed. if (FileNumber >= MCDwarfFiles.size()) { MCDwarfFiles.resize(FileNumber + 1); @@ -366,7 +366,7 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it /// currently is assigned and false otherwise. bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { - std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size()) return false; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index d3fa906a06..4766b37476 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, // Get the assembler info needed to setup the MCContext. const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); - assert(MAI && "Unable to create target asm info!"); + if (!MAI) + return 0; const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); - assert(MII && "Unable to create target instruction info!"); + if (!MII) + return 0; const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - assert(MRI && "Unable to create target register info!"); + if (!MRI) + return 0; // Package up features to be passed to target/subtarget std::string FeaturesStr; const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, FeaturesStr); - assert(STI && "Unable to create subtarget info!"); + if (!STI) + return 0; // Set up the MCContext for creating symbols and MCExpr's. MCContext *Ctx = new MCContext(*MAI, *MRI, 0); - assert(Ctx && "Unable to create MCContext!"); + if (!Ctx) + return 0; // Set up disassembler. MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); - assert(DisAsm && "Unable to create disassembler!"); + if (!DisAsm) + return 0; DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, *MAI, *MII, *MRI, *STI); - assert(IP && "Unable to create instruction printer!"); + if (!IP) + return 0; LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); - assert(DC && "Allocation failure!"); + if (!DC) + return 0; return DC; } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c81abe952a..0f8f0741bd 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -298,7 +298,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { // Put out the directory and file tables. // First the directory table. - const std::vector<StringRef> &MCDwarfDirs = + const SmallVectorImpl<StringRef> &MCDwarfDirs = context.getMCDwarfDirs(CUID); for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName @@ -307,7 +307,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { MCOS->EmitIntValue(0, 1); // Terminate the directory list // Second the file table. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(CUID); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName @@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. - const std::vector<StringRef> &MCDwarfDirs = + const SmallVectorImpl<StringRef> &MCDwarfDirs = context.getMCDwarfDirs(); if (MCDwarfDirs.size() > 0) { MCOS->EmitBytes(MCDwarfDirs[0]); MCOS->EmitBytes("/"); } - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); MCOS->EmitBytes(MCDwarfFiles[1]->getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 2e1a045569..bafa002e3d 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -223,6 +223,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { } void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { + // FIXME: Check this. Mips64el is using the base values, which is most likely + // incorrect. + if (T.getArch() != Triple::mips64el) + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (T.getArch() == Triple::x86) { PersonalityEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 @@ -230,15 +235,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { LSDAEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; - FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_) + FDEEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; TTypeEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; } else if (T.getArch() == Triple::x86_64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) @@ -261,8 +264,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } } else if (T.getArch() == Triple::aarch64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. @@ -282,7 +283,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { } else if (T.getArch() == Triple::ppc64) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 6ab49ec92c..804734cea9 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return TokError("unmatched .ifs or .elses"); // Check to see there are no empty DwarfFile slots. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { if (!MCDwarfFiles[i]) @@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // If we previously parsed a cpp hash file line comment then make sure the // current Dwarf File is for the CppHashFilename if not then emit the // Dwarf File table for it and adjust the line number for the .loc. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); if (CppHashFilename.size() != 0) { if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != @@ -4105,12 +4105,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, MCParsedAsmOperand *Operand = Info.ParsedOperands[i]; // Immediate. - if (Operand->isImm()) { - if (Operand->needAsmRewrite()) - AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix, - Operand->getStartLoc())); + if (Operand->isImm()) continue; - } // Register operand. if (Operand->isReg() && !Operand->needAddressOf()) { @@ -4131,11 +4127,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, continue; bool isOutput = (i == 1) && Desc.mayStore(); - if (Operand->isMem() && Operand->needSizeDirective()) - AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, - Operand->getStartLoc(), /*Len*/0, - Operand->getMemSize())); - if (isOutput) { ++InputIdx; OutputDecls.push_back(OpDecl); @@ -4184,28 +4175,31 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Build the IR assembly string. std::string AsmStringIR; - AsmRewriteKind PrevKind = AOK_Imm; raw_string_ostream OS(AsmStringIR); - const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart(); + const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart(); + const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); - assert(Loc >= Start && "Expected Loc to be after Start!"); + assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); unsigned AdditionalSkip = 0; AsmRewriteKind Kind = (*I).Kind; - // Emit everything up to the immediate/expression. If the previous rewrite - // was a size directive, then this has already been done. - if (PrevKind != AOK_SizeDirective) - OS << StringRef(Start, Loc - Start); - PrevKind = Kind; + // Emit everything up to the immediate/expression. + unsigned Len = Loc - AsmStart; + if (Len) { + // For Input/Output operands we need to remove the brackets, if present. + if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[') + --Len; + OS << StringRef(AsmStart, Len); + } // Skip the original expression. if (Kind == AOK_Skip) { - Start = Loc + (*I).Len; + AsmStart = Loc + (*I).Len; continue; } @@ -4254,14 +4248,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Skip the original expression. - if (Kind != AOK_SizeDirective) - Start = Loc + (*I).Len + AdditionalSkip; + AsmStart = Loc + (*I).Len + AdditionalSkip; + + // For Input/Output operands we need to remove the brackets, if present. + if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd && + *AsmStart == ']') + ++AsmStart; } // Emit the remainder of the asm string. - const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); - if (Start != AsmEnd) - OS << StringRef(Start, AsmEnd - Start); + if (AsmStart != AsmEnd) + OS << StringRef(AsmStart, AsmEnd - AsmStart); AsmString = OS.str(); return false; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 51ef415542..d02e5535bd 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -24,7 +24,7 @@ using namespace llvm; MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.push_back(std::make_pair(section, section)); } @@ -40,7 +40,7 @@ void MCStreamer::reset() { EmitDebugFrame = false; CurrentW64UnwindInfo = 0; LastSymbol = 0; - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.clear(); SectionStack.push_back(std::make_pair(section, section)); } @@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() { if (FrameInfos.empty()) - return NULL; + return 0; return &FrameInfos.back(); } @@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); - MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset); + MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -623,5 +623,5 @@ void MCStreamer::Finish() { MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) { report_fatal_error("Not supported!"); - return *(static_cast<MCSymbolData*> (NULL)); + return *(static_cast<MCSymbolData*>(0)); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 5b68fbb270..6182e34150 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3311,10 +3311,8 @@ namespace { significand = significand.udiv(divisor); - // Truncate the significand down to its active bit count, but - // don't try to drop below 32. - unsigned newPrecision = std::max(32U, significand.getActiveBits()); - significand = significand.trunc(newPrecision); + // Truncate the significand down to its active bit count. + significand = significand.trunc(significand.getActiveBits()); } diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 07cb057b48..e8534753b4 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const { if (lhsNeg) { // Sign bit is set so perform two's complement to make it positive lhs.flipAllBits(); - lhs++; + ++lhs; } if (rhsNeg) { // Sign bit is set so perform two's complement to make it positive rhs.flipAllBits(); - rhs++; + ++rhs; } // Now we have unsigned values to compare so do the comparison if necessary @@ -2116,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { } // If its negative, put it in two's complement form if (isNeg) { - (*this)--; + --(*this); this->flipAllBits(); } } @@ -2197,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, // Flip the bits and add one to turn it into the equivalent positive // value and put a '-' in the result. Tmp.flipAllBits(); - Tmp++; + ++Tmp; Str.push_back('-'); } diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index cd430f218b..1ee69b6023 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (EC) return EC; - OwningPtr<mapped_file_region> MappedFile( - new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC)); + OwningPtr<mapped_file_region> MappedFile(new mapped_file_region( + FD, true, mapped_file_region::readwrite, Size, 0, EC)); if (EC) return EC; diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 4c558b37cf..8042237253 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -72,13 +72,15 @@ static void CopyStringRef(char *Memory, StringRef Data) { Memory[Data.size()] = 0; // Null terminate string. } -/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. -template <typename T> -static T *GetNamedBuffer(StringRef Buffer, StringRef Name, - bool RequiresNullTerminator) { - char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); - CopyStringRef(Mem + sizeof(T), Name); - return new (Mem) T(Buffer, RequiresNullTerminator); +struct NamedBufferAlloc { + StringRef Name; + NamedBufferAlloc(StringRef Name) : Name(Name) {} +}; + +void *operator new(size_t N, const NamedBufferAlloc &Alloc) { + char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1)); + CopyStringRef(Mem + N, Alloc.Name); + return Mem; } namespace { @@ -105,8 +107,8 @@ public: MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, bool RequiresNullTerminator) { - return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName, - RequiresNullTerminator); + return new (NamedBufferAlloc(BufferName)) + MemoryBufferMem(InputData, RequiresNullTerminator); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, @@ -183,24 +185,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename, //===----------------------------------------------------------------------===// namespace { -/// MemoryBufferMMapFile - This represents a file that was mapped in with the -/// sys::Path::MapInFilePages method. When destroyed, it calls the -/// sys::Path::UnMapFilePages method. -class MemoryBufferMMapFile : public MemoryBufferMem { -public: - MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator) - : MemoryBufferMem(Buffer, RequiresNullTerminator) { } +/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region. +/// +/// This handles converting the offset into a legal offset on the platform. +class MemoryBufferMMapFile : public MemoryBuffer { + sys::fs::mapped_file_region MFR; + + static uint64_t getLegalMapOffset(uint64_t Offset) { + return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); + } + + static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { + return Len + (Offset - getLegalMapOffset(Offset)); + } - ~MemoryBufferMMapFile() { - static int PageSize = sys::process::get_self()->page_size(); + const char *getStart(uint64_t Len, uint64_t Offset) { + return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); + } - uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart()); - size_t Size = getBufferSize(); - uintptr_t RealStart = Start & ~(PageSize - 1); - size_t RealSize = Size + (Start - RealStart); +public: + MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len, + uint64_t Offset, error_code EC) + : MFR(FD, false, sys::fs::mapped_file_region::readonly, + getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) { + if (!EC) { + const char *Start = getStart(Len, Offset); + init(Start, Start + Len, RequiresNullTerminator); + } + } - sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart), - RealSize); + virtual const char *getBufferIdentifier() const LLVM_OVERRIDE { + // The name is stored after the class itself. + return reinterpret_cast<const char *>(this + 1); } virtual BufferKind getBufferKind() const LLVM_OVERRIDE { @@ -344,17 +360,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize)) { - off_t RealMapOffset = Offset & ~(PageSize - 1); - off_t Delta = Offset - RealMapOffset; - size_t RealMapSize = MapSize + Delta; - - if (const char *Pages = sys::Path::MapInFilePages(FD, - RealMapSize, - RealMapOffset)) { - result.reset(GetNamedBuffer<MemoryBufferMMapFile>( - StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); + error_code EC; + result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( + RequiresNullTerminator, FD, MapSize, Offset, EC)); + if (!EC) return error_code::success(); - } } MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 41add96194..58a6ea720e 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -18,6 +18,9 @@ #include <cctype> #include <cstdio> #include <cstring> +#ifdef __APPLE__ +#include <unistd.h> +#endif namespace { using llvm::StringRef; @@ -493,6 +496,27 @@ bool is_separator(char value) { void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { result.clear(); +#ifdef __APPLE__ + // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR. + int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR + : _CS_DARWIN_USER_CACHE_DIR; + size_t ConfLen = confstr(ConfName, 0, 0); + if (ConfLen > 0) { + do { + result.resize(ConfLen); + ConfLen = confstr(ConfName, result.data(), result.size()); + } while (ConfLen > 0 && ConfLen != result.size()); + + if (ConfLen > 0) { + assert(result.back() == 0); + result.pop_back(); + return; + } + + result.clear(); + } +#endif + // Check whether the temporary directory is specified by an environment // variable. const char *EnvironmentVariable; diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index e00394ec6a..e9b26bdb80 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr, __clear_cache(const_cast<char *>(Start), const_cast<char *>(End)); # elif defined(__mips__) const char *Start = static_cast<const char *>(Addr); +# if defined(ANDROID) + // The declaration of "cacheflush" in Android bionic: + // extern int cacheflush(long start, long end, long flags); + const char *End = Start + Len; + long LStart = reinterpret_cast<long>(const_cast<char *>(Start)); + long LEnd = reinterpret_cast<long>(const_cast<char *>(End)); + cacheflush(LStart, LEnd, BCACHE); +# else cacheflush(const_cast<char *>(Start), Len, BCACHE); +# endif # endif #endif // end apple diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 44b31b3202..a3dfd4b0a3 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -475,12 +475,14 @@ rety_open_create: return error_code::success(); } -error_code mapped_file_region::init(int fd, uint64_t offset) { - AutoFD FD(fd); +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { + AutoFD ScopedFD(FD); + if (!CloseFD) + ScopedFD.take(); // Figure out how large the file is. struct stat FileInfo; - if (fstat(fd, &FileInfo) == -1) + if (fstat(FD, &FileInfo) == -1) return error_code(errno, system_category()); uint64_t FileSize = FileInfo.st_size; @@ -488,7 +490,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { Size = FileSize; else if (FileSize < Size) { // We need to grow the file. - if (ftruncate(fd, Size) == -1) + if (ftruncate(FD, Size) == -1) return error_code(errno, system_category()); } @@ -497,7 +499,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { #ifdef MAP_FILE flags |= MAP_FILE; #endif - Mapping = ::mmap(0, Size, prot, flags, fd, offset); + Mapping = ::mmap(0, Size, prot, flags, FD, Offset); if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); @@ -526,12 +528,13 @@ mapped_file_region::mapped_file_region(const Twine &path, return; } - ec = init(ofd, offset); + ec = init(ofd, true, offset); if (ec) Mapping = 0; } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -545,7 +548,7 @@ mapped_file_region::mapped_file_region(int fd, return; } - ec = init(fd, offset); + ec = init(fd, closefd, offset); if (ec) Mapping = 0; } diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 2e6cc96e7f..23f3d14f91 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -593,6 +593,10 @@ retry_random_path: random_path_utf16.push_back(0); random_path_utf16.pop_back(); + // Make sure we don't fall into an infinite loop by constantly trying + // to create the parent path. + bool TriedToCreateParent = false; + // Try to create + open the path. retry_create_file: HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(), @@ -610,7 +614,9 @@ retry_create_file: if (ec == windows_error::file_exists) goto retry_random_path; // Check for non-existing parent directories. - if (ec == windows_error::path_not_found) { + if (ec == windows_error::path_not_found && !TriedToCreateParent) { + TriedToCreateParent = true; + // Create the directories using result_path as temp storage. if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), random_path_utf16.size(), result_path)) @@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } -error_code mapped_file_region::init(int FD, uint64_t Offset) { +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { FileDescriptor = FD; // Make sure that the requested size fits within SIZE_T. if (Size > std::numeric_limits<SIZE_T>::max()) { - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return make_error_code(errc::invalid_argument); } @@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { 0); if (FileMappingHandle == NULL) { error_code ec = windows_error(GetLastError()); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { if (Mapping == NULL) { error_code ec = windows_error(GetLastError()); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { error_code ec = windows_error(GetLastError()); ::UnmapViewOfFile(Mapping); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } Size = mbi.RegionSize; } + + // Close all the handles except for the view. It will keep the other handles + // alive. + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); // Also closes FileHandle. + } else + ::CloseHandle(FileHandle); return error_code::success(); } @@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } FileDescriptor = 0; - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, true, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd, , FileMappingHandle() { FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); if (FileHandle == INVALID_HANDLE_VALUE) { - _close(FileDescriptor); + if (closefd) + _close(FileDescriptor); FileDescriptor = 0; ec = make_error_code(errc::bad_file_descriptor); return; } - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, closefd, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd, mapped_file_region::~mapped_file_region() { if (Mapping) ::UnmapViewOfFile(Mapping); - if (FileMappingHandle) - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else if (FileHandle != INVALID_HANDLE_VALUE) - ::CloseHandle(FileHandle); } #if LLVM_HAS_RVALUE_REFERENCES diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f71abd3b24..da26a371a7 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { size_t BytesToWrite = Size - (Size % NumBytes); write_impl(Ptr, BytesToWrite); - copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + size_t BytesRemaining = Size - BytesToWrite; + if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) { + // Too much left over to copy into our buffer. + return write(Ptr + BytesToWrite, BytesRemaining); + } + copy_to_buffer(Ptr + BytesToWrite, BytesRemaining); return *this; } diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index ec84a72454..928b1203cd 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -20,9 +20,15 @@ namespace llvm { SourceMgr SrcMgr; +unsigned ErrorsPrinted = 0; static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind, const Twine &Msg) { + // Count the total number of errors printed. + // This is used to exit with an error code if there were any errors. + if (Kind == SourceMgr::DK_Error) + ++ErrorsPrinted; + SMLoc NullLoc; if (Loc.empty()) Loc = NullLoc; diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index e1cd623783..dc4167b305 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) { if (MainFn(Out.os(), Records)) return 1; + if (ErrorsPrinted > 0) { + errs() << argv0 << ": " << ErrorsPrinted << " errors.\n"; + return 1; + } + // Declare success. Out.keep(); return 0; - - return 1; } } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index cca6d12e16..572617cc07 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, return TopOfFrameOffset - FrameRegPos; } -/// Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - // FIXME: Make generic? Really consider after upstreaming. This code is now - // shared between PEI, ARM *and* here. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-save register for this purpose or allocate an extra spill slot. bool BigStack = - (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index c1695dacb4..69bb80a485 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -160,44 +160,53 @@ private: SMLoc StartLoc, EndLoc; + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + union { - struct { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - } ImmWithLSL; - - struct { - A64CC::CondCodes Code; - } CondCode; - - struct { - double Val; - } FPImm; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned RegNum; - } Reg; - - struct { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - } ShiftExtend; - - struct { - const char *Data; - unsigned Length; - } SysReg; - - struct { - const char *Data; - unsigned Length; - } Tok; + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; }; AArch64Operand(KindTy K, SMLoc S, SMLoc E) diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000000..f0d4dbe2bf --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// There are several instruction patterns which can be used to provide this +// capability which can provide higher performance than other, potentially more +// direct patterns, specifically around when one micro-op reads a D-register +// operand that has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers which +// are going to be used by a DPR (or QPR) consumers and creates the more +// optimized access pattern. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers + // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure if don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if this is a use of a SPR register. +bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + + assert(MO->isReg() && "Non register operand found!"); + if (!MO) return ARM::ssub_0; + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineOperand *Op = MI->findRegisterDefOperand(Reg); + + if (!Op) + continue; + + MachineInstr *Def = Op->getParent(); + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead. + bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. +unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = MI->getOperand(I).getReg(); + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + assert(0 && "Unhandled update pattern!"); + return 0; +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register. +bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. +MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Defs; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Defs.push_back(MO.getReg()); + } + return Defs; +} + +// Creates a DPR register from an SPR one by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Creates a SPR register from a DPR by copying the value in lane 0. +unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. +unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and the using VEXT instructions to recompose the result. +unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + // * COPY: * VDUP all lanes and merge the results together + // using VEXTs. + // + // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + // lane, and the other lane(s) of the DPR/QPR register + // that we are inserting in are undefined, use the + // original DPR/QPR value. + // * Otherwise, fall back on the same stategy as COPY. + // + // * REG_SEQUENCE: * If all except one of the input operands are + // IMPLICIT_DEFs, insert the VDUP pattern for just the + // defined input operand + // * Otherwise, fall back on the same stategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Defs = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs). It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 5faf8c320c..80e5f37eb0 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 46915eecf6..68380847a0 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -143,14 +143,12 @@ include "ARMSchedule.td" // ARM processor families. def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c779830e..13ec208793 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) - .addImm(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed001ea24a..ed8b9cd9a1 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. That is where we keep floats diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0ca6450e2b..3b12408768 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 514971f01e..40d2e8d265 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + // Custom expand long extensions to vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -3433,6 +3443,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), +/// and size(DestVec) > 128-bits. +/// This is achieved by doing the one extension from the SrcVec, splitting the +/// result, extending these parts, and then concatenating these into the +/// destination. +static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DestVT = N->getValueType(0); + + assert(DestVT.getSizeInBits() > 128 && + "Custom sext/zext expansion needs >128-bit vector."); + // If this is a normal length extension, use the default expansion. + if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && + SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) + return SDValue(); + + DebugLoc dl = N->getDebugLoc(); + unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); + unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); + LLVMContext &Ctx = *DAG.getContext(); + SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; + + EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts); + EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts/2); + EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), + NumElts/2); + + Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); + SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(0)); + SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(NumElts/2)); + ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); + ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -5621,6 +5672,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = ExpandVectorExtension(N, DAG); + break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e11314d4fc..739300e4ef 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) + const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) + , Options(Options) , TargetABI(ARM_ABI_APCS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); @@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + UnsafeFPMath = false; } void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { @@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // configuration. if (!StrictAlign && hasV6Ops() && isTargetDarwin()) AllowsUnalignedMem = true; + + // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. + uint64_t Bits = getFeatureBits(); + if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + (Options.UnsafeFPMath || isTargetDarwin())) + UseNEONForSinglePrecisionFP = true; } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8ce22e1de2..5b5ee6aeb8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,6 +26,7 @@ namespace llvm { class GlobalValue; class StringRef; +class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: @@ -159,6 +160,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Target machine allowed unsafe FP math (such as use of NEON fp) + bool UnsafeFPMath; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -175,6 +179,9 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + /// Options passed via command line that could influence the target + const TargetOptions &Options; + public: enum { isELF, isDarwin @@ -189,7 +196,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 774521852a..b0f9e56db7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), + Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI @@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } @@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() { addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } - if (getARMSubtarget().hasNEON()) + if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) && + getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 01c04b48cf..1019b972e9 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + // Operations that we legalize using load/stores to the stack. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { @@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return NEONFloatConversionTbl[Idx].Cost; } - // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, @@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } @@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6c678fdbd7..ed7b7ec9d2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. - struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. - const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!tryParseRegisterWithWriteBack(Operands)) - return false; - int Res = tryParseShiftRegister(Operands); - if (Res == 0) // success - return false; - else if (Res == -1) // irrecoverable error - return true; - // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && - Parser.getTok().getString().equals_lower("apsr_nzcv")) { - S = Parser.getTok().getLoc(); - Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); - return false; + // If we've seen a branch mnemonic, the next operand must be a label. This + // is true even if the label is a register name. So "br r1" means branch to + // label "r1". + bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl"; + if (!ExpectLabel) { + if (!tryParseRegisterWithWriteBack(Operands)) + return false; + int Res = tryParseShiftRegister(Operands); + if (Res == 0) // success + return false; + else if (Res == -1) // irrecoverable error + return true; + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); + return false; + } } // Fall though for the Identifier case that is not a register or a diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 586834cf73..b832508a08 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 4c0f93c6cd..96a252e1b8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1949,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const { // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is // inserted to calculate the final address. Due to this reason, the function // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. switch(Opcode) { @@ -1958,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: - assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -1968,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STrid: case Hexagon::STrid_indexed: case Hexagon::STrid_f: - assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2002,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMw_SUBr_MEM_V4 : case Hexagon::MEMw_ANDr_MEM_V4 : case Hexagon::MEMw_ORr_MEM_V4 : - assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); return (0 <= Offset && Offset <= 255); case Hexagon::MEMh_ADDi_indexed_MEM_V4 : @@ -2017,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMh_SUBr_MEM_V4 : case Hexagon::MEMh_ANDr_MEM_V4 : case Hexagon::MEMh_ORr_MEM_V4 : - assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); return (0 <= Offset && Offset <= 127); case Hexagon::MEMb_ADDi_indexed_MEM_V4 : diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index ad495ff306..dda6e247ac 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + }; + + struct FslImmOp { + const MCExpr *Val; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - unsigned OffReg; - const MCExpr *Off; - } Mem; - - struct { - const MCExpr *Val; - } FslImm; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct FslImmOp FslImm; }; MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index d0fd7dcec1..bd83afc1cc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MBlazeRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { +processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99a2fac95c..497f3866c9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ae2e55617d..e504011dfd 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void -MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index a077dd7351..c673f59b5e 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -50,7 +50,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ade6084752..c403f216b0 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterClass); + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCInst> &Instructions); void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); @@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); + bool parseSetAssignment(); + bool parseDirectiveWord(unsigned Size, SMLoc L); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); @@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); int getATReg(); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -211,25 +222,30 @@ private: MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + const MCExpr *Off; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - RegisterKind Kind; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - const MCExpr *Off; - } Mem; + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; SMLoc StartLoc, EndLoc; @@ -385,6 +401,56 @@ public: }; } +namespace llvm { +extern const MCInstrDesc MipsInsts[]; +} +static const MCInstrDesc &getInstDesc(unsigned Opcode) { + return MipsInsts[Opcode]; +} + +bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + Inst.setLoc(IDLoc); + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions + for (unsigned i=0;i<MCID.getNumOperands();i++) { + const MCOperandInfo &OpInfo = MCID.OpInfo[i]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(i); + if (Op.isImm()) { + int MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + return false; + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef){ + const MCSymbolRefExpr *SR = + static_cast<const MCSymbolRefExpr*>(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + return false; + } + } + } + } + } + } + + if (needsExpansion(Inst)) + expandInstruction(Inst, IDLoc, Instructions); + else + Instructions.push_back(Inst); + + return false; +} + bool MipsAsmParser::needsExpansion(MCInst &Inst) { switch(Inst.getOpcode()) { @@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } } +void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd) { + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset,HiOffset,LoOffset; + const MCExpr *ExprOffset; + unsigned TmpRegNum; + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID, + getATReg()); + // 1st operand is either source or dst register + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + // 2nd operand is base register + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(1).getReg(); + // 3rd operand is either immediate or expression + if (isImmOpnd) { + assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset + if (LoOffset & 0x8000) + HiOffset++; + } + else + ExprOffset = Inst.getOperand(2).getExpr(); + // All instructions will have the same location + TempInst.setLoc(IDLoc); + // 1st instruction in expansion is LUi. For load instruction we can use + // the dst register as a temporary if base and dst are different, + // but for stores we must use $at + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + TempInst.setOpcode(Mips::LUi); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(HiOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } + } + // Add the instruction to the list + Instructions.push_back(TempInst); + // and prepare TempInst for next instruction + TempInst.clear(); + // which is add temp register to base + TempInst.setOpcode(Mips::ADDu); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); + Instructions.push_back(TempInst); + TempInst.clear(); + // and finaly, create original instruction with low part + // of offset and new base + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(LoOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } + } + Instructions.push_back(TempInst); + TempInst.clear(); +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; + SmallVector<MCInst, 8> Instructions; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: { - if (needsExpansion(Inst)) { - SmallVector<MCInst, 4> Instructions; - expandInstruction(Inst, IDLoc, Instructions); - for(unsigned i =0; i < Instructions.size(); i++){ - Out.EmitInstruction(Instructions[i]); - } - } else { - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); - } + if (processInstruction(Inst,IDLoc,Instructions)) + return true; + for(unsigned i =0; i < Instructions.size(); i++) + Out.EmitInstruction(Instructions[i]); return false; } case Match_MissingFeature: @@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Identifier: + // Look for the existing symbol, we should check if + // we need to assigne the propper RegisterKind + if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) + return false; + //else drop to expression parsing case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: @@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { // Check the type of the expression if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // it's a constant, evaluate lo or hi value - int Val = MCE->getValue(); + // It's a constant, evaluate lo or hi value if (Str == "lo") { - Val = Val & 0xffff; + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); } else if (Str == "hi") { + int Val = MCE->getValue(); int LoSign = Val & 0x8000; Val = (Val & 0xffff0000) >> 16; - //lower part is treated as signed int, so if it is negative - //we must add 1 to hi part to compensate + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate if (LoSign) Val++; + Res = MCConstantExpr::Create(Val, getContext()); } - Res = MCConstantExpr::Create(Val, getContext()); return false; } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // it's a symbol, create symbolic expression from symbol + // It's a symbol, create symbolic expression from symbol StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); @@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { switch(getLexer().getKind()) { default: return true; + case AsmToken::Identifier: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: @@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } +bool MipsAsmParser:: +searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterKind) { + + MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); + if (Sym) { + SMLoc S = Parser.getTok().getLoc(); + const MCExpr *Expr; + if (Sym->isVariable()) + Expr = Sym->getVariableValue(); + else + return false; + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + const StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + // Lookup for the register with corresponding name + int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + if (RegNum > -1) { + Parser.Lex(); + MipsOperand *op = MipsOperand::CreateReg(RegNum,S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + Operands.push_back(op); + return true; + } + } + } else if (Expr->getKind() == MCExpr::Constant) { + Parser.Lex(); + const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); + MipsOperand *op = MipsOperand::CreateImm(Const,S, + Parser.getTok().getLoc()); + Operands.push_back(op); + return true; + } + } + return false; +} MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { } bool MipsAsmParser::parseSetNoAtDirective() { - // line should look like: + // Line should look like: // .set noat // set at reg to 0 Options.setATReg(0); // eat noat Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); //eat '=' + getParser().Lex(); // eat '=' if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // eat '$' + Parser.Lex(); // Eat '$' const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); //eat reg + getParser().Lex(); // Eat reg if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); @@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } + +bool MipsAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + + if (Parser.parseIdentifier(Name)) + reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token in .set directive"); + Lex(); //eat comma + + if (Parser.parseExpression(Value)) + reportParseError("expected valid expression after comma"); + + // check if the Name already exists as a symbol + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + return reportParseError("symbol already defined"); + } + Sym = getContext().GetOrCreateSymbol(Name); + Sym->setVariableValue(Value); + + return false; +} bool MipsAsmParser::parseDirectiveSet() { // get next token @@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() { // ignore this directive for now Parser.eatToEndOfStatement(); return false; + } else { + // it is just an identifier, look for assignment + parseSetAssignment(); + return false; } return true; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 58aa1be34d..cf8bb189e4 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16InstrInfo.cpp + Mips16ISelDAGToDAG.cpp + Mips16ISelLowering.cpp Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp @@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp + MipsSEISelDAGToDAG.cpp + MipsSEISelLowering.cpp MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 96f93a0789..e198a7c983 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions or immediates"); @@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm()>>2; + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions or an immediate"); diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp new file mode 100644 index 0000000000..00b3449300 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -0,0 +1,308 @@ +//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "Mips16ISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = Mips::Mflo16; + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = Mips::Mfhi16; + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC = + (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + V2 = RegInfo.createVirtualRegister(RC); + + BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); + BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) + .addReg(V1).addReg(V2); +} + +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + +void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + initMips16SPAliasReg(MF); +} + +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + +void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} + +bool Mips16DAGToDAGISel::selectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasFPIdx()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::AdduRxRyRz16; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SubuRxRyRz16; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + + EVT VT = LHS.getValueType(); + + unsigned Sltu_op = Mips::SltuRxRyRz16; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + unsigned Addu_op = Mips::AdduRxRyRz16; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, + SDValue(Carry,0), RHS); + + SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry,0)); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16); + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + case ISD::MULHS: + case ISD::MULHU: { + MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16); + SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { + return new Mips16DAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h new file mode 100644 index 0000000000..baa85877d9 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -0,0 +1,51 @@ +//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16ISELDAGTODAG_H +#define MIPS16ISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class Mips16DAGToDAGISel : public MipsDAGToDAGISel { +public: + explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, + EVT Ty, bool HasLo, bool HasHi); + + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); + + void initMips16SPAliasReg(MachineFunction &MF); +}; + +FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp new file mode 100644 index 0000000000..23eb5375ac --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -0,0 +1,689 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-lower" +#include "Mips16ISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <set> + +using namespace llvm; + +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); + +namespace { + std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded; +} + +Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // + // set up as if mips32 and then revert so we can test the mechanism + // for switching + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + computeRegisterProperties(); + clearRegisterClasses(); + + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMips16TargetLowering(MipsTargetMachine &TM) { + return new Mips16TargetLowering(TM); +} + +bool +Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + return false; +} + +MachineBasicBlock * +Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::SelBeqZ: + return emitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return emitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; + case Mips::SltCCRxRy16: + return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return emitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); + } +} + +bool Mips16TargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + // No tail call optimization for mips16. + return false; +} + +void Mips16TargetLowering::setMips16LibcallName + (RTLIB::Libcall L, const char *Name) { + setLibcallName(L, Name); + NoHelperNeeded.insert(Name); +} + +void Mips16TargetLowering::setMips16HardFloatLibCalls() { + setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + + +// +// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much +// cleaner way to do all of this but it will have to wait until the traditional +// gcc mechanism is completed. +// +// For Pic, in order for Mips16 code to call Mips32 code which according the abi +// have either arguments or returned values placed in floating point registers, +// we use a set of helper functions. (This includes functions which return type +// complex which on Mips are returned in a pair of floating point registers). +// +// This is an encoding that we inherited from gcc. +// In Mips traditional O32, N32 ABI, floating point numbers are passed in +// floating point argument registers 1,2 only when the first and optionally +// the second arguments are float (sf) or double (df). +// For Mips16 we are only concerned with the situations where floating point +// arguments are being passed in floating point registers by the ABI, because +// Mips16 mode code cannot execute floating point instructions to load those +// values and hence helper functions are needed. +// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) +// the helper function suffixs for these are: +// 0, 1, 5, 9, 2, 6, 10 +// this suffix can then be calculated as follows: +// for a given argument Arg: +// Arg1x, Arg2x = 1 : Arg is sf +// 2 : Arg is df +// 0: Arg is neither sf or df +// So this stub is the string for number Arg1x + Arg2x*4. +// However not all numbers between 0 and 10 are possible, we check anyway and +// assert if the impossible exists. +// + +unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber + (ArgListTy &Args) const { + unsigned int resultNum = 0; + if (Args.size() >= 1) { + Type *t = Args[0].Ty; + if (t->isFloatTy()) { + resultNum = 1; + } + else if (t->isDoubleTy()) { + resultNum = 2; + } + } + if (resultNum) { + if (Args.size() >=2) { + Type *t = Args[1].Ty; + if (t->isFloatTy()) { + resultNum += 4; + } + else if (t->isDoubleTy()) { + resultNum += 8; + } + } + } + return resultNum; +} + +// +// prefixs are attached to stub numbers depending on the return type . +// return type: float sf_ +// double df_ +// single complex sc_ +// double complext dc_ +// others NO PREFIX +// +// +// The full name of a helper function is__mips16_call_stub + +// return type dependent prefix + stub number +// +// +// This is something that probably should be in a different source file and +// perhaps done differently but my main purpose is to not waste runtime +// on something that we can enumerate in the source. Another possibility is +// to have a python script to generate these mapping tables. This will do +// for now. There are a whole series of helper function mapping arrays, one +// for each return type class as outlined above. There there are 11 possible +// entries. Ones with 0 are ones which should never be selected +// +// All the arrays are similar except for ones which return neither +// sf, df, sc, dc, in which only care about ones which have sf or df as a +// first parameter. +// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* Mips16TargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + +void Mips16TargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + SelectionDAG &DAG = CLI.DAG; + const char* Mips16HelperFunction = 0; + bool NeedMips16Helper = false; + + if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool LookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) { + if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + LookupHelper = false; + } + } + if (LookupHelper) Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); + + } + + SDValue JumpTarget = Callee; + + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned V0Reg = Mips::V0; + if (NeedMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } else + RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); + } + + Ops.push_back(JumpTarget); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock *Mips16TargetLowering:: +emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock *Mips16TargetLowering::emitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock + *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h new file mode 100644 index 0000000000..b23e2a1f37 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -0,0 +1,80 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef Mips16ISELLOWERING_H +#define Mips16ISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class Mips16TargetLowering : public MipsTargetLowering { + public: + explicit Mips16TargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + void setMips16LibcallName(RTLIB::Libcall, const char *Name); + + void setMips16HardFloatLibCalls(); + + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; +} + +#endif // Mips16ISELLOWERING_H diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index a9e9c52716..629382965b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -15,7 +15,7 @@ // Mips Address // def addr16 : - ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>; // // Address operand diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 6dff548505..77b08cb11e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-isel" +#include "MipsISelDAGToDAG.h" +#include "Mips16ISelDAGToDAG.h" +#include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -45,270 +45,11 @@ using namespace llvm; // MipsDAGToDAGISel - MIPS specific code to select MIPS machine // instructions for SelectionDAG operations. //===----------------------------------------------------------------------===// -namespace { - -class MipsDAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to MipsTargetMachine. - MipsTargetMachine &TM; - - /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can - /// make the right decision when generating code for different targets. - const MipsSubtarget &Subtarget; - -public: - explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - - // Pass Name - virtual const char *getPassName() const { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - -private: - // Include the pieces autogenerated from the target description. - #include "MipsGenDAGISel.inc" - - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const MipsTargetMachine &getTargetMachine() { - return static_cast<const MipsTargetMachine &>(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const MipsInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - - SDValue getMips16SPAliasReg(); - - void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); - - std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, - EVT Ty, bool HasLo, bool HasHi); - - SDNode *Select(SDNode *N); - - // Complex Pattern. - /// (reg + imm). - bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Fall back on this function if all else fails. - bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Match integer address pattern. - bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Alias); - - // getImm - Return a target constant with the specified value. - inline SDValue getImm(const SDNode *Node, uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); - } - - void ProcessFunctionAfterISel(MachineFunction &MF); - bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); - void InitGlobalBaseReg(MachineFunction &MF); - void InitMips16SPAliasReg(MachineFunction &MF); - - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); -}; - -} - -// Insert instructions to initialize the global base register in the -// first MBB of the function. When the ABI is O32 and the relocation model is -// PIC, the necessary instructions are emitted later to prevent optimization -// passes from moving them. -void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->globalBaseRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - const TargetRegisterClass *RC; - - if (Subtarget.isABI_N64()) - RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - else if (Subtarget.inMips16Mode()) - RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; - else - RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - V0 = RegInfo.createVirtualRegister(RC); - V1 = RegInfo.createVirtualRegister(RC); - V2 = RegInfo.createVirtualRegister(RC); - - if (Subtarget.isABI_N64()) { - MF.getRegInfo().addLiveIn(Mips::T9_64); - MBB.addLiveIn(Mips::T9_64); - - // lui $v0, %hi(%neg(%gp_rel(fname))) - // daddu $v1, $v0, $t9 - // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) - .addReg(Mips::T9_64); - BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - if (Subtarget.inMips16Mode()) { - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); - BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) - .addReg(V1).addReg(V2); - return; - } - - if (MF.getTarget().getRelocationModel() == Reloc::Static) { - // Set global register to __gnu_local_gp. - // - // lui $v0, %hi(__gnu_local_gp) - // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - return; - } - - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - assert(Subtarget.isABI_O32()); - - // For O32 ABI, the following instruction sequence is emitted to initialize - // the global base register: - // - // 0. lui $2, %hi(_gp_disp) - // 1. addiu $2, $2, %lo(_gp_disp) - // 2. addu $globalbasereg, $2, $t9 - // - // We emit only the last instruction here. - // - // GNU linker requires that the first two instructions appear at the beginning - // of a function and no instructions be inserted before or between them. - // The two instructions are emitted during lowering to MC layer in order to - // avoid any reordering. - // - // Register $2 (Mips::V0) is added to the list of live-in registers to ensure - // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) - // reads it. - MF.getRegInfo().addLiveIn(Mips::V0); - MBB.addLiveIn(Mips::V0); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) - .addReg(Mips::V0).addReg(Mips::T9); -} - -// Insert instructions to initialize the Mips16 SP Alias register in the -// first MBB of the function. -// -void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->mips16SPAliasRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); - - BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) - .addReg(Mips::SP); -} - - -bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, - const MachineInstr& MI) { - unsigned DstReg = 0, ZeroReg = 0; - - // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". - if ((MI.getOpcode() == Mips::ADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO; - } else if ((MI.getOpcode() == Mips::DADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO_64) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO_64; - } - - if (!DstReg) - return false; - - // Replace uses with ZeroReg. - for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E;) { - MachineOperand &MO = U.getOperand(); - unsigned OpNo = U.getOperandNo(); - MachineInstr *MI = MO.getParent(); - ++U; - - // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) - continue; - - MO.setReg(ZeroReg); - } - - return true; -} - -void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { - InitGlobalBaseReg(MF); - InitMips16SPAliasReg(MF); - - MachineRegisterInfo *MRI = &MF.getRegInfo(); - - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; - ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - ReplaceUsesWithZeroReg(MRI, *I); -} bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - ProcessFunctionAfterISel(MF); + processFunctionAfterISel(MF); return Ret; } @@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// getMips16SPAliasReg - Output the instructions required to put the -/// SP into a Mips16 accessible aliased register. -SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { - unsigned Mips16SPAliasReg = - MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); -} - /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { - EVT ValTy = Addr.getValueType(); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; - } - - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - } - + llvm_unreachable("Unimplemented function."); return false; } bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); - return true; + llvm_unreachable("Unimplemented function."); + return false; } bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Addr, Base, Offset) || - selectAddrDefault(Addr, Base, Offset); -} - -void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); - if (Parent) { - switch (Parent->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - case ISD::STORE: { - StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - } - } - AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); - return; - -} -bool MipsDAGToDAGISel::SelectAddr16( - SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, - SDValue &Alias) { - EVT ValTy = Addr.getValueType(); - - Alias = CurDAG->getTargetConstant(0, ValTy); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - getMips16SPRefReg(Parent, Alias); - return true; - } - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - getMips16SPRefReg(Parent, Alias); - } - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; + llvm_unreachable("Unimplemented function."); + return false; } -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 : - (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 : - (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag); - } - return std::make_pair(Lo, Hi); +bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) { + llvm_unreachable("Unimplemented function."); + return false; } - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return NULL; } - /// - // Instruction Selection not handled by the auto-generated - // tablegen selection should be handled here. - /// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; + // See if subclasses can handle this node. + std::pair<bool, SDNode*> Ret = selectNode(Node); + + if (Ret.first) + return Ret.second; switch(Opcode) { default: break; - case ISD::SUBE: - case ISD::ADDE: { - bool inMips16Mode = Subtarget.inMips16Mode(); - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - if (inMips16Mode) - MOp = Mips::AdduRxRyRz16; - else - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - if (inMips16Mode) - MOp = Mips::SubuRxRyRz16; - else - MOp = Mips::SUBu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - - EVT VT = LHS.getValueType(); - - unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); - unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; - SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, - SDValue(Carry,0), RHS); - - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } - - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : - Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return NULL; - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, - dl, NodeTy, true, false).first; - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::MULHU ? - Mips::MultuRxRy16 : Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; - } - // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.hasMips64()) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO_64, MVT::i64); - return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); - } - - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } - break; - } - - case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); - unsigned Size = CN->getValueSizeInBits(0); - - if (Size == 32) - break; - - MipsAnalyzeImmediate AnalyzeImm; - int64_t Imm = CN->getSExtValue(); - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, false); - - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - DebugLoc DL = CN->getDebugLoc(); - SDNode *RegOpnd; - SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - - // The first instruction can be a LUi which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == Mips::LUi64) - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); - else - RegOpnd = - CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - CurDAG->getRegister(Mips::ZERO_64, MVT::i64), - ImmOpnd); - - // The remaining instructions in the sequence are handled here. - for (++Inst; Inst != Seq.end(); ++Inst) { - ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - SDValue(RegOpnd, 0), ImmOpnd); - } - - return RegOpnd; - } - #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: @@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { "Unexpected unaligned loads/stores."); break; #endif - - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); - } } // Select the default instruction @@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { - return new MipsDAGToDAGISel(TM); + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16ISelDag(TM); + + return llvm::createMipsSEISelDag(TM); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h new file mode 100644 index 0000000000..cf0f9c58aa --- /dev/null +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -0,0 +1,93 @@ +//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSISELDAGTODAG_H +#define MIPSISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsDAGToDAGISel : public SelectionDAGISel { +public: + explicit MipsDAGToDAGISel(MipsTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + +protected: + SDNode *getGlobalBaseReg(); + + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const MipsSubtarget &Subtarget; + +private: + // Include the pieces autogenerated from the target description. + #include "MipsGenDAGISel.inc" + + // Complex Pattern. + /// (reg + imm). + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual SDNode *Select(SDNode *N); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0; + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + virtual void processFunctionAfterISel(MachineFunction &MF) = 0; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. +FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c452dee8d0..4bf43f42e3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" -#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -42,26 +41,9 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); - -static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); -static cl::opt<bool> -Mips16HardFloat("mips16-hard-float", cl::NotHidden, - cl::desc("MIPS: mips16 hard float enable."), - cl::init(false)); - -static cl::opt<bool> DontExpandCondPseudos16( - "mips16-dont-expand-cond-pseudo", - cl::init(false), - cl::desc("Dont expand conditional move related " - "pseudos for Mips 16"), - cl::Hidden); - - static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } -static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) { +SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } @@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, + bool HasMips64) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; @@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, + unsigned Flag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), @@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { MachinePointerInfo::getGOT(), false, false, false, 0); } -static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) { +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, + unsigned LoFlag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); @@ -211,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -namespace { - struct LTStr { - bool operator()(const char *S1, const char *S2) const - { - return strcmp(S1, S2) < 0; - } - }; - - std::set<const char*, LTStr> NoHelperNeeded; -} - -void MipsTargetLowering::setMips16LibcallName - (RTLIB::Libcall L, const char *Name) { - setLibcallName(L, Name); - NoHelperNeeded.insert(Name); -} - -void MipsTargetLowering::setMips16HardFloatLibCalls() { - setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} - MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget<MipsSubtarget>()), HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), IsO32(Subtarget->isABI_O32()) { - // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - // Set up the register classes - addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); - - if (HasMips64) - addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); - - if (Subtarget->inMips16Mode()) { - addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Mips16HardFloat) - setMips16HardFloatLibCalls(); - } - - if (Subtarget->hasDSP()) { - MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; - - for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); - - // Expand all builtin opcodes. - for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, VecTys[i], Expand); - - setOperationAction(ISD::LOAD, VecTys[i], Legal); - setOperationAction(ISD::STORE, VecTys[i], Legal); - setOperationAction(ISD::BITCAST, VecTys[i], Legal); - } - } - - if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, &Mips::FGR64RegClass); - else - addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); - } - } - // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } - else { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - } - if (!Subtarget->inMips16Mode()) { - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - } if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -472,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - } - setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -523,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); - computeRegisterProperties(); setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); @@ -531,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM) MaxStoresPerMemcpy = 16; } -bool -MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { - MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - - if (Subtarget->inMips16Mode()) - return false; +const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16TargetLowering(TM); - switch (SVT) { - case MVT::i64: - case MVT::i32: - if (Fast) - *Fast = true; - return true; - default: - return false; - } + return llvm::createMipsSETargetLowering(TM); } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -1099,347 +952,6 @@ static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) { } MachineBasicBlock * -MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ - // $bb: - // bposge32_pseudo $vr0 - // => - // $bb: - // bposge32 $tbb - // $fbb: - // li $vr2, 0 - // b $sink - // $tbb: - // li $vr1, 1 - // $sink: - // $vr0 = phi($vr2, $fbb, $vr1, $tbb) - - MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetRegisterClass *RC = &Mips::CPURegsRegClass; - DebugLoc DL = MI->getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); - MachineFunction *F = BB->getParent(); - MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, FBB); - F->insert(It, TBB); - F->insert(It, Sink); - - // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - Sink->transferSuccessorsAndUpdatePHIs(BB); - - // Add successors. - BB->addSuccessor(FBB); - BB->addSuccessor(TBB); - FBB->addSuccessor(Sink); - TBB->addSuccessor(Sink); - - // Insert the real bposge32 instruction to $BB. - BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); - - // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) - .addReg(Mips::ZERO).addImm(0); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); - - // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); - BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) - .addReg(Mips::ZERO).addImm(1); - - // Insert phi function to $Sink. - BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), - MI->getOperand(0).getReg()) - .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return Sink; -} - -MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) - .addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - -MachineBasicBlock *MipsTargetLowering::emitSelT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addReg(MI->getOperand(4).getReg()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock *MipsTargetLowering::emitSeliT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addImm(MI->getOperand(4).getImm()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock - *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - unsigned regY = MI->getOperand(1).getReg(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - int64_t imm = MI->getOperand(1).getImm(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -static unsigned Mips16WhichOp8uOr16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - unsigned regY = MI->getOperand(2).getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - int64_t Imm = MI->getOperand(2).getImm(); - unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} -MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { @@ -1548,77 +1060,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return emitAtomicCmpSwap(MI, BB, 8); - case Mips::BPOSGE32_PSEUDO: - return emitBPOSGE32(MI, BB); - case Mips::SelBeqZ: - return emitSel16(Mips::BeqzRxImm16, MI, BB); - case Mips::SelBneZ: - return emitSel16(Mips::BnezRxImm16, MI, BB); - case Mips::SelTBteqZCmpi: - return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBteqZSlti: - return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBteqZSltiu: - return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBtneZCmpi: - return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBtneZSlti: - return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBtneZSltiu: - return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBteqZCmp: - return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBteqZSlt: - return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBteqZSltu: - return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::SelTBtneZCmp: - return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBtneZSlt: - return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBtneZSltu: - return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::BteqzT8SltX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::BtnezT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::BtnezT8SltX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - break; - case Mips::SltCCRxRy16: - return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); - break; - case Mips::SltiCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::SltiuCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SltuCCRxRy16: - return emitFEXT_CCRX16_ins - (Mips::SltuRxRy16, MI, BB); } } @@ -3074,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) { return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -/// isEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. -bool MipsTargetLowering:: -isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { - if (!EnableMipsTailCalls) - return false; - - // No tail call optimization for mips16. - if (Subtarget->inMips16Mode()) - return false; - - // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) - return false; - - // Return true if the callee's argument area is no larger than the - // caller's. - return NextStackOffset <= FI.getIncomingArgSize(); -} - SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, DebugLoc DL, @@ -3114,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, /*isVolatile=*/ true, false, 0); } -// -// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much -// cleaner way to do all of this but it will have to wait until the traditional -// gcc mechanism is completed. -// -// For Pic, in order for Mips16 code to call Mips32 code which according the abi -// have either arguments or returned values placed in floating point registers, -// we use a set of helper functions. (This includes functions which return type -// complex which on Mips are returned in a pair of floating point registers). -// -// This is an encoding that we inherited from gcc. -// In Mips traditional O32, N32 ABI, floating point numbers are passed in -// floating point argument registers 1,2 only when the first and optionally -// the second arguments are float (sf) or double (df). -// For Mips16 we are only concerned with the situations where floating point -// arguments are being passed in floating point registers by the ABI, because -// Mips16 mode code cannot execute floating point instructions to load those -// values and hence helper functions are needed. -// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) -// the helper function suffixs for these are: -// 0, 1, 5, 9, 2, 6, 10 -// this suffix can then be calculated as follows: -// for a given argument Arg: -// Arg1x, Arg2x = 1 : Arg is sf -// 2 : Arg is df -// 0: Arg is neither sf or df -// So this stub is the string for number Arg1x + Arg2x*4. -// However not all numbers between 0 and 10 are possible, we check anyway and -// assert if the impossible exists. -// - -unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber - (ArgListTy &Args) const { - unsigned int resultNum = 0; - if (Args.size() >= 1) { - Type *t = Args[0].Ty; - if (t->isFloatTy()) { - resultNum = 1; - } - else if (t->isDoubleTy()) { - resultNum = 2; - } - } - if (resultNum) { - if (Args.size() >=2) { - Type *t = Args[1].Ty; - if (t->isFloatTy()) { - resultNum += 4; - } - else if (t->isDoubleTy()) { - resultNum += 8; - } - } +void MipsTargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // Insert node "GP copy globalreg" before call to function. + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } - return resultNum; -} -// -// prefixs are attached to stub numbers depending on the return type . -// return type: float sf_ -// double df_ -// single complex sc_ -// double complext dc_ -// others NO PREFIX -// -// -// The full name of a helper function is__mips16_call_stub + -// return type dependent prefix + stub number -// -// -// This is something that probably should be in a different source file and -// perhaps done differently but my main purpose is to not waste runtime -// on something that we can enumerate in the source. Another possibility is -// to have a python script to generate these mapping tables. This will do -// for now. There are a whole series of helper function mapping arrays, one -// for each return type class as outlined above. There there are 11 possible -// entries. Ones with 0 are ones which should never be selected -// -// All the arrays are similar except for ones which return neither -// sf, df, sc, dc, in which only care about ones which have sf or df as a -// first parameter. -// -#define P_ "__mips16_call_stub_" -#define MAX_STUB_NUMBER 10 -#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" -#define T P "0" , T1 -#define P P_ -static char const * vMips16Helper[MAX_STUB_NUMBER+1] = - {0, T1 }; -#undef P -#define P P_ "sf_" -static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "df_" -static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "sc_" -static char const * scMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "dc_" -static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#undef P_ - - -const char* MipsTargetLowering:: - getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const { - const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); -#ifndef NDEBUG - const unsigned int maxStubNum = 10; - assert(stubNum <= maxStubNum); - const bool validStubNum[maxStubNum+1] = - {true, true, true, false, false, true, true, false, false, true, true}; - assert(validStubNum[stubNum]); -#endif - const char *result; - if (RetTy->isFloatTy()) { - result = sfMips16Helper[stubNum]; - } - else if (RetTy ->isDoubleTy()) { - result = dfMips16Helper[stubNum]; - } - else if (RetTy->isStructTy()) { - // check if it's complex - if (RetTy->getNumContainedTypes() == 2) { - if ((RetTy->getContainedType(0)->isFloatTy()) && - (RetTy->getContainedType(1)->isFloatTy())) { - result = scMips16Helper[stubNum]; - } - else if ((RetTy->getContainedType(0)->isDoubleTy()) && - (RetTy->getContainedType(1)->isDoubleTy())) { - result = dcMips16Helper[stubNum]; - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - if (stubNum == 0) { - needHelper = false; - return ""; - } - result = vMips16Helper[stubNum]; + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emitted instructions must be + // stuck together. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } - needHelper = true; - return result; + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(CLI.DAG.getRegisterMask(Mask)); + + if (InFlag.getNode()) + Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to @@ -3287,26 +2593,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; - const char* Mips16HelperFunction = 0; - bool NeedMips16Helper = false; - - if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && - Mips16HardFloat) { - // - // currently we don't have symbols tagged with the mips16 or mips32 - // qualifier so we will assume that we don't know what kind it is. - // and generate the helper - // - bool LookupHelper = true; - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { - LookupHelper = false; - } - } - if (LookupHelper) Mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); - - } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -3466,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue JumpTarget = Callee; - - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - unsigned V0Reg = Mips::V0; - if (NeedMips16Helper) { - RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol( - Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); - } - else { - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); - } - } - - // Insert node "GP copy globalreg" before call to function. - // - // R_MIPS_CALL* operators (emitted when non-internal functions are called - // in PIC mode) allow symbols to be resolved via lazy binding. - // The lazy binding stub requires GP to point to the GOT. - if (IsPICCall && !InternalLinkage) { - unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; - EVT Ty = IsN64 ? MVT::i64 : MVT::i32; - RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty))); - } - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... - // - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (JumpTarget.getNode()) - Ops.push_back(JumpTarget); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); + getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, + CLI, Callee, Chain); if (IsTailCall) return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size()); Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, @@ -4124,14 +3347,14 @@ static bool isF128SoftLibCall(const char *CallSym) { const char * const *End = LibCalls + array_lengthof(LibCalls); // Check that LibCalls is sorted alphabetically. -#ifndef NDEBUG - LTStr Comp; + MipsTargetLowering::LTStr Comp; +#ifndef NDEBUG for (const char * const *I = LibCalls; I < End - 1; ++I) assert(Comp(*I, *(I + 1))); #endif - return std::binary_search(LibCalls, End, CallSym, LTStr()); + return std::binary_search(LibCalls, End, CallSym, Comp); } /// This function returns true if Ty is fp128 or i128 which was originally a diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index de925e16ab..71977d7f1c 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -152,9 +152,9 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + static const MipsTargetLowering *create(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -177,17 +177,34 @@ namespace llvm { EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - private: - void setMips16LibcallName(RTLIB::Libcall, const char *Name); + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; + + protected: + SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - void setMips16HardFloatLibCalls(); + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; - unsigned int - getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; - const char *getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, unsigned LoFlag) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; /// ByValArgInfo - Byval argument information. struct ByValArgInfo { @@ -283,6 +300,7 @@ namespace llvm { bool HasMips64, IsN64, IsO32; + private: // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -321,9 +339,10 @@ namespace llvm { /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval @@ -377,10 +396,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; @@ -419,8 +434,6 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, - MachineBasicBlock *BB) const; MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, @@ -430,29 +443,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const; - }; + + /// Create MipsTargetLowering objects. + const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM); + const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM); } #endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 25b5d240be..022987f12e 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -256,6 +256,7 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem64 : Operand<i64> { @@ -263,18 +264,21 @@ def mem64 : Operand<i64> { let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } def mem_ea_64 : Operand<i64> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } // size operand of ext instruction diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 13b2a6ac17..3c210e71bb 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -58,7 +58,8 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86c17..6d76e8a54a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -18,6 +18,10 @@ def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; } +class Unallocatable { + bit isAllocatable = 0; +} + // We have banks of 32 registers each. class MipsReg<bits<16> Enc, string n> : Register<n> { let HWEncoding = Enc; @@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Callee save S0, S1)>; -def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable; -def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; -def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; +def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>, + Unallocatable; def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp new file mode 100644 index 0000000000..e22c3c8b3f --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -0,0 +1,460 @@ +//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "MipsSEISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E;) { + MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); + MachineInstr *MI = MO.getParent(); + ++U; + + // Do not replace if it is a phi's operand or is tied to def operand. + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC; + + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); +} + +void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + replaceUsesWithZeroReg(MRI, *I); +} + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, DebugLoc DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, + SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry, 0)); +} + +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; + } + + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); + return true; +} + +bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + SDNode *Result; + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + return std::make_pair(true, Result); + } + + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); + + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; + Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; + return std::make_pair(true, Result); + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO_64, MVT::i64); + Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); + } else { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO, MVT::i32); + Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, + Zero); + } + + return std::make_pair(true, Result); + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. + for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return std::make_pair(true, RegOpnd); + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return std::make_pair(true, ResNode.getNode()); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { + return new MipsSEDAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h new file mode 100644 index 0000000000..6137ab040b --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -0,0 +1,57 @@ +//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEISELDAGTODAG_H +#define MIPSSEISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { + +public: + explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); + + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + + SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + DebugLoc DL, SDNode *Node) const; + + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); +}; + +FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp new file mode 100644 index 0000000000..287e2ede0e --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -0,0 +1,197 @@ +//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// +#include "MipsSEISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + +MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + + if (HasMips64) + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->hasDSP()) { + MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; + + for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { + addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VecTys[i], Expand); + + setOperationAction(ISD::LOAD, VecTys[i], Legal); + setOperationAction(ISD::STORE, VecTys[i], Legal); + setOperationAction(ISD::BITCAST, VecTys[i], Legal); + } + } + + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); + else + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); + } + } + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { + return new MipsSETargetLowering(TM); +} + + +bool +MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } +} + +MachineBasicBlock * +MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::BPOSGE32_PSEUDO: + return emitBPOSGE32(MI, BB); + } +} + +bool MipsSETargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; + + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +void MipsSETargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); + } else + Ops.push_back(Callee); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock * MipsSETargetLowering:: +emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h new file mode 100644 index 0000000000..04a28ce54c --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -0,0 +1,46 @@ +//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MipsSEISELLOWERING_H +#define MipsSEISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class MipsSETargetLowering : public MipsTargetLowering { + public: + explicit MipsSETargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; + }; +} + +#endif // MipsSEISELLOWERING_H diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index fd930f0335..33363580ab 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c4928c21eb..7e5f192264 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine { const DataLayout DL; // Calculates type size & alignment OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; - MipsTargetLowering TLInfo; + OwningPtr<const MipsTargetLowering> TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -63,7 +63,7 @@ public: } virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; + return TLInfo.get(); } virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f7736e..709daa4e4b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a420929d0..38a7420d97 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e13852e..b0680fbb8c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,6 +17,10 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6..81a54d7015 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). + // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index caeb1796f7..2a680661d3 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -136,3 +136,8 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6693..0108f7256e 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -281,6 +299,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -311,13 +354,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -780,7 +822,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +864,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +880,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -1031,6 +1059,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } } bool diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47fafd..6f5f9368c6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -41,10 +43,13 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 17bea8a6a6..90a3a5c6a4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -164,6 +164,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13cb358fc0..c7d454692e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -208,6 +212,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, and etc.. As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -564,6 +576,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -1028,7 +1042,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1077,7 +1091,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1140,7 +1154,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1180,13 +1194,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); } else return false; @@ -1205,6 +1221,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -4555,6 +4575,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -5551,6 +5586,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -5744,7 +5782,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5863,9 +5901,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + // For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. + + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -6133,7 +6400,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -6622,6 +6889,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6806,13 +7076,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6851,6 +7124,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3931384d89..1b0427e021 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -119,6 +120,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -321,6 +328,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -395,6 +403,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -449,6 +463,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -604,6 +622,9 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f7..d2b1d6d392 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,12 +30,12 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); + let MIOperandInfo = (ops i64imm:$imm); } def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 let PrintMethod = "printMemRegImm"; let EncoderMethod = "getMemRIXEncoding"; - let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo64:$off, ptr_rc_nor0:$reg); } def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; @@ -273,6 +273,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -393,15 +406,16 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set G8RC:$rD, (add G8RC_NOX0:$rA, + imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), @@ -536,7 +550,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. @@ -555,7 +569,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set G8RC:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, @@ -567,25 +582,26 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { @@ -612,30 +628,30 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -648,7 +664,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64; def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), "ld $rD, $src", LdStLD, []>, isPPC64; @@ -684,12 +700,12 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -798,82 +814,11 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; -} - -let PPC970_Unit = 2 in { - -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked, isPPC64; - // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), "std $rT, $dst", LdStSTD, @@ -884,6 +829,60 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), PPC970_DGroup_Cracked; } +// Stores with Update (pre-inc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(aligned4pre_store G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STDU G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; + +def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STDUX G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; //===----------------------------------------------------------------------===// @@ -979,3 +978,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), (ADDIS8 G8RC:$in, tjumptable:$g)>; def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 G8RC:$in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst), + (STDX G8RC:$rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae4b5..0ed7ff2cc4 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), (VRFIZ VRRC:$vA)>; def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), (VRFIN VRRC:$vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c3c171cd21..16dbee5d29 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a80a9..cf39386c67 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { @@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +522,20 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +552,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,7 +578,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), @@ -642,37 +605,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +638,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +667,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213ea43..5d4ae915b4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342d..84cdb1f39d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -158,6 +158,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -278,6 +286,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -314,9 +354,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -344,21 +381,30 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). +def ptr_rc_nor0 : PointerLikeRegClass<1>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$offreg, ptr_rc:$ptrreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. def pred : PredicateOperand<OtherVT, (ops imm, CRRC), @@ -372,6 +418,10 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; @@ -473,6 +523,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } +// The direct BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; + } +} + // Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions @@ -551,6 +609,23 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -708,69 +783,69 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -835,44 +910,39 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFSU F4RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFDU F8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; // Indexed (r+r) Stores. -// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, @@ -887,48 +957,6 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, @@ -950,6 +978,44 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), [(store F8RC:$frS, xaddr:$dst)]>; } +// Indexed (r+r) Stores with Update (preinc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFSUX F4RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFDUX F8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -959,12 +1025,12 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; +def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, @@ -974,12 +1040,13 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, + imm16ShiftedSExt:$imm))]>; +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set GPRC:$rD, (add GPRC_NOR0:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, @@ -1213,6 +1280,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1435,7 +1525,7 @@ let Uses = [RM] in { let PPC970_Unit = 1 in { // FXU Operations. def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd8..b1636a20b8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; @@ -78,6 +85,8 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), LRStoreRequired(false), MinReservedArea(0), @@ -109,6 +118,12 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc655..75e20e4a26 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, @@ -89,17 +71,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; } -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); -} - - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -123,11 +105,33 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. + + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R0); Reserved.set(PPC::R1); Reserved.set(PPC::LR); @@ -139,35 +143,22 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -271,28 +236,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +255,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -369,8 +315,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -412,8 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -438,6 +382,65 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { @@ -498,15 +501,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex, SPAdj, RS); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -529,7 +536,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, isIXAddr = true; break; } - + + bool noImmForm = false; + switch (OpC) { + case PPC::LVEBX: + case PPC::LVEHX: + case PPC::LVEWX: + case PPC::LVX: + case PPC::LVXL: + case PPC::LVSL: + case PPC::LVSR: + case PPC::STVEBX: + case PPC::STVEHX: + case PPC::STVEWX: + case PPC::STVX: + case PPC::STVXL: + noImmForm = true; + break; + } + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +578,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,13 +589,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC); // Insert a set of rA with the full offset value before the ld, st, or add BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) @@ -584,7 +606,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666242..ba0fb48b85 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -44,16 +44,22 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. - bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; @@ -61,6 +67,11 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8ee9b1ec9f..55d36bbfc9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -86,6 +86,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// The reprsentation of r0 when treated as the constant 0. +def ZERO : GPR<0, "r0">; +def ZERO8 : GP8<ZERO, "r0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR. +def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, LR, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, LR8, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index a59c775272..e740348717 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass, (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) >; -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < +class Vector4_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0c7880d232..fa8f62de9c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index be3318a0b4..08650980fd 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c00c..bf9569e659 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c5fa3347dc..8c50d54cf5 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1979,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; // bitconvert patterns diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 40aec833ea..b07a585f08 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -14,5 +14,4 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo() { - memset(Outputs, 0, sizeof(Outputs)); } diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 4b901f4bbc..13a46b8eec 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -26,7 +26,6 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; - SDNode *Outputs[16]; }; } // End llvm namespace diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 19baef94c7..9074364bb3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); InstKindLimit[IDAlu] = 120; // 120 minus 8 for security @@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -class ConstPairs { -private: - unsigned XYPair; - unsigned ZWPair; -public: - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { - for (unsigned i = 0; i < 3; i++) { - unsigned ReadConstChan = ReadConst[i] & 3; - unsigned ReadConstIndex = ReadConst[i] & (~3); - if (ReadConstChan < 2) { - if (!XYPair) { - XYPair = ReadConstIndex; - } - } else { - if (!ZWPair) { - ZWPair = ReadConstIndex; - } - } - } - } - - bool isCompatibleWith(const ConstPairs& CP) const { - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); - } -}; - -static -const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { - unsigned ReadConsts[3] = {0, 0, 0}; - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - if (!TII->isALUInstr(MI.getOpcode())) - return ConstPairs(ReadConsts); - - for (unsigned i = 0; i < 3; i++) { - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); - if (SrcIdx < 0) - break; - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) - ReadConsts[i] =MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); - } - return ConstPairs(ReadConsts); -} - -bool -R600SchedStrategy::isBundleable(const MachineInstr& MI) { - const ConstPairs &MIPair = getPairs(TII, MI); - for (unsigned i = 0; i < 4; i++) { - if (!InstructionsGroupCandidate[i]) - continue; - const ConstPairs &IGPair = getPairs(TII, - *InstructionsGroupCandidate[i]->getInstr()); - if (!IGPair.isCompatibleWith(MIPair)) - return false; - } - return true; -} - SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { if (Q.empty()) return NULL; for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; - if (isBundleable(*SU->getInstr())) { + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); Q.erase(It); return SU; + } else { + InstructionsGroupCandidate.pop_back(); } } return NULL; @@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); + InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit *SU = AttemptFillSlot(Chan); if (SU) { OccupedSlotsMask |= (1 << Chan); - InstructionsGroupCandidate[Chan] = SU; + InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index d74ff1e076..3d0367fd8e 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -98,7 +98,7 @@ public: virtual void releaseBottomNode(SUnit *SU); private: - SUnit *InstructionsGroupCandidate[4]; + std::vector<MachineInstr *> InstructionsGroupCandidate; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; @@ -112,7 +112,6 @@ private: void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - bool isBundleable(const MachineInstr& MI); void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); }; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 063f5faa63..93f8c38a3a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -229,6 +234,10 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } +MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { + return MVT::i32; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 0411565ee3..d65622526f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -48,6 +48,7 @@ public: virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; + virtual MVT getScalarShiftAmountTy(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 67fbdf7be1..98bd3dbb66 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) { Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); } -bool SIInsertWaits::unorderedDefines(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::LGKM_CNT) - return true; - - if (TSFlags & SIInstrFlags::EXP_CNT) - return ExpInstrTypesSeen == 3; - - return false; -} - Counters SIInsertWaits::handleOperands(MachineInstr &MI) { - bool UnorderedDefines = unorderedDefines(MI); Counters Result = ZeroCounts; // For each register affected by this @@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (Op.isDef()) { increaseCounters(Result, UsedRegs[j]); - if (UnorderedDefines) - increaseCounters(Result, DefinedRegs[j]); + increaseCounters(Result, DefinedRegs[j]); } if (Op.isUse()) diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 260c651dd4..2f10c388ad 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < opName#" $dst, $src0", pattern >; -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass, + RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins SReg_64:$sbase, i32imm:$offset), + (ins baseClass:$sbase, i32imm:$offset), asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), - (ins SReg_64:$sbase, SReg_32:$soff), + (ins baseClass:$sbase, SReg_32:$soff), asm#" $dst, $sbase, $soff", [] >; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0ab9e4ec0c..05b04a92b3 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; @@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; } // mayLoad = 1 @@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; let isCommutable = 1 in { @@ -1133,6 +1149,31 @@ def SI_KILL : InstSI < } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] +let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { + +def SI_INDIRECT_SRC : InstSI < + (outs VReg_32:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off", + [] +>; + +class SI_INDIRECT_DST<RegisterClass rc> : InstSI < + (outs rc:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val), + "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val", + [] +> { + let Constraints = "$src = $dst"; +} + +def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; +def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; +def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; +def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; + +} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1241,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>; defm : SamplePatterns<VReg_256, v8i32>; defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; +/********** ============================================ **********/ +/********** Extraction, Insertion, Building and Casting **********/ +/********** ============================================ **********/ + +foreach Index = 0-2 in { + def Extract_Element_v2i32_#Index : Extract_Element < + i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2i32_#Index : Insert_Element < + i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v2f32_#Index : Extract_Element < + f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2f32_#Index : Insert_Element < + f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-3 in { + def Extract_Element_v4i32_#Index : Extract_Element < + i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4i32_#Index : Insert_Element < + i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v4f32_#Index : Extract_Element < + f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4f32_#Index : Insert_Element < + f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-7 in { + def Extract_Element_v8i32_#Index : Extract_Element < + i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8i32_#Index : Insert_Element < + i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; + def Extract_Element_v8f32_#Index : Extract_Element < + f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8f32_#Index : Insert_Element < + f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-15 in { + def Extract_Element_v16i32_#Index : Extract_Element < + i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16i32_#Index : Insert_Element < + i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v16f32_#Index : Extract_Element < + f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16f32_#Index : Insert_Element < + f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; +} def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; +def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; +def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1389,6 +1491,24 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; +// 1. Offset as 8bit DWORD immediate +def : Pat < + (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) +>; + +// 3. Offset in an 32Bit VGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1426,4 +1546,48 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; +/********** ====================== **********/ +/********** Indirect adressing **********/ +/********** ====================== **********/ + +multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, + SI_INDIRECT_DST IndDst> { + // 1. Extract with offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + >; + + // 2. Extract without offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 VReg_32:$idx))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + >; + + // 3. Insert with offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + >; + + // 4. Insert without offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 VReg_32:$idx))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + >; +} + +defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; + } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7c23d1706b..33bb8157b4 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -16,8 +16,7 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index b215aa22db..9a027e77eb 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -66,6 +66,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; + const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -84,9 +85,14 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void IndirectSrc(MachineInstr &MI); + void IndirectDst(MachineInstr &MI); + public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + MachineFunctionPass(ID), TRI(tm.getRegisterInfo()), + TII(tm.getInstrInfo()) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I = MI; + + unsigned Save = MI.getOperand(1).getReg(); + unsigned Idx = MI.getOperand(3).getReg(); + + if (AMDGPU::SReg_32RegClass.contains(Idx)) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + MBB.insert(I, MovRel); + MI.eraseFromParent(); + return; + } + + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); + + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); + + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) + .addReg(Idx); + + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC); + + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); + + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); + + // Do the actual move + MBB.insert(I, MovRel); + + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); + + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vec = MI.getOperand(2).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) + .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Vec, RegState::Implicit); + + LoadM0(MI, MovRel); +} + +void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + unsigned Val = MI.getOperand(5).getReg(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) + .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(Val) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Dst, RegState::Implicit); + + LoadM0(MI, MovRel); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; @@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::S_BRANCH: Branch(MI); break; + + case AMDGPU::SI_INDIRECT_SRC: + IndirectSrc(MI); + break; + + case AMDGPU::SI_INDIRECT_DST_V2: + case AMDGPU::SI_INDIRECT_DST_V4: + case AMDGPU::SI_INDIRECT_DST_V8: + case AMDGPU::SI_INDIRECT_DST_V16: + IndirectDst(MI); + break; } } } diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 3dcad506d2..4f14931a9c 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; +def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// // [SV]Src_* register classes, can have either an immediate or an register @@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 357879bf6c..b53a1ed095 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c6cafe59eb..ee88ce77c0 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -610,6 +610,9 @@ struct StringComparator { // Provided for compatibility with MSVC's debug mode. bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } }; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 7d8b49cdf2..e7282519d5 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() { delete AsmInfo; } +/// \brief Reset the target options based on the function's attributes. +void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { + const Function *F = MF->getFunction(); + TargetOptions &TO = MF->getTarget().Options; + +#define RESET_OPTION(X, Y) \ + do { \ + if (F->hasFnAttribute(Y)) \ + TO.X = \ + (F->getAttributes(). \ + getAttribute(AttributeSet::FunctionIndex, \ + Y).getValueAsString() == "true"); \ + } while (0) + + RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim"); + RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf"); + RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad"); + RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); + RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); + RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(UseSoftFloat, "use-soft-float"); + RESET_OPTION(DisableTailCalls, "disable-tail-calls"); +} + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model TargetMachine::getRelocationModel() const { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ee5c2b2bfd..d5568e08d3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -62,6 +62,9 @@ private: X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, + SMLoc SizeDirLoc, unsigned Size); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, SmallString<64> &Err); @@ -170,30 +173,33 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -231,11 +237,6 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } - bool needAsmRewrite() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.NeedAsmRewrite; - } - const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -332,11 +333,6 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } - unsigned getMemSize() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Size; - } - bool isOffsetOf() const { return OffsetOfLoc.getPointer(); } @@ -345,11 +341,6 @@ struct X86Operand : public MCParsedAsmOperand { return AddressOf; } - bool needSizeDirective() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.NeedSizeDir; - } - bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -485,17 +476,15 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, - bool NeedRewrite = true){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; - Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -503,7 +492,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -512,7 +500,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -527,7 +515,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -890,6 +877,45 @@ public: } }; +X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, + SMLoc End, SMLoc SizeDirLoc, + unsigned Size) { + bool NeedSizeDir = false; + bool IsVarDecl = false; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) { + Size = tType * 8; // Size is in terms of bits in this context. + NeedSizeDir = Size > 0; + } + } + + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + + if (NeedSizeDir) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, + /*Len*/0, Size)); + + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size); +} + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { const AsmToken &Tok = Parser.getTok(); @@ -914,7 +940,12 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Adjust the EndLoc due to the ']'. End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + + // We want the size directive before the '['. + SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); + return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); } } @@ -1036,40 +1067,9 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { if (getParser().parseExpression(Disp, End)) return 0; - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (isParsingInlineAsm()) { - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); - if (!Size) - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size, NeedSizeDir); - } + return CreateMemForInlineAsm(Disp, Start, End, Start, Size); } /// Parse the '.' operator. @@ -1197,7 +1197,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); - return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); + return X86Operand::CreateImm(Imm, Start, End); } X86Operand *X86AsmParser::ParseIntelOperand() { @@ -1221,6 +1221,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() { getLexer().is(AsmToken::Minus)) { const MCExpr *Val; if (!getParser().parseExpression(Val, End)) { + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); return X86Operand::CreateImm(Val, Start, End); } } @@ -1734,242 +1736,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..5fbefaec5e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa346..2518e02e2a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270065..85155f55e0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 960870dc60..23cfd6d72f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); - if (Subtarget->hasInt256()) { - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); - } setOperationAction(ISD::SDIV, MVT::v8i16, Custom); setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } @@ -1186,14 +1179,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); @@ -1210,15 +1195,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + } - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); - } + setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -7834,7 +7821,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7854,18 +7841,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -11490,16 +11478,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { - +static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if (!Subtarget->hasSSE2()) - return SDValue(); - // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -11610,6 +11595,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + unsigned RatioInLog2 = Log2_32_Ceil(Ratio); + uint64_t ShiftAmt = 0; + for (unsigned i = 0; i != Ratio; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); + } + // Check remaining shift amounts. + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Amt.getOperand(i + j)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + } + } + + return SDValue(); +} + +static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + + if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasInt256() && + ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v8i32 || VT == MVT::v16i16))) { + SDValue BaseShAmt; + EVT EltVT = VT.getVectorElementType(); + + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i, j; + for (i = 0; i != NumElts; ++i) { + if (Amt.getOperand(i).getOpcode() == ISD::UNDEF) + continue; + break; + } + for (j = i; j != NumElts; ++j) { + SDValue Arg = Amt.getOperand(j); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != Amt.getOperand(i)) + break; + } + if (i != NumElts && j == NumElts) + BaseShAmt = Amt.getOperand(i); + } else { + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) + Amt = Amt.getOperand(0); + if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(Amt)->isSplat()) { + SDValue InVec = Amt.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = + cast<ShuffleVectorSDNode>(Amt)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, + DAG.getIntPtrConstant(0)); + } + } + + if (BaseShAmt.getNode()) { + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRA: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); + } + } + } + } + + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + std::vector<SDValue> Vals(Ratio); + for (unsigned i = 0; i != Ratio; ++i) + Vals[i] = Amt.getOperand(i); + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + for (unsigned j = 0; j != Ratio; ++j) + if (Vals[j] != Amt.getOperand(i + j)) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + SDValue V; + + if (!Subtarget->hasSSE2()) + return SDValue(); + + V = LowerScalarImmediateShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + V = LowerScalarVariableShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + // AVX2 has VPSLLV/VPSRAV/VPSRLV. + if (Subtarget->hasInt256()) { + if (Op.getOpcode() == ISD::SRL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SHL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return Op; + } + // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); @@ -11826,8 +12029,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } @@ -12262,7 +12480,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -15918,124 +16137,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SHL) { SDValue V = PerformSHLCombine(N, DAG); if (V.getNode()) return V; } - // On X86 with SSE2 support, we can transform this to a vector shift if - // all elements are shifted by the same amount. We can't do this in legalize - // because the a constant vector is typically transformed to a constant pool - // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) - return SDValue(); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue ShAmtOp = N->getOperand(1); - EVT EltVT = VT.getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt = SDValue(); - if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - // Handle the case where the build_vector is all undef - // FIXME: Should DAG allow this? - if (i == NumElts) - return SDValue(); - - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - if (Arg != BaseShAmt) { - return SDValue(); - } - } - } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - SDValue InVec = ShAmtOp.getOperand(0); - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = InVec.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); - if (C->getZExtValue() == SplatIdx) - BaseShAmt = InVec.getOperand(1); - } - } - if (BaseShAmt.getNode() == 0) { - // Don't create instructions with illegal types after legalize - // types has run. - if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && - !DCI.isBeforeLegalize()) - return SDValue(); - - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); - } - } else - return SDValue(); - - // The shift amount is an i32. - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); - - // The shift amount is identical so we can do a vector shift. - SDValue ValOp = N->getOperand(0); - switch (N->getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); - } - } + return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) @@ -16346,13 +16453,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - if (Mask.getOpcode() != X86ISD::VSRAI) - return SDValue(); - - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(1); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + unsigned SraAmt = ~0; + if (Mask.getOpcode() == ISD::SRA) { + SDValue Amt = Mask.getOperand(1); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) + SraAmt = C->getZExtValue(); + } + } else if (Mask.getOpcode() == X86ISD::VSRAI) { + SDValue SraC = Mask.getOperand(1); + SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + } if ((SraAmt + 1) != EltBits) return SDValue(); diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a4065a7..19bdb96f05 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []> { + mnemonic, operands, []>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", [], IIC_BIN_NONMEM>; + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// @@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in { // ADCX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize; @@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; @@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { // ADOX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; @@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 8f2d0a1aae..a967a4da5c 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // SetCC instructions. multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1 in { + isCommutable = 1, SchedRW = [WriteALU] in { def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { IIC_CMOV32_RR>, TB; } - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteALULd, ReadAfterLd] in { def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], - IIC_SET_R>, TB; + IIC_SET_R>, TB, Sched<[WriteALU]>; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), [(store (X86setcc OpNode, EFLAGS), addr:$dst)], - IIC_SET_M>, TB; + IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 734e5982b2..2b27bc5bc5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. @@ -990,9 +991,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 2eb454ded2..5ef0c3c13b 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB, + Sched<[WriteALULd]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB, + Sched<[WriteALULd]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -106,24 +112,28 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>, + Sched<[WriteALU]>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>, + Sched<[WriteALULd]>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), @@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], - IIC_MOVZX>, TB; + IIC_MOVZX>, TB, Sched<[WriteALULd]>; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>, + Sched<[WriteALU]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], - IIC_MOVZX>; + IIC_MOVZX>, Sched<[WriteALULd]>; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d989ec7bb0..39165e24a8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -786,7 +786,7 @@ def LEAVE64 : I<0xC9, RawFrm, // let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], @@ -803,9 +803,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; -} +} // mayLoad, SchedRW -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], @@ -832,29 +832,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; -} +} // mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], IIC_POP_MEM>; -} -let mayStore = 1 in { +} // mayLoad, SchedRW +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>; -} +} // mayStore, SchedRW } -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1, + SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), @@ -871,17 +872,18 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], - mayLoad=1, neverHasSideEffects=1 in { + mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], - mayStore=1, neverHasSideEffects=1 in { + mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +let Constraints = "$src = $dst", SchedRW = [WriteALU] in { +// GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -890,7 +892,7 @@ def BSWAP32r : I<0xC8, AddRegFrm, def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW // Bit scan instructions. let Defs = [EFLAGS] in { @@ -976,7 +978,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; //===----------------------------------------------------------------------===// // Move Instructions. // - +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; @@ -987,6 +989,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1004,7 +1007,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } +} // SchedRW +let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; @@ -1017,9 +1022,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. +let SchedRW = [WriteALU] in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1038,6 +1045,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} // FIXME: These definitions are utterly broken // Just leave them commented out for now because they're useless outside @@ -1055,7 +1063,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1066,7 +1074,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; @@ -1081,6 +1089,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } +let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1093,6 +1102,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1101,34 +1111,37 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, + Sched<[WriteMove]>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteStore]>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteLoad]>; } // Condition code ops, incl. set if equal/not equal/... +let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>; // AH = flags - +} // SchedRW //===----------------------------------------------------------------------===// // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, @@ -1139,13 +1152,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; +} // SchedRW // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), @@ -1166,6 +1180,7 @@ let mayLoad = 1, hasSideEffects = 0 in { >, TB; } +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], @@ -1178,10 +1193,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; +} // SchedRW // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. +let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) @@ -1194,8 +1211,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; +} // SchedRW let hasSideEffects = 0 in { +let SchedRW = [WriteALU] in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1203,8 +1222,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1214,6 +1234,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1221,8 +1242,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1232,6 +1254,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1239,6 +1262,7 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +} // SchedRW let mayLoad = 1, mayStore = 1 in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), @@ -1250,6 +1274,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1257,8 +1282,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1294,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1275,8 +1302,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1286,6 +1314,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1293,8 +1322,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1483,6 +1513,7 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; // Table lookup instructions def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; +let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, @@ -1512,7 +1543,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[In32BitMode]>; +} // SchedRW +let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, @@ -1528,11 +1561,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { + let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, @@ -1545,6 +1580,8 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8; + } + let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, @@ -1557,6 +1594,7 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8; + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752757..4d43ee1f15 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, // scalar +let Sched = WriteFAdd in { def SSE_ALU_F32S : OpndItins< IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM >; @@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins< def SSE_ALU_F64S : OpndItins< IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM >; +} def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S >; // parallel +let Sched = WriteFAdd in { def SSE_ALU_F32P : OpndItins< IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM >; @@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins< def SSE_ALU_F64P : OpndItins< IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM >; +} def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, IIC_DEFAULT, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, IIC_DEFAULT, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), !strconcat(base_opc, asm_opr), - [], IIC_SSE_MOV_S_RR>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, @@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, + Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? +let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int @@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { +let AddedComplexity = 20, SchedRW = [WriteLoad] in { def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. @@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // +let SchedRW = [WriteStore] in { def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler +let SchedRW = [WriteMove] in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; @@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (vt (OpNode RC:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (OpNode (mem_frag addr:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (memopv4i64 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } let Predicates = [HasAVX] in { @@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, @@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 3caa1b538c..053417ccde 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; @@ -35,6 +36,7 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; +} // SchedRW def : Pat<(debugtrap), (INT3)>; @@ -43,6 +45,7 @@ def : Pat<(debugtrap), // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; +let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Input/Output Instructions. // +let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; @@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers +let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), @@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers +let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), @@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // Moves to and from segment registers. // +let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), @@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. +let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), @@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions +let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), @@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; - +} // SchedRW + //===----------------------------------------------------------------------===// // Specialized register support +let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; @@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Cache instructions +let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // XSAVE instructions +let SchedRW = [WriteSystem] in { let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; @@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in { def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e06b..a8a9fd8acc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. + case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085298..bcdd0eb56d 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,6 +7,88 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 def IIC_DEFAULT : InstrItinClass; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997b8e..3e3b86edbb 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry<MVT> AVX2CostTable[] = { + // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to + // customize them to detect the cases where shift amount is a scalar one. + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 1 }, + { ISD::SRA, MVT::v4i32, 1 }, + { ISD::SHL, MVT::v8i32, 1 }, + { ISD::SRL, MVT::v8i32, 1 }, + { ISD::SRA, MVT::v8i32, 1 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 1 }, + { ISD::SHL, MVT::v4i64, 1 }, + { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), + ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. @@ -257,8 +280,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a75212a386..bc5109b4d4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1,4 +1,4 @@ -//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ // to the function does not create any copies of the pointer value that // outlive the call. This more or less means that the pointer is only // dereferenced, and not returned from the function or stored in a global. +// Finally, well-known library call declarations are marked with all +// attributes that are consistent with the function's standard definition. // This pass is implemented as a bottom-up traversal of the call-graph. // //===----------------------------------------------------------------------===// @@ -32,12 +34,14 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); +STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { @@ -62,14 +66,63 @@ namespace { // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const CallGraphSCC &SCC); + // Utility methods used by inferPrototypeAttributes to add attributes + // and maintain annotation statistics. + + void setDoesNotAccessMemory(Function &F) { + if (!F.doesNotAccessMemory()) { + F.setDoesNotAccessMemory(); + ++NumAnnotated; + } + } + + void setOnlyReadsMemory(Function &F) { + if (!F.onlyReadsMemory()) { + F.setOnlyReadsMemory(); + ++NumAnnotated; + } + } + + void setDoesNotThrow(Function &F) { + if (!F.doesNotThrow()) { + F.setDoesNotThrow(); + ++NumAnnotated; + } + } + + void setDoesNotCapture(Function &F, unsigned n) { + if (!F.doesNotCapture(n)) { + F.setDoesNotCapture(n); + ++NumAnnotated; + } + } + + void setDoesNotAlias(Function &F, unsigned n) { + if (!F.doesNotAlias(n)) { + F.setDoesNotAlias(n); + ++NumAnnotated; + } + } + + // inferPrototypeAttributes - Analyze the name and prototype of the + // given function and set any applicable attributes. Returns true + // if any attributes were set and false otherwise. + bool inferPrototypeAttributes(Function &F); + + // annotateLibraryCalls - Adds attributes to well-known standard library + // call declarations. + bool annotateLibraryCalls(const CallGraphSCC &SCC); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); CallGraphSCCPass::getAnalysisUsage(AU); } private: AliasAnalysis *AA; + TargetLibraryInfo *TLI; }; } @@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) @@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } +/// inferPrototypeAttributes - Analyze the name and prototype of the +/// given function and set any applicable attributes. Returns true +/// if any attributes were set and false otherwise. +bool FunctionAttrs::inferPrototypeAttributes(Function &F) { + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) + return false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strcmp: + case LibFunc::strspn: + case LibFunc::strncmp: + case LibFunc::strcspn: + case LibFunc::strcoll: + case LibFunc::strcasecmp: + case LibFunc::strncasecmp: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::scanf: + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat: + case LibFunc::sscanf: + case LibFunc::sprintf: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + break; + case LibFunc::system: + if (FTy->getNumParams() != 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotAlias(F, 0); + break; + case LibFunc::mkdir: + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::read: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::rmdir: + case LibFunc::rewind: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::rename: + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::chmod: + case LibFunc::chown: + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + break; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + break; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + case LibFunc::fread: + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + case LibFunc::fputs: + case LibFunc::fscanf: + case LibFunc::fprintf: + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::gets: + case LibFunc::getchar: + setDoesNotThrow(F); + break; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::uname: + case LibFunc::unlink: + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::pread: + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; these are valid pthread cancellation points. + setDoesNotCapture(F, 2); + break; + case LibFunc::putchar: + setDoesNotThrow(F); + break; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vsscanf: + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + break; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + setDoesNotCapture(F, 4); + break; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + default: + // Didn't mark any attributes. + return false; + } + + return true; +} + +/// annotateLibraryCalls - Adds attributes to well-known standard library +/// call declarations. +bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { + bool MadeChange = false; + + // Check each function in turn annotating well-known library function + // declarations with attributes. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F != 0 && F->isDeclaration()) + MadeChange |= inferPrototypeAttributes(*F); + } + + return MadeChange; +} + bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { AA = &getAnalysis<AliasAnalysis>(); + TLI = &getAnalysis<TargetLibraryInfo>(); - bool Changed = AddReadAttrs(SCC); + bool Changed = annotateLibraryCalls(SCC); + Changed |= AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); return Changed; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..3c5781ca73 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -150,7 +150,9 @@ namespace { typedef SmallVector<const FAddend*, 4> AddendVect; Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); @@ -159,6 +161,7 @@ namespace { Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); @@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bad46b4dab..32fdb9b708 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475b1d..92b42ee64b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v2"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // this TU. Used in initialization order checking. Value *FirstDynamic = 0, *LastDynamic = 0; + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; @@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); @@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a79873cbf6..3310ed5e2b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" #include "llvm/Support/InstIterator.h" @@ -39,35 +40,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), - UseExtraChecksum(false), NoRedZone(false), - NoFunctionNamesInData(false) { - memcpy(Version, DefaultGCovVersion, 4); + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, const char (&Version)[4], - bool UseExtraChecksum, bool NoRedZone, - bool NoFunctionNamesInData) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - UseExtraChecksum(UseExtraChecksum), NoRedZone(NoRedZone), - NoFunctionNamesInData(NoFunctionNamesInData) { - memcpy(this->Version, Version, 4); - assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -78,6 +101,8 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteWriteoutFunctionListFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -88,23 +113,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - char Version[4]; - bool UseExtraChecksum; - bool NoRedZone; - bool NoFunctionNamesInData; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -115,13 +139,14 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - const char (&Version)[4], - bool UseExtraChecksum, - bool NoRedZone, - bool NoFunctionNamesInData) { - return new GCOVProfiler(EmitNotes, EmitData, Version, UseExtraChecksum, - NoRedZone, NoFunctionNamesInData); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); +} + +static std::string getFunctionName(DISubprogram SP) { + if (!SP.getLinkageName().empty()) + return SP.getLinkageName(); + return SP.getName(); } namespace { @@ -260,7 +285,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseExtraChecksum) { + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -272,16 +297,16 @@ namespace { ReturnBlock = new GCOVBlock(i++, os); writeBytes(FunctionTag, 4); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum - writeGCOVString(SP.getName()); + writeGCOVString(getFunctionName(SP)); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); } @@ -363,12 +388,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -382,7 +407,7 @@ void GCOVProfiler::emitGCNO() { raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); out.write("oncg", 4); - out.write(Version, 4); + out.write(ReversedVersion, 4); out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); @@ -392,7 +417,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, i, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -522,8 +547,38 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Type *Params[] = { + PointerType::get(FTy, 0), + PointerType::get(FTy, 0) + }; + FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + + // Inialize the environment and register the local writeout and flush + // functions. + Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); + Builder.CreateCall2(GCOVInit, WriteoutF, FlushF); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -619,6 +674,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() { return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } +Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); +} + +Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -637,7 +702,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -646,7 +711,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); @@ -664,15 +729,15 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(CU, "gcda"); Builder.CreateCall2(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda), - Builder.CreateGlobalStringPtr(Version)); + Builder.CreateGlobalStringPtr(ReversedVersion)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { DISubprogram SP(CountersBySP[j].second); - Builder.CreateCall3(EmitFunction, - Builder.getInt32(j), - NoFunctionNamesInData ? - Constant::getNullValue(Builder.getInt8PtrTy()) : - Builder.CreateGlobalStringPtr(SP.getName()), - Builder.getInt8(UseExtraChecksum)); + Builder.CreateCall3( + EmitFunction, Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(getFunctionName(SP)) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = @@ -684,29 +749,9 @@ void GCOVProfiler::insertCounterWriteout( Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Builder.getInt32Ty(), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -715,7 +760,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); // Create basic blocks for function. @@ -760,18 +805,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -796,8 +841,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fce6513a97..4e75904ded 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given patter"), cl::Hidden, cl::init(0xff)); +static cl::opt<bool> ClPoisonUndef("msan-poison-undef", + cl::desc("poison undef temps"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), @@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Clean shadow (all zeroes) means all bits of the value are defined /// (initialized). - Value *getCleanShadow(Value *V) { + Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) return 0; @@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return ConstantStruct::get(ST, Vals); } + /// \brief Create a dirty shadow for a given value. + Constant *getPoisonedShadow(Value *V) { + Type *ShadowTy = getShadowTy(V); + if (!ShadowTy) + return 0; + return getPoisonedShadow(ShadowTy); + } + /// \brief Create a clean (zero) origin. Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); @@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return Shadow; } if (UndefValue *U = dyn_cast<UndefValue>(V)) { - Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V); DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); (void)U; return AllOnes; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447f1c..129af8d45d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! - patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d646..5d02c68a7a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -64,10 +65,16 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +85,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that must not be merged away + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -87,6 +111,7 @@ namespace { virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); + virtual bool doFinalization(Module &M); const char *getPassName() const { return "Merge internal globals"; @@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } @@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) { return false; } +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + Pass *llvm::createGlobalMergePass(const TargetLowering *tli) { return new GlobalMerge(tli); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 97fff7e782..8e76c78f5a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; + // Computing the value outside of the loop brings no benefit if : + // - it is definitely used inside the loop in a way which can not be + // optimized away. + // - no use outside of the loop can take advantage of hoisting the + // computation out of the loop + if (ExitValue->getSCEVType()>=scMulExpr) { + unsigned NumHardInternalUses = 0; + unsigned NumSoftExternalUses = 0; + unsigned NumUses = 0; + for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end(); + IB!=IE && NumUses<=6 ; ++IB) { + Instruction *UseInstr = cast<Instruction>(*IB); + unsigned Opc = UseInstr->getOpcode(); + NumUses++; + if (L->contains(UseInstr)) { + if (Opc == Instruction::Call || Opc == Instruction::Ret) + NumHardInternalUses++; + } else { + if (Opc == Instruction::PHI) { + // Do not count the Phi as a use. LCSSA may have inserted + // plenty of trivial ones. + NumUses--; + for (Value::use_iterator PB=UseInstr->use_begin(), + PE=UseInstr->use_end(); + PB!=PE && NumUses<=6 ; ++PB, ++NumUses) { + unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode(); + if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret) + NumSoftExternalUses++; + } + continue; + } + if (Opc != Instruction::Call && Opc != Instruction::Ret) + NumSoftExternalUses++; + } + } + if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses) + continue; + } + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9c67e327e2..0b62050b17 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -34,13 +34,9 @@ namespace { } // Possibly eliminate loop L if it is dead. - bool runOnLoop(Loop* L, LPPassManager& LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM); - bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, - bool &Changed, BasicBlock *Preheader); - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); @@ -53,6 +49,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } + + private: + bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, + bool &Changed, BasicBlock *Preheader); + }; } @@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) -Pass* llvm::createLoopDeletionPass() { +Pass *llvm::createLoopDeletionPass() { return new LoopDeletion(); } -/// IsLoopDead - Determined if a loop is dead. This assumes that we've already +/// isLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. -bool LoopDeletion::IsLoopDead(Loop* L, - SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, +bool LoopDeletion::isLoopDead(Loop *L, + SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop @@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L, // sufficient to guarantee that no loop-variant values are used outside // of the loop. BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]); // Make sure all exiting blocks produce the same incoming value for the exit // block. If there are different incoming values for different exiting // blocks, then it is impossible to statically determine which value should // be used. - for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) { if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - if (Instruction* I = dyn_cast<Instruction>(incoming)) + if (Instruction *I = dyn_cast<Instruction>(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; @@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. - BasicBlock* preheader = L->getLoopPreheader(); + BasicBlock *preheader = L->getLoopPreheader(); if (!preheader) return false; @@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); + ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { SE.forgetLoop(L); // Connect the preheader directly to the exit block. - TerminatorInst* TI = preheader->getTerminator(); + TerminatorInst *TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. - BasicBlock* exitingBlock = exitingBlocks[0]; + BasicBlock *exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { + while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); @@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. - DominatorTree& DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { @@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. - LoopInfo& loopInfo = getAnalysis<LoopInfo>(); + LoopInfo &loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4e4cb86464..9562cf8d5d 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg, } if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); - if (isLoser()) + if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } @@ -2716,6 +2716,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // by LSR. const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); + // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); Value *IVSrc = 0; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 810a553c74..25306c2681 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -57,11 +57,15 @@ using namespace llvm; STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); -STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); -STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); +STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); +STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions"); +STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found"); +STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses"); +STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); +STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); -STATISTIC(NumDeleted, "Number of instructions deleted"); -STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumDeleted, "Number of instructions deleted"); +STATISTIC(NumVectorized, "Number of vectorized aggregates"); /// Hidden option to force the pass to not use DomTree and mem2reg, instead /// forming SSA values through the SSAUpdater infrastructure. @@ -69,112 +73,167 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { +/// \brief A custom IRBuilder inserter which prefixes all names if they are +/// preserved. +template <bool preserveNames = true> +class IRBuilderPrefixedInserter : + public IRBuilderDefaultInserter<preserveNames> { + std::string Prefix; + public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; + void SetNamePrefix(const Twine &P) { Prefix = P.str(); } - /// \brief The ending offset, not included in the range. - uint64_t EndOffset; +protected: + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<preserveNames>::InsertHelper( + I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); + } +}; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} +// Specialization for not preserving the name is trivial. +template <> +class IRBuilderPrefixedInserter<false> : + public IRBuilderDefaultInserter<false> { +public: + void SetNamePrefix(const Twine &P) {} +}; - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } +/// \brief Provide a typedef for IRBuilder that drops names in release builds. +#ifndef NDEBUG +typedef llvm::IRBuilder<true, ConstantFolder, + IRBuilderPrefixedInserter<true> > IRBuilderTy; +#else +typedef llvm::IRBuilder<false, ConstantFolder, + IRBuilderPrefixedInserter<false> > IRBuilderTy; +#endif +} - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } +namespace { +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief A partition of an alloca. + /// \brief Support for ordering ranges. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. - bool IsSplittable; + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; - } + /// \brief Support comparison with a single offset to allow binary searches. + friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } + + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; + +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. + /// + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } + + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; + +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} - /// \brief A particular use of a partition of the alloca. + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. + Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} + +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +515,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +533,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. - bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +555,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +566,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +858,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. - iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +873,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +889,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) splitAndMergePartitions(); } + // Record how many partitions we end up with. + NumAllocaPartitions += Partitions.size(); + MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca); + // Now build up the user lists for each of these disjoint partitions by // re-walking the recursive users of the alloca. Uses.resize(Partitions.size()); @@ -1084,22 +1149,31 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) PtrI = UB.visitPtr(AI); assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); + + unsigned NumUses = 0; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx) + NumUses += Uses[Idx].size(); +#endif + NumAllocaPartitionUses += NumUses; + MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca); } Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); else return 0; // Bail if we have weird uses. @@ -1139,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1374,11 +1449,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. - visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1472,7 +1547,7 @@ private: assert(!Loads.empty()); Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); - IRBuilder<> PHIBuilder(&PN); + IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -1495,7 +1570,7 @@ private: TerminatorInst *TI = Pred->getTerminator(); Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); Value *InVal = PN.getIncomingValue(Idx); - IRBuilder<> PredBuilder(TI); + IRBuilderTy PredBuilder(TI); LoadInst *Load = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + @@ -1522,8 +1597,8 @@ private: // inside the load. AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1576,10 +1651,10 @@ private: if (!isSafeSelectToSpeculate(SI, Loads)) return; - IRBuilder<> IRB(&SI); + IRBuilderTy IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1590,7 +1665,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1622,8 +1697,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1640,9 +1715,8 @@ private: /// /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { +static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, + SmallVectorImpl<Value *> &Indices) { if (Indices.empty()) return BasePtr; @@ -1651,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); + return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); } /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1663,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, /// TargetTy. If we can't find one with the same type, we at least try to use /// one with the same size. If none of that works, we just produce the GEP as /// indicated by Indices to have the correct offset. -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, Value *BasePtr, Type *Ty, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Ty == TargetTy) - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); // See if we can descend into a struct and locate a field with the correct // type. @@ -1695,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, if (ElementTy != TargetTy) Indices.erase(Indices.end() - NumLayers, Indices.end()); - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); } /// \brief Recursively compute indices for a natural GEP. /// /// This is the recursive step for getNaturalGEPWithOffset that walks down the /// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Offset == 0) - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); // We can't recurse through pointer types. if (Ty->isPointerTy()) @@ -1728,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, Prefix); + Offset, TargetTy, Indices); } if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { @@ -1741,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } StructType *STy = dyn_cast<StructType>(Ty); @@ -1760,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Get a natural GEP from a base pointer to a particular offset and @@ -1773,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, /// Indices, and setting Ty to the result subtype. /// /// If no natural GEP can be constructed, this function returns null. -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, APInt Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1795,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the @@ -1813,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. -static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, - Value *Ptr, APInt Offset, Type *PointerTy, - const Twine &Prefix) { +static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, + Value *Ptr, APInt Offset, Type *PointerTy) { // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; @@ -1849,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // See if we can perform a natural GEP here. Indices.clear(); if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, - Indices, Prefix)) { + Indices)) { if (P->getType() == PointerTy) { // Zap any offset pointer that we ended up computing in previous rounds. if (OffsetPtr && OffsetPtr->use_empty()) @@ -1884,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, if (!OffsetPtr) { if (!Int8Ptr) { Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), - Prefix + ".raw_cast"); + "raw_cast"); Int8PtrOffset = Offset; } OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), - Prefix + ".raw_idx"); + "raw_idx"); } Ptr = OffsetPtr; // On the off chance we were targeting i8*, guard the bitcast here. if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); + Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); return Ptr; } @@ -1910,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1932,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { /// This will try various different casting techniques, such as bitcasts, /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test /// two types for viability with this routine. -static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *Ty) { assert(canConvertValue(DL, V->getType(), Ty) && "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1976,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1996,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2062,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2073,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2088,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2104,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2124,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, return WholeAllocaOp; } -static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { DEBUG(dbgs() << " start: " << *V << "\n"); @@ -2147,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, return V; } -static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, +static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name) { IntegerType *IntTy = cast<IntegerType>(Old->getType()); IntegerType *Ty = cast<IntegerType>(V->getType()); @@ -2178,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } -static Value *extractVector(IRBuilder<> &IRB, Value *V, +static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(V->getType()); @@ -2206,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V, return V; } -static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, +static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(Old->getType()); assert(VecTy && "Can only insert a vector into a vector"); @@ -2296,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; - // The name prefix to use when rewriting instructions for this alloca. - std::string NamePrefix; + // Utility IR builder, whose name prefix is setup for each visited use, and + // the insertion point is set to point to the user. + IRBuilderTy IRB; public: AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, @@ -2313,7 +2395,8 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), + IRB(NewAI.getContext(), ConstantFolder()) { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2335,14 +2418,21 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); - NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); + + Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); + IRB.SetInsertPoint(OldUserI); + IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); + + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2364,14 +2454,10 @@ private: llvm_unreachable("No rewrite rule for this instruction!"); } - Twine getName(const Twine &Suffix) { - return NamePrefix + Suffix; - } - - Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { + Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); - return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); + return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); } /// \brief Compute suitable alignment to access an offset into the new alloca. @@ -2421,27 +2507,27 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { + Value *rewriteVectorizedLoadInst() { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); + "load"); + return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); } - Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; if (Offset > 0 || EndOffset < NewAllocaEndOffset) V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); + "extract"); return V; } @@ -2451,56 +2537,37 @@ private: assert(OldOp == OldPtr); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - IRBuilder<> IRB(&LI); - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB); + V = rewriteVectorizedLoadInst(); } else if (IntTy && LI.getType()->isIntegerTy()) { - V = rewriteIntegerLoad(IRB, LI); + V = rewriteIntegerLoad(LI); } else if (BeginOffset == NewAllocaBeginOffset && canConvertValue(TD, NewAllocaTy, LI.getType())) { V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); } else { Type *LTy = TargetTy->getPointerTo(); V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), getPartitionTypeAlign(TargetTy), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); IsPtrAdjusted = true; } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2510,7 +2577,7 @@ private: Value *Placeholder = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, - getName(".insert")); + "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; @@ -2524,7 +2591,7 @@ private: return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); @@ -2539,8 +2606,8 @@ private: // Mix in the existing elements. Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + "load"); + V = insertVector(IRB, Old, V, BeginIndex, "vec"); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2550,17 +2617,17 @@ private: return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { + bool rewriteIntegerStore(Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, - getName(".insert")); + "insert"); } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); @@ -2574,7 +2641,6 @@ private: DEBUG(dbgs() << " original: " << SI << "\n"); Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); - IRBuilder<> IRB(&SI); Value *V = SI.getValueOperand(); @@ -2587,23 +2653,21 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, - getName(".extract")); + "extract"); } if (VecTy) - return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + return rewriteVectorizedStoreInst(V, SI, OldOp); if (IntTy && V->getType()->isIntegerTy()) - return rewriteIntegerStore(IRB, V, SI); + return rewriteIntegerStore(V, SI); StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && @@ -2634,7 +2698,7 @@ private: /// /// \param V The i8 value to splat. /// \param Size The number of bytes in the output (assuming i8 is one byte) - Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + Value *getIntegerSplat(Value *V, unsigned Size) { assert(Size > 0 && "Expected a positive number of bytes."); IntegerType *VTy = cast<IntegerType>(V->getType()); assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); @@ -2642,26 +2706,25 @@ private: return V; Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), ConstantExpr::getUDiv( Constant::getAllOnesValue(SplatIntTy), ConstantExpr::getZExt( Constant::getAllOnesValue(V->getType()), SplatIntTy)), - getName(".isplat")); + "isplat"); return V; } /// \brief Compute a vector splat for a given element value. - Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { - V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + Value *getVectorSplat(Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); DEBUG(dbgs() << " splat: " << *V << "\n"); return V; } bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawDest() == OldPtr); // If the memset has a variable size, it cannot be split, just adjust the @@ -2718,31 +2781,31 @@ private: unsigned NumElements = EndIndex - BeginIndex; assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Value *Splat = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ElementTy)/8); + Value *Splat = + getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8); Splat = convertValue(TD, IRB, Splat, ElementTy); if (NumElements > 1) - Splat = getVectorSplat(IRB, Splat, NumElements); + Splat = getVectorSplat(Splat, NumElements); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + "oldload"); + V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); } else if (IntTy) { // If this is a memset on an alloca where we can widen stores, insert the // set integer. assert(!II.isVolatile()); uint64_t Size = EndOffset - BeginOffset; - V = getIntegerSplat(IRB, II.getValue(), Size); + V = getIntegerSplat(II.getValue(), Size); if (IntTy && (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaBeginOffset)) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + V = insertInteger(TD, IRB, Old, V, Offset, "insert"); } else { assert(V->getType() == IntTy && "Wrong type for an alloca wide integer!"); @@ -2753,10 +2816,9 @@ private: assert(BeginOffset == NewAllocaBeginOffset); assert(EndOffset == NewAllocaEndOffset); - V = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ScalarTy)/8); + V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8); if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) - V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + V = getVectorSplat(V, AllocaVecTy->getNumElements()); V = convertValue(TD, IRB, V, AllocaTy); } @@ -2773,7 +2835,6 @@ private: // them into two categories: split intrinsics and unsplit intrinsics. DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); bool IsDest = II.getRawDest() == OldPtr; @@ -2857,8 +2918,7 @@ private: // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() @@ -2901,8 +2961,7 @@ private: OtherPtrTy = SubIntTy->getPointerTo(); } - Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); @@ -2910,31 +2969,31 @@ private: Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); + "load"); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); Src = convertValue(TD, IRB, Src, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract")); + Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract"); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), - getName(".copyload")); + "copyload"); } if (VecTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + "oldload"); + Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert")); + Src = insertInteger(TD, IRB, Old, Src, Offset, "insert"); Src = convertValue(TD, IRB, Src, NewAllocaTy); } @@ -2949,7 +3008,6 @@ private: assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. @@ -2977,7 +3035,9 @@ private: // as local as possible to the PHI. To do that, we re-use the location of // the old pointer, which necessarily must be in the right position to // dominate the PHI. - IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr)); + IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr)); + PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); // Replace the operands which were using the old pointer. @@ -2990,7 +3050,6 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // Find the operand we need to rewrite here. bool IsTrueVal = SI.getTrueValue() == OldPtr; @@ -3065,7 +3124,7 @@ private: class OpSplitter { protected: /// The builder used to form new instructions. - IRBuilder<> IRB; + IRBuilderTy IRB; /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector<unsigned, 4> Indices; @@ -3380,7 +3439,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 916b37d4a8..3514e6c2aa 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! @@ -35,7 +34,6 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); //===----------------------------------------------------------------------===// // Optimizer Base Class @@ -91,8 +89,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - - bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID) { @@ -104,14 +100,6 @@ namespace { void InitOptimizations(); bool runOnFunction(Function &F); - void setDoesNotAccessMemory(Function &F); - void setOnlyReadsMemory(Function &F); - void setDoesNotThrow(Function &F); - void setDoesNotCapture(Function &F, unsigned n); - void setDoesNotAlias(Function &F, unsigned n); - bool doInitialization(Module &M); - - void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfo>(); } @@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { return Changed; } -// Utility methods for doInitialization. - -void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - Modified = true; - } -} - - -void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - FunctionType *FTy = F.getFunctionType(); - - StringRef Name = F.getName(); - switch (Name[0]) { - case 's': - if (Name == "strlen") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strchr" || - Name == "strrchr") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "strcpy" || - Name == "stpcpy" || - Name == "strcat" || - Name == "strtol" || - Name == "strtod" || - Name == "strtof" || - Name == "strtoul" || - Name == "strtoll" || - Name == "strtold" || - Name == "strncat" || - Name == "strncpy" || - Name == "stpncpy" || - Name == "strtoull") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strxfrm") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strcmp" || - Name == "strspn" || - Name == "strncmp" || - Name == "strcspn" || - Name == "strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; these are valid pthread cancellation points. - setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } -} - -/// doInitialization - Add attributes to well-known functions. -/// -bool SimplifyLibCalls::doInitialization(Module &M) { - Modified = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; - if (F.isDeclaration() && F.hasName()) - inferPrototypeAttributes(F); - } - return Modified; -} - // TODO: // Additional cases that we need to add to this file: // diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d2598a221..3e1022ef8c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -82,7 +82,7 @@ namespace { /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. - void forwardResume(ResumeInst *RI); + void forwardResume(ResumeInst *RI, BasicBlock *FirstNewBlock); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -140,8 +140,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// block. When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI) { +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + BasicBlock *FirstNewBlock) { BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -152,6 +154,36 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + Function *Caller = FirstNewBlock->getParent(); + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + LandingPadInst *LPI = II->getLandingPadInst(); + if (!LPI->hasCatchAll()) + InlinedLPads.insert(LPI); + } + + // Merge the catch clauses from the outer landing pad instruction into the + // inlined landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) { + bool hasClause = false; + if (OuterLPad->isFilter(OuterIdx)) continue; + Value *OuterClause = OuterLPad->getClause(OuterIdx); + for (unsigned Idx = 0, N = InlinedLPad->getNumClauses(); Idx != N; ++Idx) + if (OuterClause == InlinedLPad->getClause(Idx)) { + hasClause = true; + break; + } + if (!hasClause) + InlinedLPad->addClause(OuterClause); + } + } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -229,19 +261,9 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. If the code doesn't have calls or unwinds, we know there is - // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls) { - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); - return; - } - + // rewrite. InvokeInliningInfo Invoke(II); - + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { @@ -250,13 +272,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, continue; } + // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI); + Invoke.forwardResume(RI, FirstNewBlock); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the + // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. InvokeDest->removePredecessor(II->getParent()); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 07dd453424..930d9c412f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); |