Diffstat (limited to 'lib/Transforms')
19 files changed, 582 insertions, 252 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index b94dd69deb..10f5b6e658 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -592,14 +592,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *RetTy = FTy->getReturnType(); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg()) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); @@ -711,9 +703,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } - if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); @@ -870,16 +859,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Increment I2 past all of the arguments added for this promoted pointer. - for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) - ++I2; + std::advance(I2, ArgIndices.size()); } - // Notify the alias analysis implementation that we inserted a new argument. - if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), - NF->arg_begin()); - - // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index fd23a935b9..c7429c5954 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -717,9 +717,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) - RAttrs &= ~Attribute::typeIncompatible(NRetTy); + RAttrs &= ~Attributes::typeIncompatible(NRetTy); else - assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0 && "Return attributes no longer compatible?"); if (RAttrs) @@ -786,7 +786,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes RAttrs = CallPAL.getRetAttributes(); Attributes FnAttrs = CallPAL.getFnAttributes(); // Adjust in case the function was changed to return void. - RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType()); if (RAttrs) AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b888e95982..b1ba6be5ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -962,7 +962,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we get here we could have other crazy uses that are transitively // loaded. 
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || - isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) && + isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || + isa<BitCastInst>(GlobalUser) || + isa<GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"); } } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 664ddf6f7a..42f0991360 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { /// \brief Minimal filter to detect invalid constructs for inlining. static bool isInlineViable(Function &F) { - bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice); + bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr(); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa<IndirectBrInst>(BI->getTerminator())) @@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { if (Callee->isDeclaration()) return InlineCost::getNever(); // Return never for anything not marked as always inline. - if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + if (!Callee->getFnAttributes().hasAlwaysInlineAttr()) return InlineCost::getNever(); // Do some minimal analysis to preclude non-viable functions. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index a9263baa44..7932b40bdc 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -93,10 +93,10 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. - if (Callee->hasFnAttr(Attribute::StackProtectReq)) + if (Callee->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->hasFnAttr(Attribute::StackProtect) && - !Caller->hasFnAttr(Attribute::StackProtectReq)) + else if (Callee->getFnAttributes().hasStackProtectAttr() && + !Caller->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we @@ -209,7 +209,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize); + Caller->getFnAttributes().hasOptimizeForSizeAttr(); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; @@ -217,7 +217,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // Listen to the inlinehint attribute when it would increase the threshold. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint); + Callee->getFnAttributes().hasInlineHintAttr(); if (InlineHint && HintThreshold > thres) thres = HintThreshold; @@ -533,7 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. 
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr()) continue; // If the only remaining users of the function are dead constants, remove diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index c81b333813..9e328b9ac9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -211,13 +211,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - // GlobalOpt already deletes dead functions and globals, at -O3 try a + // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. - if (OptLevel > 2) + if (OptLevel > 1) { MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - - if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants + } } addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23c08699ff..ac30dcdcbf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + if (RAttrs & Attributes::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } @@ -1067,7 +1067,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attribute::typeIncompatible(ParamTy)) + if (Attrs & Attributes::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1141,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + RAttrs &= ~Attributes::typeIncompatible(NewRetTy); // Add the new return attributes. if (RAttrs) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5b6cf4a4a8..a446e427e5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -264,26 +264,28 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. 
- SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - EraseInstFromFunction(*ToDelete[i]); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); - EraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + if (TD) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } } } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 291e80019e..0ba7340e64 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -903,7 +903,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -912,6 +912,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, V); return &SI; } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { + // Form a shufflevector instruction. + SmallVector<Constant *, 8> Mask(VWidth); + Type *Int32Ty = Type::getInt32Ty(CV->getContext()); + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elem = cast<Constant>(CV->getOperand(i)); + if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) + Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); + else if (isa<UndefValue>(Elem)) + Mask[i] = UndefValue::get(Int32Ty); + else + return 0; + } + Constant *MaskVal = ConstantVector::get(Mask); + Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); + return ReplaceInstUsesWith(SI, V); + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } } return 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index afa6a4b5e6..1b102bd243 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -854,7 +854,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + if (!F.getFnAttributes().hasAddressSafetyAttr()) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 495cdc6321..305d70f27b 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.hasFnAttr(Attribute::OptimizeForSize); + OptSize = F.getFnAttributes().hasOptimizeForSizeAttr(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); - if (!SinglePred || SinglePred == BB) continue; + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { @@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && pred_begin(BB) == pred_end(BB)) + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 9b0aadb0b5..3ec6f3dcc3 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { // This case never fires - remove it. CI.getCaseSuccessor()->removePredecessor(BB); SI->removeCase(CI); // Does not invalidate the iterator. + + // The condition can be modified by removePredecessor's PHI simplification + // logic. 
+ Cond = SI->getCondition(); + ++NumDeadCases; Changed = true; } else if (State == LazyValueInfo::True) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1ff4329c84..301ee2f663 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SetVector.h" @@ -45,6 +46,7 @@ namespace { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { @@ -55,6 +57,7 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TLI = AA->getTargetLibraryInfo(); bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) @@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some memory? This only returns /// true for things that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) { return true; } } + if (CallSite CS = I) { + if (Function *F = CS.getCalledFunction()) { + if (TLI && TLI->has(LibFunc::strcpy) && + F->getName() == TLI->getName(LibFunc::strcpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncpy) && + F->getName() == TLI->getName(LibFunc::strncpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strcat) && + F->getName() == TLI->getName(LibFunc::strcat)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncat) && + F->getName() == TLI->getName(LibFunc::strncat)) { + return true; + } + } + } return false; } @@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// instruction if any. static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && + "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isUnordered(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. - return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. 
+ return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - // Don't remove volatile memory intrinsics. - return !cast<MemIntrinsic>(II)->isVolatile(); + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } } + + if (CallSite CS = I) + return CS.getInstruction()->use_empty(); + + return false; } @@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) { if (isa<StoreInst>(I)) return false; - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } } + + // Don't shorten libcalls calls for now. + + return false; } /// getStoredPointerOperand - Return the pointer that is being written to. @@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getDest(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getArgOperand(0); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getArgOperand(0); + } } + + CallSite CS = I; + // All the supported functions so far happen to have dest as their first + // argument. + return CS.getArgument(0); } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { @@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (CallInst *F = isFreeCall(Inst, TLI)) { MadeChange |= HandleFree(F); continue; } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst)) + if (!hasMemoryWrite(Inst, TLI)) continue; MemDepResult InstDep = MD->getDependency(Inst); @@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(SI, *MD, TLI); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. 
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(DepWrite, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(Dependency, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && - !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); @@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { + if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) + if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 09a186f7f9..f8709a537f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. 
unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr()) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 58f7739888..74c8f43ec2 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Check to see if it would be profitable to unswitch current loop. // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr()) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e3182d319c..a8dc0533bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -202,11 +202,11 @@ public: use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); } use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); } use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); } - void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) { - Uses[Idx].insert(UI, U); + void use_push_back(unsigned Idx, const PartitionUse &U) { + Uses[Idx].push_back(U); } - void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) { - Uses[I - begin()].insert(UI, U); + void use_push_back(const_iterator I, const PartitionUse &U) { + Uses[I - begin()].push_back(U); } void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); } void use_erase(const_iterator I, use_iterator UI) { @@ -522,8 +522,10 @@ private: void insertUse(Instruction &I, int64_t Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which don't overlap the allocation. - if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) || + // Completely skip uses which have a zero size or don't overlap the + // allocation. + if (Size == 0 || + (Offset >= 0 && (uint64_t)Offset >= AllocSize) || (Offset < 0 && (uint64_t)-Offset >= Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which starts past the end of the " << AllocSize @@ -660,11 +662,14 @@ private: bool Inserted = false; llvm::tie(PMI, Inserted) = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); - if (!Inserted && Offsets.IsSplittable) { + if (Offsets.IsSplittable && + (!Inserted || II.getRawSource() == II.getRawDest())) { // We've found a memory transfer intrinsic which refers to the alloca as - // both a source and dest. We refuse to split these to simplify splitting - // logic. If possible, SROA will still split them into separate allocas - // and then re-analyze. + // both a source and dest. This is detected either by direct equality of + // the operand values, or when we visit the intrinsic twice due to two + // different chains of values leading to it. We refuse to split these to + // simplify splitting logic. If possible, SROA will still split them into + // separate allocas and then re-analyze. 
Offsets.IsSplittable = false; P.Partitions[PMI->second].IsSplittable = false; P.Partitions[NewIdx].IsSplittable = false; @@ -697,6 +702,9 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + // If there are no loads or stores, the access is dead. We mark that as + // a size zero access. + Size = 0; do { Instruction *I, *UsedI; llvm::tie(UsedI, I) = Uses.pop_back_val(); @@ -824,9 +832,9 @@ private: } void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { - // If the use extends outside of the allocation, record it as a dead use - // for elimination later. - if ((uint64_t)Offset >= AllocSize || + // If the use has a zero size or extends outside of the allocation, record + // it as a dead use for elimination later. + if (Size == 0 || (uint64_t)Offset >= AllocSize || (Offset < 0 && (uint64_t)-Offset >= Size)) return markAsDead(User); @@ -853,7 +861,7 @@ private: PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset), std::min(I->EndOffset, EndOffset), &User, cast<Instruction>(*U)); - P.Uses[I - P.begin()].push_back(NewUse); + P.use_push_back(I, NewUse); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[std::make_pair(&User, U->get())] = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); @@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI) Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); UB(); - for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I) - std::stable_sort(use_begin(I), use_end(I)); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -1890,7 +1896,8 @@ private: Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, uint64_t Offset) { assert(IntPromotionTy && "Alloca is not an integer we can extract from"); - Value *V = IRB.CreateLoad(&NewAI, getName(".load")); + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; if (RelOffset) @@ -1906,7 +1913,7 @@ private: StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { IntegerType *Ty = cast<IntegerType>(V->getType()); if (Ty == IntPromotionTy) - return IRB.CreateStore(V, &NewAI); + return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && "Cannot insert a larger integer!"); @@ -1918,10 +1925,12 @@ private: APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()) .shl(RelOffset*8); - Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")), + Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".oldload")), Mask, getName(".mask")); - return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")), - &NewAI); + return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), + &NewAI, NewAI.getAlignment()); } void deleteIfTriviallyDead(Value *V) { @@ -1943,12 +1952,12 @@ private: Value *Result; if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result - = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")), - getIndex(IRB, BeginOffset), - getName(".extract")); + Result = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + getIndex(IRB, BeginOffset), 
getName(".extract")); } else { - Result = IRB.CreateLoad(&NewAI, getName(".load")); + Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); } if (Result->getType() != LI.getType()) Result = getValueCast(IRB, Result, LI.getType()); @@ -1983,6 +1992,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); + if (LI.getAlignment()) + LI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -1996,13 +2008,14 @@ private: BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) V = getValueCast(IRB, V, ElementTy); - V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); } else if (V->getType() != VecTy) { V = getValueCast(IRB, V, VecTy); } - StoreInst *Store = IRB.CreateStore(V, &NewAI); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); (void)Store; @@ -2033,6 +2046,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); + if (SI.getAlignment()) + SI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2048,6 +2064,15 @@ private: // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); + + Type *CstTy = II.getAlignmentCst()->getType(); + if (!NewAI.getAlignment()) + II.setAlignment(ConstantInt::get(CstTy, 0)); + else + II.setAlignment( + ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset))); + deleteIfTriviallyDead(OldPtr); return false; } @@ -2067,11 +2092,15 @@ private: !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); + unsigned Align = 1; + if (NewAI.getAlignment()) + Align = MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset); CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, II.getAlignment(), + II.getValue(), Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2109,11 +2138,13 @@ private: // If this is an element-wide memset of a vectorizable alloca, insert it. 
if (VecTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { - StoreInst *Store = IRB.CreateStore( - IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + StoreInst *Store = IRB.CreateAlignedStore( + IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".load")), + V, getIndex(IRB, BeginOffset), getName(".insert")), - &NewAI); + &NewAI, NewAI.getAlignment()); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2131,7 +2162,8 @@ private: assert(V->getType() == VecTy); } - Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile()); + Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); @@ -2164,6 +2196,13 @@ private: else II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); + Type *CstTy = II.getAlignmentCst()->getType(); + if (II.getAlignment() > 1) + II.setAlignment(ConstantInt::get( + CstTy, MinAlign(II.getAlignment(), + MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)))); + DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldOp); return false; @@ -2221,6 +2260,11 @@ private: OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, getName("." + OtherPtr->getName())); + unsigned Align = II.getAlignment(); + if (Align > 1) + Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), + MinAlign(II.getAlignment(), NewAI.getAlignment())); + // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI @@ -2236,8 +2280,7 @@ private: CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, - Size, II.getAlignment(), - II.isVolatile()); + Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -2251,22 +2294,25 @@ private: Value *Src; if (IsVectorElement && !IsDest) { // We have to extract rather than load. - Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr, - getName(".copyload")), - getIndex(IRB, BeginOffset), - getName(".copyextract")); + Src = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), + getIndex(IRB, BeginOffset), + getName(".copyextract")); } else { - Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload")); + Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), + getName(".copyload")); } if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), - Src, getIndex(IRB, BeginOffset), - getName(".insert")); + Src = IRB.CreateInsertElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + Src, getIndex(IRB, BeginOffset), + getName(".insert")); } - Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile()); + StoreInst *Store = cast<StoreInst>( + IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); @@ -2460,8 +2506,7 @@ private: else { AllocaPartitioning::PartitionUse OtherUse = *UI; OtherUse.User = Load; - P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse), - OtherUse); + P.use_push_back(PI, OtherUse); } } } @@ -2559,7 +2604,7 @@ private: LoadInst *OtherLoad = IsTrueVal ? 
FL : TL; assert(OtherUse.Ptr == OtherLoad->getOperand(0)); OtherUse.User = OtherLoad; - P.use_insert(PI, P.use_end(PI), OtherUse); + P.use_push_back(PI, OtherUse); } // Transfer alignment and TBAA info if present. @@ -2576,8 +2621,6 @@ private: LI->replaceAllUsesWith(V); Pass.DeadInsts.push_back(LI); } - if (PI != P.end()) - std::stable_sort(P.use_begin(PI), P.use_end(PI)); deleteIfTriviallyDead(OldPtr); return NewPtr == &NewAI; @@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, assert(PI == P.begin() && "Begin offset is zero on later partition"); NewAI = &AI; } else { - // FIXME: The alignment here is overly conservative -- we could in many - // cases get away with much weaker alignment constraints. - NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(), + unsigned Alignment = AI.getAlignment(); + if (!Alignment) { + // The minimum alignment which users can rely on when the explicit + // alignment is omitted or zero is that required by the ABI for this + // type. + Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); + } + Alignment = MinAlign(Alignment, PI->BeginOffset); + // If we will get at least this much alignment from the type alone, leave + // the alloca's alignment unconstrained. + if (Alignment <= TD->getABITypeAlignment(AllocaTy)) + Alignment = 0; + NewAI = new AllocaInst(AllocaTy, 0, Alignment, AI.getName() + ".sroa." + Twine(PI - P.begin()), &AI); ++NumNewAllocas; diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 9d630349ab..55227e2714 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -23,11 +23,69 @@ using namespace llvm; +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. 
Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + /// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing at the sdiv -/// instruction. This will generate a udiv in the process, and Builder's insert -/// point will be pointing at the udiv (if present, i.e. not folded), ready to -/// be expanded if the user wishes. +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Implementation taken from compiler-rt's __divsi3 @@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, } /// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing at -/// the udiv instruction. +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, return Q_5; } +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. 
+bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code @@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); + Div->getOperand(1), Builder); Div->replaceAllUsesWith(Quotient); Div->dropAllReferences(); Div->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 876ff2c337..065325b7c2 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -58,9 +58,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. -static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. 
+ class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = NULL; } // Fill in any holes in the table with the default result. - if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = NULL; + } + + // If each element in the table contains the same value, we only need to store + // that single value. 
+ if (SingleValue) { + Kind = SingleValueKind; + return; } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%. 
This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. 
- if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); + Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. @@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db64..a30b09321b 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { |
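For the new InstCombineSelect.cpp case that turns a select with a constant vector condition into a shufflevector, here is a minimal standalone C++ sketch of the mask construction. The array model and names are illustrative stand-ins for the IR values in the patch; lane i reads from the true operand when the condition element is nonzero, otherwise from the false operand at index VWidth + i.

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Build the shufflevector mask for "select <N x i1> Cond, True, False":
    // lane I gets index I (true operand) or N + I (false operand) into the
    // concatenated <True, False> vector, matching the
    // i + (isZero ? VWidth : 0) computation in the patch.
    template <std::size_t N>
    std::array<int, N> buildShuffleMask(const std::array<bool, N> &Cond) {
      std::array<int, N> Mask{};
      for (std::size_t I = 0; I != N; ++I)
        Mask[I] = static_cast<int>(Cond[I] ? I : N + I);
      return Mask;
    }

    int main() {
      std::array<bool, 4> Cond = {true, false, true, false};
      int Concat[8] = {10, 11, 12, 13, 20, 21, 22, 23}; // <True, False> lanes
      std::array<int, 4> Mask = buildShuffleMask(Cond);
      int Expected[4] = {10, 21, 12, 23}; // true lanes from True, false from False
      for (std::size_t I = 0; I != 4; ++I)
        assert(Concat[Mask[I]] == Expected[I]);
      return 0;
    }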
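The SROA.cpp changes thread explicit alignments onto the loads, stores and memory intrinsics they rewrite, taking MinAlign of the new alloca's alignment and the byte offset into it. A small sketch of that computation follows; the function name is mine, and the bit trick simply isolates the lowest set bit of the two values combined, i.e. the largest power of two dividing both.

    #include <cassert>
    #include <cstdint>

    // The alignment that can be guaranteed Offset bytes into an object whose
    // start is A-byte aligned is the largest power of two dividing both
    // values. This models what llvm::MinAlign computes, which the SROA
    // rewriter uses when it moves accesses onto a new partition alloca.
    uint64_t minAlign(uint64_t A, uint64_t Offset) {
      uint64_t Both = A | Offset;
      return Both & (~Both + 1); // isolate the lowest set bit
    }

    int main() {
      assert(minAlign(16, 0) == 16); // at the start, full alignment survives
      assert(minAlign(16, 4) == 4);  // 4 bytes in, only 4-byte alignment remains
      assert(minAlign(8, 12) == 4);
      return 0;
    }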
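The new IntegerDivision.cpp helpers expand srem in terms of urem, and urem in terms of udiv. A self-contained C++ sketch of the two identities is below; the names are illustrative, and like the generated IR the signed version leans on arithmetic right shift and two's-complement wraparound.

    #include <cassert>
    #include <cstdint>

    // srem lowered onto urem: strip the signs, take the unsigned remainder,
    // then give the result the sign of the dividend (the identity behind
    // generateSignedRemainderCode).
    int32_t sremViaUrem(int32_t Dividend, int32_t Divisor) {
      uint32_t DividendSign = (uint32_t)(Dividend >> 31); // ashr: all-ones or zero
      uint32_t DivisorSign  = (uint32_t)(Divisor >> 31);
      uint32_t UDividend = ((uint32_t)Dividend ^ DividendSign) - DividendSign;
      uint32_t UDivisor  = ((uint32_t)Divisor ^ DivisorSign) - DivisorSign;
      uint32_t URem = UDividend % UDivisor;
      return (int32_t)((URem ^ DividendSign) - DividendSign);
    }

    // urem lowered onto udiv: remainder = dividend - (dividend / divisor) * divisor
    // (the identity behind generatedUnsignedRemainderCode).
    uint32_t uremViaUdiv(uint32_t Dividend, uint32_t Divisor) {
      uint32_t Quotient = Dividend / Divisor;
      return Dividend - Quotient * Divisor;
    }

    int main() {
      assert(sremViaUrem(7, 3) == 1 && sremViaUrem(-7, 3) == -1);
      assert(sremViaUrem(7, -3) == 1 && sremViaUrem(-7, -3) == -1);
      assert(uremViaUdiv(29, 5) == 4);
      return 0;
    }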
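The new BitMapKind in SimplifyCFG's SwitchLookupTable packs small integer results into a single target-legal word and retrieves an element with a shift and mask instead of loading from a global array. A standalone sketch of that retrieval; the constants and names here are illustrative, not taken from the patch.

    #include <cassert>
    #include <cstdint>

    // Results for switch cases 0..3; each value fits in 4 bits, so the whole
    // table fits in one 64-bit word. Element I occupies bits [4*I, 4*I + 4).
    constexpr uint64_t kElemBits = 4;
    constexpr uint64_t kTable =
        (0x7ULL << 0) | (0x3ULL << 4) | (0x9ULL << 8) | (0x1ULL << 12);

    // Mirror the BuildLookup sequence for BitMapKind: multiply the index by
    // the element width, shift the packed word down, and mask off one element.
    uint64_t lookup(uint64_t Index) {
      return (kTable >> (Index * kElemBits)) & ((1ULL << kElemBits) - 1);
    }

    int main() {
      assert(lookup(0) == 7 && lookup(1) == 3 && lookup(2) == 9 && lookup(3) == 1);
      return 0;
    }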
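ShouldBuildLookupTable keeps the roughly 40% density threshold that jump-table formation uses, with the escape hatch that a table is built anyway when every result table would fit in a register. The core density check, modeled in plain C++ with an illustrative name:

    #include <cassert>
    #include <cstdint>

    // Build a lookup table when at least 40% of the TableSize slots are
    // populated by real cases: NumCases * 10 >= TableSize * 4, as in the patch.
    bool denseEnough(uint64_t NumCases, uint64_t TableSize) {
      return NumCases * 10 >= TableSize * 4;
    }

    int main() {
      assert(denseEnough(4, 10));  // exactly 40%: dense enough
      assert(!denseEnough(3, 10)); // 30%: too sparse
      return 0;
    }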