diff options
author | Chris Lattner <sabre@nondot.org> | 2010-11-21 00:28:59 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-11-21 00:28:59 +0000 |
commit | 2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3 (patch) | |
tree | 0368003df16ef9b625afc9cdca10357bf6f22268 /lib/Transforms/Scalar/MemCpyOptimizer.cpp | |
parent | a6fd81dd7f6039fbc1a55f6f4d45659fffdd81fb (diff) |
Implement PR8644: forwarding a memcpy value to a byval,
allowing the memcpy to be eliminated.
Unfortunately, the requirements on byval's without explicit
alignment are really weak and impossible to predict in the
mid-level optimizer, so this doesn't kick in much with current
frontends. The fix is to change clang to set alignment on all
byval arguments.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119916 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r-- | lib/Transforms/Scalar/MemCpyOptimizer.cpp | 171 |
1 files changed, 127 insertions, 44 deletions
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6f53c322ef..4f1cfd622f 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -301,11 +301,13 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) { namespace { class MemCpyOpt : public FunctionPass { + MemoryDependenceAnalysis *MD; bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); + MD = 0; } private: @@ -327,6 +329,7 @@ namespace { uint64_t cpyLen, CallInst *C); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, uint64_t MSize); + bool processByValArgument(CallSite CS, unsigned ArgNo); bool iterateOnFunction(Function &F); }; @@ -359,9 +362,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (!LI->isVolatile() && LI->hasOneUse()) { - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - - MemDepResult dep = MD.getDependency(LI); + MemDepResult dep = MD->getDependency(LI); CallInst *C = 0; if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst())) C = dyn_cast<CallInst>(dep.getInst()); @@ -372,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LI->getPointerOperand()->stripPointerCasts(), TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); if (changed) { - MD.removeInstruction(SI); + MD->removeInstruction(SI); SI->eraseFromParent(); LI->eraseFromParent(); ++NumMemCpyInstr; @@ -505,8 +506,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - dbgs() << *Range.TheStores[i]; - dbgs() << "With: " << *C); C=C; + dbgs() << *Range.TheStores[i] << '\n'; + dbgs() << "With: " << *C << '\n'); C=C; // Don't invalidate the iterator BBI = BI; @@ -657,11 +658,10 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - MD.removeInstruction(C); + MD->removeInstruction(C); - // Remove the memcpy - MD.removeInstruction(cpy); + // Remove the memcpy. + MD->removeInstruction(cpy); ++NumMemCpyInstr; return true; @@ -675,7 +675,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, uint64_t MSize) { // We can only transforms memcpy's where the dest of one is the source of the // other. - if (M->getSource() != MDep->getDest()) + if (M->getSource() != MDep->getDest() || MDep->isVolatile()) return false; // Second, the length of the memcpy's must be the same, or the preceeding one @@ -684,27 +684,26 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (!C1) return false; uint64_t DepSize = C1->getValue().getZExtValue(); - if (DepSize < MSize) return false; - Intrinsic::ID ResultFn = Intrinsic::memcpy; + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + Intrinsic::ID ResultFn = Intrinsic::memcpy; if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize)) ResultFn = Intrinsic::memmove; - // If all checks passed, then we can transform these memcpy's + // If all checks passed, then we can transform M. const Type *ArgTys[3] = { M->getRawDest()->getType(), MDep->getRawSource()->getType(), M->getLength()->getType() }; Function *MemCpyFun = - Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(), + Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(), ResultFn, ArgTys, 3); // Make sure to use the lesser of the alignment of the source and the dest @@ -717,29 +716,30 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, M->getRawDest(), MDep->getRawSource(), M->getLength(), - ConstantInt::get(Type::getInt32Ty(M->getContext()), Align), + ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align), M->getVolatileCst() }; CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M); - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - // Verify that the copied-from memory doesn't change in between the two // transfers. For example, in: // memcpy(a <- b) // *b = 42; // memcpy(c <- a) // It would be invalid to transform the second memcpy into memcpy(c <- b). - MemDepResult NewDep = MD.getDependency(C); + // + // TODO: If the code between M and MDep is transparent to the destination "c", + // then we could still perform the xform by moving M up to the first memcpy. + MemDepResult NewDep = MD->getDependency(C); if (!NewDep.isClobber() || NewDep.getInst() != MDep) { - MD.removeInstruction(C); + MD->removeInstruction(C); C->eraseFromParent(); return false; } // Otherwise we're good! Nuke the instruction we're replacing. - MD.removeInstruction(M); + MD->removeInstruction(M); M->eraseFromParent(); ++NumMemCpyInstr; return true; @@ -752,25 +752,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, /// circumstances). This allows later passes to remove the first memcpy /// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { - MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); - - // We can only optimize statically-sized memcpy's. - ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength()); - if (!cpyLen) return false; + // We can only optimize statically-sized memcpy's that are non-volatile. + ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); + if (CopySize == 0 || M->isVolatile()) return false; // The are two possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. - MemDepResult dep = MD.getDependency(M); - if (!dep.isClobber()) + MemDepResult DepInfo = MD->getDependency(M); + if (!DepInfo.isClobber()) return false; - if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(dep.getInst())) - return processMemCpyMemCpyDependence(M, MDep, cpyLen->getZExtValue()); + if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst())) + return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); - if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) { + if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(), - cpyLen->getZExtValue(), C); + CopySize->getZExtValue(), C); if (changed) M->eraseFromParent(); return changed; } @@ -805,33 +803,116 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. - getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M); + MD->removeInstruction(M); ++NumMoveToCpy; return true; } +/// processByValArgument - This is called on every byval argument in call sites. +bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; -// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN. + Value *ByValArg = CS.getArgument(ArgNo); + + // MemDep doesn't have a way to do a local query with a memory location. + // Instead, just insert a load and ask for its dependences. + LoadInst *TmpLoad = new LoadInst(ByValArg, "", CS.getInstruction()); + MemDepResult DepInfo = MD->getDependency(TmpLoad); + + MD->removeInstruction(TmpLoad); + TmpLoad->eraseFromParent(); + + if (!DepInfo.isClobber()) + return false; + + // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by + // a memcpy, see if we can byval from the source of the memcpy instead of the + // result. + MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); + if (MDep == 0 || MDep->isVolatile() || + ByValArg->stripPointerCasts() != MDep->getDest()) + return false; + + // The length of the memcpy must be larger or equal to the size of the byval. + // must be larger than the following one. + ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); + if (C1 == 0 || + C1->getValue().getZExtValue() < TD->getTypeAllocSize(ByValArg->getType())) + return false; + + // Get the alignment of the byval. If it is greater than the memcpy, then we + // can't do the substitution. If the call doesn't specify the alignment, then + // it is some target specific value that we can't know. + unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign) + return false; + + // Verify that the copied-from memory doesn't change in between the memcpy and + // the byval call. + // memcpy(a <- b) + // *b = 42; + // foo(*a) + // It would be invalid to transform the second memcpy into foo(*b). + Value *TmpCast = MDep->getSource(); + if (MDep->getSource()->getType() != ByValArg->getType()) + TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), + "tmpcast", CS.getInstruction()); + Value *TmpVal = + UndefValue::get(cast<PointerType>(TmpCast->getType())->getElementType()); + Instruction *TmpStore = new StoreInst(TmpVal, TmpCast, false, + CS.getInstruction()); + DepInfo = MD->getDependency(TmpStore); + bool isUnsafe = !DepInfo.isClobber() || DepInfo.getInst() != MDep; + MD->removeInstruction(TmpStore); + TmpStore->eraseFromParent(); + + if (isUnsafe) { + // Clean up the inserted cast instruction. + if (TmpCast != MDep->getSource()) + cast<Instruction>(TmpCast)->eraseFromParent(); + return false; + } + + DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" + << " " << *MDep << "\n" + << " " << *CS.getInstruction() << "\n"); + + // Otherwise we're good! Update the byval argument. + CS.setArgument(ArgNo, TmpCast); + ++NumMemCpyInstr; + return true; +} + +/// iterateOnFunction - Executes one iteration of MemCpyOpt. bool MemCpyOpt::iterateOnFunction(Function &F) { bool MadeChange = false; // Walk all instruction in the function. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE;) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { // Avoid invalidating the iterator. Instruction *I = BI++; + bool RepeatInstruction = false; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) MadeChange |= processStore(SI, BI); - else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) - MadeChange |= processMemCpy(M); - else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { - if (processMemMove(M)) { - --BI; // Reprocess the new memcpy. - MadeChange = true; - } + else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) { + RepeatInstruction = processMemCpy(M); + } else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { + RepeatInstruction = processMemMove(M); + } else if (CallSite CS = (Value*)I) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.paramHasAttr(i+1, Attribute::ByVal)) + MadeChange |= processByValArgument(CS, i); + } + + // Reprocess the instruction if desired. + if (RepeatInstruction) { + --BI; + MadeChange = true; } } } @@ -844,12 +925,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { // bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; + MD = &getAnalysis<MemoryDependenceAnalysis>(); while (1) { if (!iterateOnFunction(F)) break; MadeChange = true; } + MD = 0; return MadeChange; } |