diff options
author | Derek Schuff <dschuff@chromium.org> | 2012-11-14 16:47:45 -0800 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2012-11-15 10:12:39 -0800 |
commit | 923f52fb3f6670e843ffe0b8da2f2bad898d752c (patch) | |
tree | a279587ac525162817217dbbe0f01a91893a9d0b /lib/Transforms | |
parent | f4099a3c92570a80cd9a3850cda598c5ea446b96 (diff) | |
parent | 08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb (diff) |
Merge commit '08e9cb46feb0c8e08e3d309a0f9fd75a04ca54fb'
(svn r167699, also the 3.2 branch point)
Conflicts:
lib/Target/X86/X86Subtarget.cpp
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 20 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 99 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp | 247 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 295 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 229 |
5 files changed, 566 insertions, 324 deletions
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index ccf75bca2b..9a46f25e66 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2367,6 +2367,24 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { return MadeIRChange; } +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(TD, TLI) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. + virtual void replaceAllUsesWith(Instruction *I, Value *With) const { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<DataLayout>(); @@ -2379,7 +2397,7 @@ bool InstCombiner::runOnFunction(Function &F) { InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - LibCallSimplifier TheSimplifier(TD, TLI); + InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this); Simplifier = &TheSimplifier; bool EverMadeChange = false; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c6244a55c9..9e10fc4416 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -97,6 +97,10 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanWrite[kNumberOfAccessSizes]; Function *TsanAtomicLoad[kNumberOfAccessSizes]; Function *TsanAtomicStore[kNumberOfAccessSizes]; + Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes]; + Function *TsanAtomicCAS[kNumberOfAccessSizes]; + Function *TsanAtomicThreadFence; + Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; }; } // namespace @@ -167,10 +171,42 @@ bool ThreadSanitizer::doInitialization(Module &M) { TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, NULL)); + + for (int op = AtomicRMWInst::FIRST_BINOP; + op <= AtomicRMWInst::LAST_BINOP; ++op) { + TsanAtomicRMW[op][i] = NULL; + const char *NamePart = NULL; + if (op == AtomicRMWInst::Xchg) + NamePart = "_exchange"; + else if (op == AtomicRMWInst::Add) + NamePart = "_fetch_add"; + else if (op == AtomicRMWInst::Sub) + NamePart = "_fetch_sub"; + else if (op == AtomicRMWInst::And) + NamePart = "_fetch_and"; + else if (op == AtomicRMWInst::Or) + NamePart = "_fetch_or"; + else if (op == AtomicRMWInst::Xor) + NamePart = "_fetch_xor"; + else + continue; + SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); + TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( + RMWName, Ty, PtrTy, Ty, OrdTy, NULL)); + } + + SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + "_compare_exchange_val"); + TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL)); } TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); + TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); return true; } @@ -253,8 +289,8 @@ static bool isAtomic(Instruction *I) { return true; if (isa<AtomicCmpXchgInst>(I)) return true; - if (FenceInst *FI = dyn_cast<FenceInst>(I)) - return FI->getSynchScope() == CrossThread; + if (isa<FenceInst>(I)) + return true; return false; } @@ -354,15 +390,14 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { switch (ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. - case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. - case Acquire: v = 1 << 2; break; - case Release: v = 1 << 3; break; - case AcquireRelease: v = 1 << 4; break; - case SequentiallyConsistent: v = 1 << 5; break; + case Monotonic: v = 0; break; + // case Consume: v = 1; break; // Not specified yet. + case Acquire: v = 2; break; + case Release: v = 3; break; + case AcquireRelease: v = 4; break; + case SequentiallyConsistent: v = 5; break; } - // +100500 is temporal to migrate to new enum values. - return IRB->getInt32(v + 100500); + return IRB->getInt32(v); } bool ThreadSanitizer::instrumentAtomic(Instruction *I) { @@ -397,12 +432,44 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { CallInst *C = CallInst::Create(TsanAtomicStore[Idx], ArrayRef<Value*>(Args)); ReplaceInstWithInst(I, C); - } else if (isa<AtomicRMWInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<AtomicCmpXchgInst>(I)) { - // FIXME: Not yet supported. - } else if (isa<FenceInst>(I)) { - // FIXME: Not yet supported. + } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { + Value *Addr = RMWI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; + if (F == NULL) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), + createOrdering(&IRB, RMWI->getOrdering())}; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { + Value *Addr = CASI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false), + IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false), + createOrdering(&IRB, CASI->getOrdering())}; + CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); + } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { + Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; + Function *F = FI->getSynchScope() == SingleThread ? + TsanAtomicSignalFence : TsanAtomicThreadFence; + CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args)); + ReplaceInstWithInst(I, C); } return true; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7d652dea48..17d07cdb2d 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -99,243 +99,7 @@ static bool CallHasFloatingPointArgument(const CallInst *CI) { return false; } -/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality -/// comparisons with With. -static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) - if (IC->isEquality() && IC->getOperand(1) == With) - continue; - // Unknown instruction. - return false; - } - return true; -} - -//===----------------------------------------------------------------------===// -// String and Memory LibCall Optimizations -//===----------------------------------------------------------------------===// - namespace { -//===---------------------------------------===// -// 'strcspn' Optimizations - -struct StrCSpnOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - !FT->getReturnType()->isIntegerTy()) - return 0; - - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strcspn("", s) -> 0 - if (HasS1 && S1.empty()) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t Pos = S1.find_first_of(S2); - if (Pos == StringRef::npos) Pos = S1.size(); - return ConstantInt::get(CI->getType(), Pos); - } - - // strcspn(s, "") -> strlen(s) - if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); - - return 0; - } -}; - -//===---------------------------------------===// -// 'strstr' Optimizations - -struct StrStrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isPointerTy()) - return 0; - - // fold strstr(x, x) -> x. - if (CI->getArgOperand(0) == CI->getArgOperand(1)) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); - if (!StrLen) - return 0; - Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD, TLI); - if (!StrNCmp) - return 0; - for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); - UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(*UI++); - Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, - ConstantInt::getNullValue(StrNCmp->getType()), - "cmp"); - Old->replaceAllUsesWith(Cmp); - Old->eraseFromParent(); - } - return CI; - } - - // See if either input string is a constant string. - StringRef SearchStr, ToFindStr; - bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); - bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); - - // fold strstr(x, "") -> x. - if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); - - // If both strings are known, constant fold it. - if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); - - if (Offset == StringRef::npos) // strstr("foo", "bar") -> null - return Constant::getNullValue(CI->getType()); - - // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getArgOperand(0), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); - } - - // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; - } - return 0; - } -}; - - -//===---------------------------------------===// -// 'memcmp' Optimizations - -struct MemCmpOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy(32)) - return 0; - - Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); - - if (LHS == RHS) // memcmp(s,s,x) -> 0 - return Constant::getNullValue(CI->getType()); - - // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; - uint64_t Len = LenC->getZExtValue(); - - if (Len == 0) // memcmp(s1,s2,0) -> 0 - return Constant::getNullValue(CI->getType()); - - // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS - if (Len == 1) { - Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), - CI->getType(), "lhsv"); - Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), - CI->getType(), "rhsv"); - return B.CreateSub(LHSV, RHSV, "chardiff"); - } - - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) - StringRef LHSStr, RHSStr; - if (getConstantStringInfo(LHS, LHSStr) && - getConstantStringInfo(RHS, RHSStr)) { - // Make sure we're not reading out-of-bounds memory. - if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; - uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); - return ConstantInt::get(CI->getType(), Ret); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memcpy' Optimizations - -struct MemCpyOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memmove' Optimizations - -struct MemMoveOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - -//===---------------------------------------===// -// 'memset' Optimizations - -struct MemSetOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require DataLayout. - if (!TD) return 0; - - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); - return CI->getArgOperand(0); - } -}; - //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1004,9 +768,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - // String and Memory LibCall Optimizations - StrCSpnOpt StrCSpn; StrStrOpt StrStr; - MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; @@ -1072,14 +833,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { - // String and Memory LibCall Optimizations - Optimizations["strcspn"] = &StrCSpn; - Optimizations["strstr"] = &StrStr; - Optimizations["memcmp"] = &MemCmp; - AddOpt(LibFunc::memcpy, &MemCpy); - Optimizations["memmove"] = &MemMove; - AddOpt(LibFunc::memset, &MemSet); - // Math Library Optimizations Optimizations["cosf"] = &Cos; Optimizations["cos"] = &Cos; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 64c7011660..c3ea63852f 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -34,6 +34,7 @@ protected: Function *Caller; const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; LLVMContext* Context; public: LibCallOptimization() { } @@ -48,10 +49,12 @@ public: =0; Value *optimizeCall(CallInst *CI, const DataLayout *TD, - const TargetLibraryInfo *TLI, IRBuilder<> &B) { + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; this->TLI = TLI; + this->LCS = LCS; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -83,6 +86,20 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -801,6 +818,204 @@ struct StrSpnOpt : public LibCallOptimization { } }; +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + } // End anonymous namespace. namespace llvm { @@ -808,6 +1023,7 @@ namespace llvm { class LibCallSimplifierImpl { const DataLayout *TD; const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; StringMap<LibCallOptimization*> Optimizations; // Fortified library call optimizations. @@ -818,7 +1034,7 @@ class LibCallSimplifierImpl { StpCpyChkOpt StpCpyChk; StrNCpyChkOpt StrNCpyChk; - // String and memory library call optimizations. + // String library call optimizations. StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; @@ -832,12 +1048,23 @@ class LibCallSimplifierImpl { StrPBrkOpt StrPBrk; StrToOpt StrTo; StrSpnOpt StrSpn; + StrCSpnOpt StrCSpn; + StrStrOpt StrStr; + + // Memory library call optimizations. + MemCmpOpt MemCmp; + MemCpyOpt MemCpy; + MemMoveOpt MemMove; + MemSetOpt MemSet; void initOptimizations(); + void addOpt(LibFunc::Func F, LibCallOptimization* Opt); public: - LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) { + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS) { this->TD = TD; this->TLI = TLI; + this->LCS = LCS; } Value *optimizeCall(CallInst *CI); @@ -853,26 +1080,34 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["__strncpy_chk"] = &StrNCpyChk; Optimizations["__stpncpy_chk"] = &StrNCpyChk; - // String and memory library call optimizations. - Optimizations["strcat"] = &StrCat; - Optimizations["strncat"] = &StrNCat; - Optimizations["strchr"] = &StrChr; - Optimizations["strrchr"] = &StrRChr; - Optimizations["strcmp"] = &StrCmp; - Optimizations["strncmp"] = &StrNCmp; - Optimizations["strcpy"] = &StrCpy; - Optimizations["stpcpy"] = &StpCpy; - Optimizations["strncpy"] = &StrNCpy; - Optimizations["strlen"] = &StrLen; - Optimizations["strpbrk"] = &StrPBrk; - Optimizations["strtol"] = &StrTo; - Optimizations["strtod"] = &StrTo; - Optimizations["strtof"] = &StrTo; - Optimizations["strtoul"] = &StrTo; - Optimizations["strtoll"] = &StrTo; - Optimizations["strtold"] = &StrTo; - Optimizations["strtoull"] = &StrTo; - Optimizations["strspn"] = &StrSpn; + // String library call optimizations. + addOpt(LibFunc::strcat, &StrCat); + addOpt(LibFunc::strncat, &StrNCat); + addOpt(LibFunc::strchr, &StrChr); + addOpt(LibFunc::strrchr, &StrRChr); + addOpt(LibFunc::strcmp, &StrCmp); + addOpt(LibFunc::strncmp, &StrNCmp); + addOpt(LibFunc::strcpy, &StrCpy); + addOpt(LibFunc::stpcpy, &StpCpy); + addOpt(LibFunc::strncpy, &StrNCpy); + addOpt(LibFunc::strlen, &StrLen); + addOpt(LibFunc::strpbrk, &StrPBrk); + addOpt(LibFunc::strtol, &StrTo); + addOpt(LibFunc::strtod, &StrTo); + addOpt(LibFunc::strtof, &StrTo); + addOpt(LibFunc::strtoul, &StrTo); + addOpt(LibFunc::strtoll, &StrTo); + addOpt(LibFunc::strtold, &StrTo); + addOpt(LibFunc::strtoull, &StrTo); + addOpt(LibFunc::strspn, &StrSpn); + addOpt(LibFunc::strcspn, &StrCSpn); + addOpt(LibFunc::strstr, &StrStr); + + // Memory library call optimizations. + addOpt(LibFunc::memcmp, &MemCmp); + addOpt(LibFunc::memcpy, &MemCpy); + addOpt(LibFunc::memmove, &MemMove); + addOpt(LibFunc::memset, &MemSet); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { @@ -883,14 +1118,19 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); if (LCO) { IRBuilder<> Builder(CI); - return LCO->optimizeCall(CI, TD, TLI, Builder); + return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); } return 0; } +void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { + if (TLI->has(F)) + Optimizations[TLI->getName(F)] = Opt; +} + LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI) { - Impl = new LibCallSimplifierImpl(TD, TLI); + Impl = new LibCallSimplifierImpl(TD, TLI, this); } LibCallSimplifier::~LibCallSimplifier() { @@ -901,4 +1141,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return Impl->optimizeCall(CI); } +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { + I->replaceAllUsesWith(With); + I->eraseFromParent(); +} + } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 892808760f..a7ef248e6e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -78,6 +78,10 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, /// We don't vectorize loops with a known constant trip count below this number. const unsigned TinyTripCountThreshold = 16; +/// When performing a runtime memory check, do not check more than this +/// number of pointers. Notice that the check is quadratic! +const unsigned RuntimeMemoryCheckThreshold = 2; + namespace { // Forward declarations. @@ -114,7 +118,7 @@ public: /// Widen each instruction in the old loop to a new one in the new loop. /// Use the Legality module to find the induction and reduction variables. vectorizeLoop(Legal); - // register the new loop. + // Register the new loop and update the analysis passes. updateAnalysis(); } @@ -123,7 +127,8 @@ private: void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. void vectorizeLoop(LoopVectorizationLegality *Legal); - /// Insert the new loop to the loop hierarchy and pass manager. + /// Insert the new loop to the loop hierarchy and pass manager + /// and update the analysis passes. void updateAnalysis(); /// This instruction is un-vectorizable. Implement it as a sequence @@ -242,6 +247,15 @@ public: ReductionKind Kind; }; + // This POD struct holds information about the memory runtime legality + // check that a group of pointers do not overlap. + struct RuntimePointerCheck { + /// This flag indicates if we need to add the runtime check. + bool Need; + /// Holds the pointers that we need to check. + SmallVector<Value*, 2> Pointers; + }; + /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList; @@ -263,9 +277,14 @@ public: /// This check allows us to vectorize A[idx] into a wide load/store. bool isConsecutiveGep(Value *Ptr); + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + /// Returns true if this instruction will remain scalar after vectorization. bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + /// Returns the information that we collected about runtime memory check. + RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; } private: /// Check if a single basic block loop is vectorizable. /// At this point we know |