author    | Eli Bendersky <eliben@chromium.org> | 2013-07-15 16:09:15 -0700
committer | Eli Bendersky <eliben@chromium.org> | 2013-07-15 16:09:15 -0700
commit    | c6cf05cb5108f356dde97c01ee4188b0671d4542 (patch)
tree      | 436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/Transforms
parent    | c75199c649c739aade160289d93f257edc798cde (diff)
parent    | 7dfcb84fc16b3bf6b2379713b53090757f0a45f9 (diff)
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
docs/LangRef.rst
include/llvm/CodeGen/CallingConvLower.h
include/llvm/IRReader/IRReader.h
include/llvm/Target/TargetMachine.h
lib/CodeGen/CallingConvLower.cpp
lib/IRReader/IRReader.cpp
lib/IRReader/LLVMBuild.txt
lib/IRReader/Makefile
lib/LLVMBuild.txt
lib/Makefile
lib/Support/MemoryBuffer.cpp
lib/Support/Unix/PathV2.inc
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/ARMTargetMachine.cpp
lib/Target/Mips/CMakeLists.txt
lib/Target/Mips/MipsDelaySlotFiller.cpp
lib/Target/Mips/MipsISelLowering.cpp
lib/Target/Mips/MipsInstrInfo.td
lib/Target/Mips/MipsSubtarget.cpp
lib/Target/Mips/MipsSubtarget.h
lib/Target/X86/X86FastISel.cpp
lib/Target/X86/X86ISelDAGToDAG.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrControl.td
lib/Target/X86/X86InstrFormats.td
lib/Transforms/IPO/ExtractGV.cpp
lib/Transforms/InstCombine/InstCombineCompares.cpp
lib/Transforms/Utils/SimplifyLibCalls.cpp
test/CodeGen/X86/fast-isel-divrem.ll
test/MC/ARM/data-in-code.ll
tools/Makefile
tools/llvm-extract/llvm-extract.cpp
tools/llvm-link/CMakeLists.txt
tools/opt/CMakeLists.txt
tools/opt/LLVMBuild.txt
tools/opt/Makefile
tools/opt/opt.cpp
Diffstat (limited to 'lib/Transforms')
59 files changed, 6156 insertions(+), 2616 deletions(-)
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 8336d3ad34..a7bf18896b 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" using namespace llvm; @@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - - for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) - if (GlobalValue *GV = - dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) - UsedValues.insert(GV); + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { + Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + GlobalValue *GV = cast<GlobalValue>(Operand); + UsedValues.insert(GV); + } } // True if A is better than B. diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 09f9d7c788..b5a05a7f1c 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -69,7 +69,7 @@ namespace { // @LOCALMOD-END } - bool Local = I->hasLocalLinkage(); + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -95,8 +95,8 @@ namespace { continue; // @LOCALMOD-END } - - bool Local = I->hasLocalLinkage(); + + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -113,7 +113,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - if (CurI->hasLocalLinkage()) { + if (CurI->isDiscardableIfUnused()) { CurI->setVisibility(GlobalValue::HiddenVisibility); CurI->setLinkage(GlobalValue::ExternalLinkage); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a75212a386..bc5109b4d4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1,4 +1,4 @@ -//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ // to the function does not create any copies of the pointer value that // outlive the call. This more or less means that the pointer is only // dereferenced, and not returned from the function or stored in a global. +// Finally, well-known library call declarations are marked with all +// attributes that are consistent with the function's standard definition. // This pass is implemented as a bottom-up traversal of the call-graph. 
// //===----------------------------------------------------------------------===// @@ -32,12 +34,14 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); +STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { @@ -62,14 +66,63 @@ namespace { // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const CallGraphSCC &SCC); + // Utility methods used by inferPrototypeAttributes to add attributes + // and maintain annotation statistics. + + void setDoesNotAccessMemory(Function &F) { + if (!F.doesNotAccessMemory()) { + F.setDoesNotAccessMemory(); + ++NumAnnotated; + } + } + + void setOnlyReadsMemory(Function &F) { + if (!F.onlyReadsMemory()) { + F.setOnlyReadsMemory(); + ++NumAnnotated; + } + } + + void setDoesNotThrow(Function &F) { + if (!F.doesNotThrow()) { + F.setDoesNotThrow(); + ++NumAnnotated; + } + } + + void setDoesNotCapture(Function &F, unsigned n) { + if (!F.doesNotCapture(n)) { + F.setDoesNotCapture(n); + ++NumAnnotated; + } + } + + void setDoesNotAlias(Function &F, unsigned n) { + if (!F.doesNotAlias(n)) { + F.setDoesNotAlias(n); + ++NumAnnotated; + } + } + + // inferPrototypeAttributes - Analyze the name and prototype of the + // given function and set any applicable attributes. Returns true + // if any attributes were set and false otherwise. + bool inferPrototypeAttributes(Function &F); + + // annotateLibraryCalls - Adds attributes to well-known standard library + // call declarations. + bool annotateLibraryCalls(const CallGraphSCC &SCC); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); CallGraphSCCPass::getAnalysisUsage(AU); } private: AliasAnalysis *AA; + TargetLibraryInfo *TLI; }; } @@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) @@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } +/// inferPrototypeAttributes - Analyze the name and prototype of the +/// given function and set any applicable attributes. Returns true +/// if any attributes were set and false otherwise. 
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) { + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) + return false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strcmp: + case LibFunc::strspn: + case LibFunc::strncmp: + case LibFunc::strcspn: + case LibFunc::strcoll: + case LibFunc::strcasecmp: + case LibFunc::strncasecmp: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::scanf: + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat: + case LibFunc::sscanf: + case LibFunc::sprintf: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + break; + case LibFunc::system: + if (FTy->getNumParams() 
!= 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotAlias(F, 0); + break; + case LibFunc::mkdir: + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::read: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::rmdir: + case LibFunc::rewind: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::rename: + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. 
+ setDoesNotCapture(F, 2); + break; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::chmod: + case LibFunc::chown: + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + break; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + break; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + case LibFunc::fread: + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 
4); + case LibFunc::fputs: + case LibFunc::fscanf: + case LibFunc::fprintf: + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::gets: + case LibFunc::getchar: + setDoesNotThrow(F); + break; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::uname: + case LibFunc::unlink: + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::pread: + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; these are valid pthread cancellation points. 
+ setDoesNotCapture(F, 2); + break; + case LibFunc::putchar: + setDoesNotThrow(F); + break; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vsscanf: + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + break; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. 
+ setDoesNotCapture(F, 4); + break; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + default: + // Didn't mark any attributes. + return false; + } + + return true; +} + +/// annotateLibraryCalls - Adds attributes to well-known standard library +/// call declarations. +bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { + bool MadeChange = false; + + // Check each function in turn annotating well-known library function + // declarations with attributes. 
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F != 0 && F->isDeclaration()) + MadeChange |= inferPrototypeAttributes(*F); + } + + return MadeChange; +} + bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { AA = &getAnalysis<AliasAnalysis>(); + TLI = &getAnalysis<TargetLibraryInfo>(); - bool Changed = AddReadAttrs(SCC); + bool Changed = annotateLibraryCalls(SCC); + Changed |= AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); return Changed; diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index dc99492990..201f320c43 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -42,6 +42,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; + SmallPtrSet<Constant *, 8> SeenConstants; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + SeenConstants.clear(); return Changed; } @@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return GlobalIsNeeded(GV); - + // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) - if (Constant *OpC = dyn_cast<Constant>(*I)) - MarkUsedGlobalsAsNeeded(OpC); + for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + // If we've already processed this constant there's no need to do it again. + Constant *Op = dyn_cast<Constant>(*I); + if (Op && SeenConstants.insert(Op)) + MarkUsedGlobalsAsNeeded(Op); + } } // RemoveUnusedGlobalValue - Loop over all of the uses of the specified diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 700d0dfb5c..6cab6ed0ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -473,8 +473,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, DataLayout *TD, TargetLibraryInfo *TLI) { bool Changed = false; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { - User *U = *UI++; + SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end()); + while (!WorkList.empty()) { + User *U = WorkList.pop_back_val(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (Init) { @@ -537,7 +538,6 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // us, and if they are all dead, nuke them without remorse. if (SafeToDestroyConstant(C)) { C->destroyConstant(); - // This could have invalidated UI, start over from scratch. 
CleanupConstantGlobalUsers(V, Init, TD, TLI); return true; } @@ -3062,6 +3062,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { return true; } +static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) { + for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + Use *U = &I.getUse(); + if (Tried.count(U)) + continue; + + User *Usr = *I; + GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr); + if (!GV || !GV->hasName()) { + Tried.insert(U); + return I; + } + + StringRef Name = GV->getName(); + if (Name != "llvm.used" && Name != "llvm.compiler_used") { + Tried.insert(U); + return I; + } + } + return V->use_end(); +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New); + +static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + Constant *Op = CA->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = ConstantArray::get(CA->getType(), NewOps); + assert(Replacement != CA && "CA didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CA->use_empty()) + CA->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { + Constant *Op = CE->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = CE->getWithOperands(NewOps); + assert(Replacement != CE && "CE didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CE->use_empty()) + CE->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To, + Use *U) { + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) + return replaceUsesOfWithOnConstant(CA, From, To, U); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return replaceUsesOfWithOnConstant(CE, From, To, U); + C->replaceUsesOfWithOnConstant(From, To, U); + return true; +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) { + SmallPtrSet<Use*, 8> Tried; + bool Ret = false; + for (;;) { + Value::use_iterator I = getFirst(Old, Tried); + if (I == Old->use_end()) + break; + Use &U = I.getUse(); + + // Must handle Constants specially, we cannot call replaceUsesOfWith on a + // constant because they are uniqued. + if (Constant *C = dyn_cast<Constant>(U.getUser())) { + if (!isa<GlobalValue>(C)) { + Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U); + continue; + } + } + + U.set(New); + Ret = true; + } + return Ret; +} + bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool Changed = false; @@ -3081,11 +3180,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); // Make all users of the alias use the aliasee instead. - if (!J->use_empty()) { - J->replaceAllUsesWith(Aliasee); + if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) { ++NumAliasesResolved; Changed = true; } + if (!J->use_empty()) + continue; // If the alias is externally visible, we may still be able to simplify it. 
if (!J->hasLocalLinkage()) { diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 892100f058..4ce749cfec 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); +/// Returns the type id for a type to be hashed. We turn pointer types into +/// integers here because the actual compare logic below considers pointers and +/// integers of the same size as equal. +static Type::TypeID getTypeIDForHash(Type *Ty) { + if (Ty->isPointerTy()) + return Type::IntegerTyID; + return Ty->getTypeID(); +} + /// Creates a hash-code for the function which is the same for any two /// functions that will compare equal, without looking at the instructions /// inside the function. @@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) { ID.AddInteger(F->getCallingConv()); ID.AddBoolean(F->hasGC()); ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); return ID.ComputeHash(); } @@ -200,8 +209,7 @@ private: // Any two pointers in the same address space are equivalent, intptr_t and // pointers are equivalent. Otherwise, standard type equivalence rules apply. -bool FunctionComparator::isEquivalentType(Type *Ty1, - Type *Ty2) const { +bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { @@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { - Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType())); + Type *RetTy = NewG->getReturnType(); + if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) + Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); + else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) + Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); + else + Builder.CreateRet(Builder.CreateBitCast(CI, RetTy)); } NewG->copyAttributesFrom(G); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 47b2b51899..986c0b8928 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops", cl::desc("Run the Loop vectorization passes")); static cl::opt<bool> -RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes")); +RunSLPVectorization("vectorize-slp", + cl::desc("Run the SLP vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize-slp-aggressive", + cl::desc("Run the BB vectorization passes")); static cl::opt<bool> UseGVNAfterVectorization("use-gvn-after-vectorization", @@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() { DisableSimplifyLibCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; - Vectorize = RunBBVectorization; + BBVectorize = RunBBVectorization; + SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; } @@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { 
addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (Vectorize) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); if (OptLevel > 1 && UseGVNAfterVectorization) @@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createGlobalDCEPass()); } +inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { + return reinterpret_cast<PassManagerBuilder*>(P); +} + +inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { + return reinterpret_cast<LLVMPassManagerBuilderRef>(P); +} + LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); @@ -391,9 +408,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM, - bool Internalize, - bool RunInliner) { + LLVMBool Internalize, + LLVMBool RunInliner) { PassManagerBuilder *Builder = unwrap(PMB); PassManagerBase *LPM = unwrap(PM); - Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); + Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0); } diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 5f8681ff45..3396f7929e 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; UsedValues.insert(LLVMUsed); - - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - + + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index 72cfe2c985..a25696ec03 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine InstCombineMulDivRem.cpp InstCombinePHI.cpp InstCombineSelect.cpp - InstCombineShifts.cpp + InstCombineShifts.cpp InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 1f6a3a5e33..2a36074750 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -233,6 +233,7 @@ private: Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); + Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); public: // InsertNewInstBefore - insert an instruction New before instruction Old diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..166f8dfdb4 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -24,9 +24,9 @@ namespace { /// Class representing coefficient of floating-point addend. /// This class needs to be highly efficient, which is especially true for /// the constructor. 
As of I write this comment, the cost of the default - /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to /// perform write-merging). - /// + /// class FAddendCoef { public: // The constructor has to initialize a APFloat, which is uncessary for @@ -37,31 +37,31 @@ namespace { // FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} ~FAddendCoef(); - + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; } - + void set(const APFloat& C); - + void negate(); - + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - + // If possible, don't define operator+/operator- etc because these // operators inevitably call FAddendCoef's constructor which is not cheap. void operator=(const FAddendCoef &A); void operator+=(const FAddendCoef &A); void operator-=(const FAddendCoef &A); void operator*=(const FAddendCoef &S); - + bool isOne() const { return isInt() && IntVal == 1; } bool isTwo() const { return isInt() && IntVal == 2; } bool isMinusOne() const { return isInt() && IntVal == -1; } bool isMinusTwo() const { return isInt() && IntVal == -2; } - + private: bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) @@ -74,18 +74,28 @@ namespace { return *getFpValPtr(); } - APFloat &getFpVal(void) - { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } - + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + bool isInt() const { return !IsFp; } + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; - + // True iff FpValBuf contains an instance of APFloat. bool BufHasFpVal; - + // The integer coefficient of an individual addend is either 1 or -1, // and we try to simplify at most 4 addends from neighboring at most // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt @@ -94,7 +104,7 @@ namespace { AlignedCharArrayUnion<APFloat> FpValBuf; }; - + /// FAddend is used to represent floating-point addend. An addend is /// represented as <C, V>, where the V is a symbolic value, and C is a /// constant coefficient. A constant addend is represented as <C, 0>. @@ -102,10 +112,10 @@ namespace { class FAddend { public: FAddend() { Val = 0; } - + Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - + bool isConstant() const { return Val == 0; } bool isZero() const { return Coeff.isZero(); } @@ -114,17 +124,17 @@ namespace { { Coeff.set(Coefficient); Val = V; } void set(const ConstantFP* Coefficient, Value *V) { Coeff.set(Coefficient->getValueAPF()); Val = V; } - + void negate() { Coeff.negate(); } - + /// Drill down the U-D chain one step to find the definition of V, and /// try to break the definition into one or two addends. static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); - + /// Similar to FAddend::drillDownOneStep() except that the value being /// splitted is the addend itself. 
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; - + void operator+=(const FAddend &T) { assert((Val == T.Val) && "Symbolic-values disagree"); Coeff += T.Coeff; @@ -132,12 +142,12 @@ namespace { private: void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } - + // This addend has the value of "Coeff * Val". Value *Val; FAddendCoef Coeff; }; - + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along /// with its neighboring at most two instructions. /// @@ -145,27 +155,30 @@ namespace { public: FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} Value *simplify(Instruction *FAdd); - + private: typedef SmallVector<const FAddend*, 4> AddendVect; - + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); - + /// Return the number of instructions needed to emit the N-ary addition. unsigned calcInstrNumber(const AddendVect& Vect); Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); - + InstCombiner::BuilderTy *Builder; Instruction *Instr; - + private: // Debugging stuff are clustered here. #ifndef NDEBUG @@ -177,7 +190,7 @@ namespace { void incCreateInstNum() {} #endif }; -} +} //===----------------------------------------------------------------------===// // @@ -200,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) { } else *P = C; - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; } -void FAddendCoef::operator=(const FAddendCoef& That) { +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { if (That.isInt()) set(That.IntVal); else @@ -219,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { getFpVal().add(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); return; } - + APFloat &T = getFpVal(); - T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } void FAddendCoef::operator-=(const FAddendCoef &That) { @@ -240,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { getFpVal().subtract(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); return; } APFloat &T = getFpVal(); - T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); } void FAddendCoef::operator*=(const FAddendCoef &That) { @@ -268,15 +305,16 @@ 
void FAddendCoef::operator*=(const FAddendCoef &That) { return; } - const fltSemantics &Semantic = + const fltSemantics &Semantic = isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) - set(APFloat(Semantic, IntVal)); + convertToFpType(Semantic); APFloat &F0 = getFpVal(); if (That.isInt()) - F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); else F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); @@ -302,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const { // A - B <1, A>, <1,B> // 0 - B <-1, B> // C * A, <C, A> -// A + C <1, A> <C, NULL> +// A + C <1, A> <C, NULL> // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// +// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = 0; @@ -377,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep return 0; unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); - if (!BreakNum || Coeff.isOne()) + if (!BreakNum || Coeff.isOne()) return BreakNum; Addend0.Scale(Coeff); @@ -388,6 +426,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? 
+ createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -398,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - // Save the instruction before calling other member-functions. + // Save the instruction before calling other member-functions. Instr = I; FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; @@ -409,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { unsigned Opnd0_ExpNum = 0; unsigned Opnd1_ExpNum = 0; - if (!Opnd0.isConstant()) + if (!Opnd0.isConstant()) Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. @@ -431,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) @@ -471,7 +581,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -479,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { unsigned AddendNum = Addends.size(); assert(AddendNum <= 4 && "Too many addends"); - // For saving intermediate results; + // For saving intermediate results; unsigned NextTmpIdx = 0; FAddend TmpResult[3]; @@ -495,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { AddendVect SimpVect; // The outer loop works on one symbolic-value at a time. Suppose the input - // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... // The symbolic-values will be processed in this order: x, y, z. // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { @@ -522,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = 0; SimpVect.push_back(T); } } @@ -535,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { R += *SimpVect[Idx]; // Pop all addends being folded and push the resulting folded addend. - SimpVect.resize(StartIdx); + SimpVect.resize(StartIdx); if (Val != 0) { if (!R.isZero()) { SimpVect.push_back(&R); @@ -548,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { } } - assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && "out-of-bound access"); if (ConstAdd) @@ -570,7 +681,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. 
- // + // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return 0; @@ -591,7 +702,7 @@ Value *FAddCombine::createNaryFAdd // Iterate the addends, creating fadd/fsub using adjacent two addends. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); I != E; I++) { - bool NeedNeg; + bool NeedNeg; Value *V = createAddendVal(**I, NeedNeg); if (!LastVal) { LastVal = V; @@ -617,7 +728,7 @@ Value *FAddCombine::createNaryFAdd } #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && + assert(CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers"); #endif @@ -627,7 +738,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +751,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -665,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; // Adjust the number of instructions needed to emit the N-ary add. 
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); @@ -853,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); } } @@ -1111,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + if (I.hasUnsafeAlgebra()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4332467371..ec75dd2e04 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -22,8 +22,8 @@ using namespace PatternMatch; /// AddOne - Add one to a ConstantInt. -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); } /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { @@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return 0; } - -/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient +/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise +/// (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. 
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) + return 0; + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { @@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst)) { - // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return Builder->CreateICmpULT(Add, AddCST); - } - if (LHS->getOperand(0) == RHS->getOperand(0)) { // if LHSCst and RHSCst differ only by one bit: // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 @@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return Builder->CreateICmpULT(Add, AddCST); + } + break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64cd1bd278..78b4a2c6c9 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = + AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a960ab2499..2ee1278d23 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; @@ -1604,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. 
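The (X == 13 | X == 14) -> X-13 <u 2 rewrite that this hunk merely reorders relative to the differ-by-one-bit fold is easy to sanity-check outside LLVM; a small plain-C++ check, illustrative only:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int64_t X = -300; X <= 300; ++X) {
      bool Orig = (X == 13) || (X == 14);
      bool Folded = uint32_t(int32_t(X) - 13) < 2u;   // X-13 <u 2, wrapping subtract
      assert(Orig == Folded);
    }
    return 0;
  }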
The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1625,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } @@ -1647,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6eca399a40..518a8323b6 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, } } +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). 
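The isSignTest helper added above encodes the equivalences x <s 1 <=> x <=s 0 and x >s -1 <=> x >=s 0 for signed integers, so later code can canonicalize such comparisons to a sign test against zero. A standalone check of those equivalences, not part of the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int x = -1000; x <= 1000; ++x) {
      assert((x < 1)  == (x <= 0));   // slt 1  ==  sle 0
      assert((x > -1) == (x >= 0));   // sgt -1 ==  sge 0
    }
    // The type's limits behave the same way:
    assert((INT32_MIN < 1)  == (INT32_MIN <= 0));
    assert((INT32_MAX > -1) == (INT32_MAX >= 0));
    return 0;
  }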
static bool isHighOnes(const ConstantInt *CI) { @@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // If a magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (ArrayElementCount <= 32 || - (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { - Type *Ty; - if (ArrayElementCount <= 32) + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } } return 0; @@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; } + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + break; + } + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; @@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); + // If the shift is NSW and we compare to 0, then it is just shifting out + // sign bits, no need for an AND either. + if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); @@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } + // If this is a signed comparison to 0 and the shift is sign preserving, + // use the shift LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. 
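The "magic bitvector" comment in the FoldCmpLoadFromIndexedGlobal hunk describes packing the per-element result of comparing a constant array against a constant into a single mask, so the load-plus-compare becomes a bit test. A self-contained plain-C++ illustration with an invented table and predicate:

  #include <cassert>
  #include <cstdint>

  static const int Table[8] = {3, 7, 7, 1, 7, 9, 7, 0};

  int main() {
    // Build the mask once; a compiler would fold this to a constant because
    // the table and the predicate are both known at compile time.
    uint32_t Magic = 0;
    for (unsigned i = 0; i != 8; ++i)
      if (Table[i] == 7)
        Magic |= 1u << i;

    // The per-element load + compare is now ((Magic >> i) & 1) != 0.
    for (unsigned i = 0; i != 8; ++i)
      assert((Table[i] == 7) == (((Magic >> i) & 1) != 0));
    return 0;
  }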
bool TrueIfSigned = false; if (LHSI->hasOneUse() && @@ -1333,18 +1399,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - // @LOCALMOD-BEGIN - // We don't want to introduce non-power-of-two integer sizes for PNaCl's - // stable wire format, so modify this transformation for NaCl. - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt && - isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8) { - // @LOCALMOD-END + if (LHSI->hasOneUse() && + // @LOCALMOD-BEGIN + // We don't want to introduce non-power-of-two integer sizes for PNaCl's + // stable wire format, so modify this transformation for NaCl. + isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8 && + // @LOCALMOD-END + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, @@ -1536,6 +1603,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(pred, X, NegX); } } + break; + case Instruction::Mul: + if (RHSV == 0 && BO->hasNoSignedWrap()) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // The trivial case (mul X, 0) is handled by InstSimplify + // General case : (mul X, C) != 0 iff X != 0 + // (mul X, C) == 0 iff X == 0 + if (!BOC->isZero()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Constant::getNullValue(RHS->getType())); + } + } + break; default: break; } } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { @@ -2416,6 +2496,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(Pred, Y, Z); } + // icmp slt (X + -1), Y -> icmp sle X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); + + // icmp sge (X + -1), Y -> icmp sgt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); + + // icmp sle (X + 1), Y -> icmp slt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); + + // icmp sgt (X + 1), Y -> icmp sge X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); + + // if C1 has greater magnitude than C2: + // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // s.t. C3 = C1 - C2 + // + // if C2 has greater magnitude than C1: + // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // s.t. 
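The new add/compare folds above, e.g. icmp slt (X + -1), Y -> icmp sle X, Y, only hold when the add cannot wrap, hence the NoOp0WrapProblem guard in the code. A plain-C++ spot-check over a wrap-free range:

  #include <cassert>

  int main() {
    for (int x = -100; x <= 100; ++x)
      for (int y = -100; y <= 100; ++y) {
        assert(((x + -1) <  y) == (x <= y));   // slt (X + -1), Y  ==  sle X, Y
        assert(((x + -1) >= y) == (x >  y));   // sge (X + -1), Y  ==  sgt X, Y
        assert(((x +  1) <= y) == (x <  y));   // sle (X + 1), Y   ==  slt X, Y
        assert(((x +  1) >  y) == (x >= y));   // sgt (X + 1), Y   ==  sge X, Y
      }
    return 0;
  }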
C3 = C2 - C1 + if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && + (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) + if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { + const APInt &AP1 = C1->getValue(); + const APInt &AP2 = C2->getValue(); + if (AP1.isNegative() == AP2.isNegative()) { + APInt AP1Abs = C1->getValue().abs(); + APInt AP2Abs = C2->getValue().abs(); + if (AP1Abs.uge(AP2Abs)) { + ConstantInt *C3 = Builder->getInt(AP1 - AP2); + Value *NewAdd = Builder->CreateNSWAdd(A, C3); + return new ICmpInst(Pred, NewAdd, C); + } else { + ConstantInt *C3 = Builder->getInt(AP2 - AP1); + Value *NewAdd = Builder->CreateNSWAdd(C, C3); + return new ICmpInst(Pred, A, NewAdd); + } + } + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). A = 0; B = 0; C = 0; D = 0; @@ -2549,6 +2678,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } { Value *A, *B; + // Transform (A & ~B) == 0 --> (A & B) != 0 + // and (A & ~B) != 0 --> (A & B) == 0 + // if A is a power of 2. + if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality()) + return new ICmpInst(I.getInversePredicate(), + Builder->CreateAnd(A, B), + Op1); + // ~x < ~y --> y < x // ~x < cst --> ~cst < x if (match(Op0, m_Not(m_Value(A)))) { diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 337cfe32a8..e2d7966cb3 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, - IsOffset || !GEP->hasAllZeroIndices())) + if (!isOnlyCopiedFromConstantGlobal( + GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) return false; continue; } @@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. 
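The new equality fold (A & ~B) == 0 --> (A & B) != 0 depends on A being a power of two: its single bit is either inside B or outside it. A brute-force check in plain C++, independent of the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned Bit = 0; Bit < 16; ++Bit) {
      uint16_t A = uint16_t(1u << Bit);            // a power of two
      for (unsigned B = 0; B < 0x10000; ++B) {
        bool Lhs = (A & uint16_t(~B)) == 0;        // (A & ~B) == 0
        bool Rhs = (A & uint16_t(B)) != 0;         // (A & B) != 0
        assert(Lhs == Rhs);
      }
    }
    return 0;
  }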
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - LoadInst *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); @@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return 0; - + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. @@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (CE->isCast()) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; - + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector<Value*, 4> NewGEPIndices; - + // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. @@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); - + while (1) { if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ @@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { break; } } - + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; - + // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. if (!IC.getDataLayout() || - SrcTy->getAddressSpace() != + SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before + // the same size. 
Instead of casting the pointer before // the store, cast the value to be stored. Value *NewCast; Value *SIOp0 = SI.getOperand(0); @@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } - + // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); @@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) + if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { @@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), @@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). @@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); - + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; - + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || @@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! 
- + return 0; } @@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); @@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = 0; @@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); @@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had the same TBAA tag, preserve it. if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) @@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { OtherStore->getMetadata(LLVMContext::MD_tbaa)))) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); - + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 8e4267f898..ecc9fc3e45 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // if this is safe. 
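SimplifyStoreAtEndOfBlock, whose body is only reindented in these hunks, sinks two stores to the same address from the two predecessors of a join block into the join block itself, behind a PHI of the stored values. A source-level analogue in plain C++, with function names invented for illustration:

  #include <cassert>

  static void before(bool Cond, int *P, int A, int B) {
    if (Cond)
      *P = A;    // store in the "then" predecessor
    else
      *P = B;    // store in the "else" predecessor
  }

  static void after(bool Cond, int *P, int A, int B) {
    int Merged = Cond ? A : B;   // the PHI of the two stored values
    *P = Merged;                 // the single sunk store in the join block
  }

  int main() {
    int X = 0, Y = 0;
    before(true, &X, 1, 2);  after(true, &Y, 1, 2);  assert(X == Y);
    before(false, &X, 1, 2); after(false, &Y, 1, 2); assert(X == Y);
    return 0;
  }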
For example, the use could be in dynamically unreached // code. if (!V->hasOneUse()) return 0; - + bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) @@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } - + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) @@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { I->setOperand(0, V2); MadeChange = true; } - + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { I->setIsExact(); MadeChange = true; } - + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { I->setHasNoUnsignedWrap(); MadeChange = true; @@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // TODO: Lots more we could do here: // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - + return MadeChange ? V : 0; } @@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { LHSExt = LHSExt.zext(W * 2); RHSExt = RHSExt.zext(W * 2); } - + APInt MulExt = LHSExt * RHSExt; - + if (!sign) return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); - + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); return MulExt.slt(Min) || MulExt.sgt(Max); @@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_AllOnes())) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); - + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - + // ((X << C1)*C2) == (X * (C2 << C1)) if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) if (SI->getOpcode() == Instruction::Shl) if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) return BinaryOperator::CreateMul(SI->getOperand(0), ConstantExpr::getShl(CI, ShOp)); - + const APInt &Val = CI->getValue(); if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); @@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); return Shl; } - + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && @@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } } - + // Simplify mul instructions with a constant RHS. - if (isa<Constant>(Op1)) { + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || - (BO->getOpcode() != Instruction::UDiv && + (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); @@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_Shl(m_One(), m_Value(Y)))) return BinaryOperator::CreateShl(Op0, Y); } - + // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. 
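Two of the multiply rewrites referenced in this file, X*(2^C) -> X << C and ((X << C1)*C2) -> X*(C2 << C1), hold in wrapping unsigned arithmetic. A quick standalone check:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X = 0; X < 1000; X += 7)
      for (unsigned C1 = 0; C1 < 8; ++C1)
        for (uint32_t C2 = 1; C2 < 100; C2 += 9) {
          assert(X * (uint32_t(1) << C1) == (X << C1));      // X*(2^C) == X << C
          assert(((X << C1) * C2) == (X * (C2 << C1)));      // reassociate the shift
        }
    return 0;
  }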
// X * Y (where Y is 0 or 1) -> X & (0-Y) if (!I.getType()->isVectorTy()) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - + Value *BoolCast = 0, *OtherOp = 0; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; @@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { return; if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) return; - + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); if (CFP && CFP->isExactlyValue(0.5)) { Y = I->getOperand(1); @@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { CFP = dyn_cast<ConstantFP>(I->getOperand(1)); if (CFP && CFP->isExactlyValue(0.5)) Y = I->getOperand(0); -} +} /// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns /// true iff the given value is FMul or FDiv with one and only one operand /// being a normal constant (i.e. not Zero/NaN/Infinity). static bool isFMulOrFDivWithConstant(Value *V) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || (I->getOpcode() != Instruction::FMul && + if (!I || (I->getOpcode() != Instruction::FMul && I->getOpcode() != Instruction::FDiv)) return false; @@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) { /// foldFMulConst() is a helper routine of InstCombiner::visitFMul(). /// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand /// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true). -/// This function is to simplify "FMulOrDiv * C" and returns the +/// This function is to simplify "FMulOrDiv * C" and returns the /// resulting expression. Note that this function could return NULL in /// case the constants cannot be folded into a normal floating-point. -/// +/// Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, Instruction *InsertBefore) { assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); @@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, if (isNormalFp(F)) { R = BinaryOperator::CreateFMul(Opnd0, F); } else { - // (X / C1) * C => X / (C1/C) + // (X / C1) * C => X / (C1/C) Constant *F = ConstantExpr::getFDiv(C1, C); if (isNormalFp(cast<ConstantFP>(F))) R = BinaryOperator::CreateFDiv(Opnd0, F); @@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2) + // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C) Instruction *FAddSub = dyn_cast<Instruction>(Op0); if (FAddSub && (FAddSub->getOpcode() == Instruction::FAdd || @@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (C0) { std::swap(C0, C1); std::swap(Opnd0, Opnd1); - Swap = true; + Swap = true; } if (C1 && C1->getValueAPF().isNormal() && isFMulOrFDivWithConstant(Opnd0)) { - Value *M0 = ConstantExpr::getFMul(C1, C); - Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ? + Value *M1 = ConstantExpr::getFMul(C1, C); + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : 0; if (M0 && M1) { @@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // (X*Y) * X => (X*X) * Y where Y != X - // The purpose is two-fold: + // The purpose is two-fold: // 1) to form a power expression (of X). 
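The "masking multiply" described just above, X * Y -> X & (0-Y) when Y is known to be 0 or 1, works because 0-1 is the all-ones mask under unsigned wraparound. A small check outside the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X = 0; X < 100000; X += 123)
      for (uint32_t Y = 0; Y <= 1; ++Y)
        assert(X * Y == (X & (0u - Y)));   // 0-0 = 0 mask, 0-1 = all-ones mask
    return 0;
  }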
// 2) potentially shorten the critical path: After transformation, the // latency of the instruction Y is amortized by the expression of X*X, @@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } + // B * (uitofp i1 C) -> select C, B, 0 + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *B, *C; + if (!match(RHS, m_UIToFp(m_Value(C)))) + std::swap(LHS, RHS); + + if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) { + B = LHS; + Value *Zero = ConstantFP::getNegativeZero(B->getType()); + return SelectInst::Create(C, B, Zero); + } + } + + // A * (1 - uitofp i1 C) -> select C, 0, A + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *A, *C; + if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C))))) + std::swap(LHS, RHS); + + if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) && + C->getType()->isIntegerTy(1)) { + A = LHS; + Value *Zero = ConstantFP::getNegativeZero(A->getType()); + return SelectInst::Create(C, Zero, A); + } + } + if (!isa<Constant>(Op1)) std::swap(Opnd0, Opnd1); else @@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { /// instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast<SelectInst>(I.getOperand(1)); - + // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y int NonNullOperand = -1; if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) @@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) if (ST->isNullValue()) NonNullOperand = 1; - + if (NonNullOperand == -1) return false; - + Value *SelectCond = SI->getOperand(0); - + // Change the div/rem to use 'Y' instead of the select. I.setOperand(1, SI->getOperand(NonNullOperand)); - + // Okay, we know we replace the operand of the div/rem with 'Y' with no // problem. However, the select, or the condition of the select may have // multiple uses. Based on our knowledge that the operand must be non-zero, // propagate the known value for the select into other uses of it, and // propagate a known value of the condition into its other users. - + // If the select and condition only have a single use, don't bother with this, // early exit. if (SI->use_empty() && SelectCond->hasOneUse()) return true; - + // Scan the current block backward, looking for other uses of SI. BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); - + while (BBI != BBFront) { --BBI; // If we found a call to a function, we can't assume it will return, so // information from below it cannot be propagated above it. if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) break; - + // Replace uses of the select or its condition with the known values. for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); I != E; ++I) { @@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { Worklist.Add(BBI); } } - + // If we past the instruction, quit looking for it. if (&*BBI == SI) SI = 0; if (&*BBI == SelectCond) SelectCond = 0; - + // If we ran out of things to eliminate, break out of the loop. if (SelectCond == 0 && SI == 0) break; - + } return true; } @@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { I.setOperand(1, V); return &I; } - + // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. 
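The new B * (uitofp i1 C) -> select C, B, 0 fold is gated on nnan/ninf/nsz and uses negative zero as the select constant, since B * 0.0 yields -0.0 for negative B. A plain-C++ illustration of the value-level equivalence, over a few illustrative inputs only:

  #include <cassert>

  int main() {
    double Vals[] = {-3.5, -0.0, 0.0, 2.25, 1e9};
    for (double B : Vals)
      for (int C = 0; C <= 1; ++C) {
        double Mul = B * double(C);    // B * (uitofp i1 C)
        double Sel = C ? B : -0.0;     // select C, B, -0.0
        assert(Mul == Sel);            // equal as values; -0.0 == +0.0 compares true
      }
    return 0;
  }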
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) @@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; - - { + + { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. const APInt *C; if (match(Op1, m_Power2(C))) { BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; @@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return BinaryOperator::CreateLShr(Op0, N); } } - + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. { Value *Cond; const APInt *C1, *C2; @@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Construct the "on true" case of the select Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", I.isExact()); - + // Construct the "on false" case of the select Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", I.isExact()); - + // construct the select instruction and return it. return SelectInst::Create(Cond, TSI, FSI); } @@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is @@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } } - + return 0; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special /// FP value and: -/// 1) 1/C is exact, or +/// 1) 1/C is exact, or /// 2) reciprocal is allowed. /// If the convertion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. @@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, const APFloat &FpVal = Divisor->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); bool Cvt = FpVal.getExactInverse(&Reciprocal); - + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); @@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Constant *C = ConstantExpr::getFMul(C1, C2); const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); if (F.isNormal() && !F.isDenormal()) { - Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), AllowReciprocal); if (!Res) - Res = BinaryOperator::CreateFDiv(X, C); + Res = BinaryOperator::CreateFDiv(X, C); } } @@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Fold) { const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); if (FoldC.isNormal() && !FoldC.isDenormal()) { - Instruction *R = CreateDiv ? + Instruction *R = CreateDiv ? 
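The X udiv 2^C -> X >> C rewrite handled in this hunk is the usual power-of-two strength reduction; a standalone check in plain C++:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X = 0; X < 100000; X += 97)
      for (unsigned C = 0; C < 16; ++C)
        assert(X / (uint32_t(1) << C) == (X >> C));   // udiv by 2^C == lshr by C
    return 0;
  }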
BinaryOperator::CreateFDiv(Fold, X) : BinaryOperator::CreateFMul(X, Fold); R->setFastMathFlags(I.getFastMathFlags()); @@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Instruction *common = commonIRemTransforms(I)) return common; - + // X urem C^2 -> X and C-1 { const APInt *C; if (match(Op1, m_Power2(C))) @@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { ConstantInt::get(I.getType(), *C-1)); } - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (match(Op1, m_Shl(m_Power2(), m_Value()))) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); @@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; - + if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa<Constant>(RHSNeg) || (isa<ConstantInt>(RHSNeg) && diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index b0a998cca7..bd14e81c3f 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { unsigned Opc = FirstInst->getOpcode(); Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - + Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); - + bool isNUW = false, isNSW = false, isExact = false; if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { @@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } else if (PossiblyExactOperator *PEO = dyn_cast<PossiblyExactOperator>(FirstInst)) isExact = PEO->isExact(); - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); @@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); if (isExact) isExact = cast<PossiblyExactOperator>(I)->isExact(); - + // Keep track of which operand needs a phi node. if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) return 0; - + // Otherwise, this is safe to transform! - + Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; @@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } - + if (RHSVal == 0) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); @@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; } - + // Add all operands to the new PHIs. 
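Likewise for the remainder rewrites in visitURem above, X urem 2^k -> X & (2^k - 1), which also covers A % (C << N) when C is a power of two since the shifted divisor is again a power of two. A quick check separate from the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X = 0; X < 100000; X += 101)
      for (unsigned K = 0; K < 16; ++K) {
        uint32_t D = uint32_t(1) << K;        // a power-of-two divisor
        assert(X % D == (X & (D - 1)));       // urem == mask with D-1
      }
    return 0;
  }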
if (NewLHS || NewRHS) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { @@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } } } - + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, RHSVal); @@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); - - SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), + + SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), FirstInst->op_end()); // This is true if all GEP bases are allocas and if all indices into them are // constants. @@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // more than one phi, which leads to higher register pressure. This is // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; - + bool AllInBounds = true; - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { return 0; AllInBounds &= GEP->isInBounds(); - + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || !GEP->hasAllConstantIndices())) AllBasePointersAreAllocas = false; - + // Compare the operand lists. for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { if (FirstInst->getOperand(op) == GEP->getOperand(op)) continue; - + // Don't merge two GEPs when two operands differ (introducing phi nodes) // if one of the PHIs has a constant for the index. The index may be // substantially cheaper to compute for the constants, so making it a @@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (isa<ConstantInt>(FirstInst->getOperand(op)) || isa<ConstantInt>(GEP->getOperand(op))) return 0; - + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; @@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { NeededPhi = true; } } - + // If all of the base pointers of the PHI'd GEPs are from allocas, don't // bother doing this transformation. At best, this will just save a bit of // offset calculation, but all the predecessors will have to materialize the @@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // which can usually all be folded into the load. if (AllBasePointersAreAllocas) return 0; - + // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); - + bool HasAnyPHIs = false; for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. @@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; HasAnyPHIs = true; } - + // Add all operands to the new PHIs. 
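FoldPHIArgBinOpIntoPHI and FoldPHIArgGEPIntoPHI, only reindented in these hunks, pull a repeated operation through a PHI when every incoming value is the same opcode with a shared operand. A source-level analogue in plain C++ with invented names:

  #include <cassert>

  static int before(bool Cond, int A, int B, int K) {
    return Cond ? (A + K) : (B + K);   // phi of two adds with a shared operand K
  }

  static int after(bool Cond, int A, int B, int K) {
    int Phi = Cond ? A : B;            // phi of the varying operands only
    return Phi + K;                    // one add after the phi
  }

  int main() {
    for (int A = -5; A <= 5; ++A)
      for (int B = -5; B <= 5; ++B)
        for (int K = -5; K <= 5; ++K) {
          assert(before(true, A, B, K)  == after(true, A, B, K));
          assert(before(false, A, B, K) == after(false, A, B, K));
        }
    return 0;
  }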
if (HasAnyPHIs) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); BasicBlock *InBB = PN.getIncomingBlock(i); - + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) if (PHINode *OpPhi = OperandPhis[op]) OpPhi->addIncoming(InGEP->getOperand(op), InBB); } } - + Value *Base = FixedOperands[0]; - GetElementPtrInst *NewGEP = + GetElementPtrInst *NewGEP = GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); @@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L, E = L->getParent()->end(); - + for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) return false; - + // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { @@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { isAddressTaken = true; break; } - + if (!isAddressTaken && AI->isStaticAlloca()) return false; } - + // If this load is a load from a GEP with a constant offset from an alloca, // then we don't want to sink it. In its present form, it will be // load [constant stack offset]. Sinking it will cause us to have to @@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) return false; - + return true; } @@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); - + // We can't sink the load if the loaded value could be modified between the // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) return 0; - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && + if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; - + // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) return 0; - - // We can't sink the load if the loaded value could be modified between + + // We can't sink the load if the loaded value could be modified between // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; - + // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) return 0; - + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. 
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; } - + // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), PN.getNumIncomingValues(), PN.getName()+".in"); - + Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - + // Add all operands to the new PHI. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); @@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InVal = 0; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } - + Value *PhiVal; if (InVal) { // The new PHI unions all of the same values together. This is really @@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InsertNewInstBefore(NewPN, PN); PhiVal = NewPN; } - + // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. if (isVolatile) for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); NewLI->setDebugLoc(FirstLI->getDebugLoc()); return NewLI; @@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return FoldPHIArgGEPIntoPHI(PN); if (isa<LoadInst>(FirstInst)) return FoldPHIArgLoadIntoPHI(PN); - + // Scan the instruction, looking for input operations that can be folded away. // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing @@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { Constant *ConstantOp = 0; Type *CastSrcTy = 0; bool isNUW = false, isNSW = false, isExact = false; - + if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return 0; } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, + // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. 
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); - + if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { isNUW = BO->hasNoUnsignedWrap(); @@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { } else if (I->getOperand(1) != ConstantOp) { return 0; } - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) @@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { NewCI->setDebugLoc(FirstInst->getDebugLoc()); return NewCI; } - + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); if (isNUW) BinOp->setHasNoUnsignedWrap(); @@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { BinOp->setDebugLoc(FirstInst->getDebugLoc()); return BinOp; } - + CmpInst *CIOp = cast<CmpInst>(FirstInst); CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); @@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN, // Remember this node, and if we find the cycle, return. if (!PotentiallyDeadPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (PotentiallyDeadPHIs.size() == 16) return false; @@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN, /// PHIsEqualValue - Return true if this phi node is always equal to /// NonPhiInVal. This happens with mutually cyclic phi nodes like: /// z = some value; x = phi (y, z); y = phi (x, z) -static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { // See if we already saw this PHI node. if (!ValueEqualPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (ValueEqualPHIs.size() == 16) return false; - + // Scan the operands to see if they are either phi nodes or are equal to // the value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, } else if (Op != NonPhiInVal) return false; } - + return true; } @@ -557,10 +557,10 @@ struct PHIUsageRecord { unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) unsigned Shift; // The amount shifted. Instruction *Inst; // The trunc instruction. - + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) : PHIId(pn), Shift(Sh), Inst(User) {} - + bool operator<(const PHIUsageRecord &RHS) const { if (PHIId < RHS.PHIId) return true; if (PHIId > RHS.PHIId) return false; @@ -570,15 +570,15 @@ struct PHIUsageRecord { RHS.Inst->getType()->getPrimitiveSizeInBits(); } }; - + struct LoweredPHIRecord { PHINode *PN; // The PHI that was lowered. unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} - + // Ctor form used by DenseMap. LoweredPHIRecord(PHINode *pn, unsigned Sh) : PN(pn), Shift(Sh), Width(0) {} @@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHIUsers - Keep track of all of the truncated values extracted from a set // of PHIs, along with their offset. These are the things we want to rewrite. SmallVector<PHIUsageRecord, 16> PHIUsers; - + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI // nodes which are extracted from. 
PHIsToSlice is a set we use to avoid // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to // check the uses of (to ensure they are all extracts). SmallVector<PHINode*, 8> PHIsToSlice; SmallPtrSet<PHINode*, 8> PHIsInspected; - + PHIsToSlice.push_back(&FirstPhi); PHIsInspected.insert(&FirstPhi); - + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; - + // Scan the input list of the PHI. If any input is an invoke, and if the // input is defined in the predecessor, then we won't be split the critical // edge which is required to insert a truncate. Because of this, we have to @@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (II == 0) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; - + // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. return 0; } - - + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); - + // If the user is a PHI, inspect its uses recursively. if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (PHIsInspected.insert(UserPN)) PHIsToSlice.push_back(UserPN); continue; } - + // Truncates are always ok. if (isa<TruncInst>(User)) { PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); continue; } - + // Otherwise it must be a lshr which can only be used by one trunc. if (User->getOpcode() != Instruction::LShr || !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || !isa<ConstantInt>(User->getOperand(1))) return 0; - + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); } } - + // If we have no users, they must be all self uses, just nuke the PHI. if (PHIUsers.empty()) return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); - + // If this phi node is transformable, create new PHIs for all the pieces // extracted out of it. First, sort the users by their offset and size. array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; ); - + // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. DenseMap<BasicBlock*, Value*> PredValues; - + // ExtractedVals - Each new PHI we introduce is saved here so we don't // introduce redundant PHIs. DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; - + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; Type *Ty = PHIUsers[UserI].Inst->getType(); - + PHINode *EltPHI; - + // If we've already lowered a user like this, reuse the previously lowered // value. if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { - + // Otherwise, Create the new PHI node for this user. 
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); - + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); Value *&PredVal = PredValues[Pred]; - + // If we already have a value for this predecessor, reuse it. if (PredVal) { EltPHI->addIncoming(PredVal, Pred); @@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { EltPHI->addIncoming(PredVal, Pred); continue; } - + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { // If the incoming value was a PHI, and if it was one of the PHIs we // already rewrote it, just use the lowered value. @@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { continue; } } - + // Otherwise, do an extract in the predecessor. Builder->SetInsertPoint(Pred, Pred->getTerminator()); Value *Res = InVal; @@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Res = Builder->CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); - + // If the incoming value was a PHI, and if it was one of the PHIs we are // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the @@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), OldInVal)-PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res))); ++UserE; } } PredValues.clear(); - + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } - + // Replace the use of this piece with the PHI node. ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); } - + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with undefs. Value *Undef = UndefValue::get(FirstPhi.getType()); @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (DeadPHICycle(PU, PotentiallyDeadPHIs)) return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } - + // If this phi has a single use, and if that use just computes a value for // the next iteration of a loop, delete the phi. This occurs with unused // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this @@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (InValNo != NumIncomingVals) { Value *NonPhiInVal = PN.getIncomingValue(InValNo); - + // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { @@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; } - + // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. 
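The "one unique value plus phi values" case handled above is the same property PHIsEqualValue checks for mutually cyclic PHIs. A source-level analogue (an illustration only, not IR; the function name is invented):

  // In SSA form, x and y become PHIs that merge z from the entry edge and each
  // other from the back edge, so every incoming value is either z or a PHI
  // already known to equal z.
  int phi_equal_value_example(int z, int n) {
    int x = z, y = z;
    for (int i = 0; i < n; ++i) {
      int t = x;
      x = y;                 // the loop-carried values only shuffle z around
      y = t;
    }
    return x;                // always equal to z, so the whole PHI cycle folds to z
  }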
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index a262d711d3..59502fb988 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // If this is a non-volatile load or a cast from the same type, // merge. if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + Type *FIOpndTy = FI->getOperand(0)->getType(); + if (TI->getOperand(0)->getType() != FIOpndTy) return 0; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); - if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != - FI->getOperand(0)->getType()->getVectorNumElements()) + if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || + CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) return 0; } else { return 0; // unknown unary op. @@ -349,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return 0; } +/// foldSelectICmpAndOr - We want to turn: +/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) +/// into: +/// (or (shl (and X, C1), C3), y) +/// iff: +/// C1 and C2 are both powers of 2 +/// where: +/// C3 = Log(C2) - Log(C1) +/// +/// This transform handles cases where: +/// 1. The icmp predicate is inverted +/// 2. The select operands are reversed +/// 3. The magnitude of C2 and C1 are flipped +static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + + Value *CmpLHS = IC->getOperand(0); + Value *CmpRHS = IC->getOperand(1); + + if (!match(CmpRHS, m_Zero())) + return 0; + + Value *X; + const APInt *C1; + if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) + return 0; + + const APInt *C2; + bool OrOnTrueVal = false; + bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); + if (!OrOnFalseVal) + OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); + + if (!OrOnFalseVal && !OrOnTrueVal) + return 0; + + Value *V = CmpLHS; + Value *Y = OrOnFalseVal ? TrueVal : FalseVal; + + unsigned C1Log = C1->logBase2(); + unsigned C2Log = C2->logBase2(); + if (C2Log > C1Log) { + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder->CreateShl(V, C2Log - C1Log); + } else if (C1Log > C2Log) { + V = Builder->CreateLShr(V, C1Log - C2Log); + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + } else + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + + ICmpInst::Predicate Pred = IC->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || + (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) + V = Builder->CreateXor(V, *C2); + + return Builder->CreateOr(V, Y); +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. 
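To make the foldSelectICmpAndOr rewrite above concrete, here is a minimal check of the equivalence with the assumed constants C1 = 1 and C2 = 8, giving C3 = log2(8) - log2(1) = 3 (a sketch only; the helper names are invented):

  #include <cassert>
  #include <cstdint>

  // select (icmp eq (and X, 1), 0), Y, (or Y, 8)
  uint32_t before(uint32_t x, uint32_t y) { return ((x & 1) == 0) ? y : (y | 8); }

  // or (shl (and X, 1), 3), Y
  uint32_t after(uint32_t x, uint32_t y) { return ((x & 1) << 3) | y; }

  int main() {
    for (uint32_t x = 0; x < 4; ++x)
      for (uint32_t y = 0; y < 16; ++y)
        assert(before(x, y) == after(x, y));
  }

The inverted-predicate and swapped-operand variants listed in the comment only add the CreateXor compensation seen at the end of the helper.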
/// @@ -520,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + return Changed ? &SI : 0; } @@ -675,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Change: A = select B, false, C --> A = and !B, C Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { + } + if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); @@ -689,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select a, a, b -> a|b if (CondVal == TrueVal) return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) + if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); // select a, ~a, b -> (~a)&b // select a, b, ~a -> (~a)|b if (match(TrueVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateAnd(TrueVal, FalseVal); - else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + if (match(FalseVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateOr(TrueVal, FalseVal); } @@ -837,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); - Value *NewSel = + Value *NewSel = Builder->CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p"); @@ -861,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *LHS, *RHS, *LHS2, *RHS2; if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) @@ -907,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -917,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { - // Form a shufflevector instruction. - SmallVector<Constant *, 8> Mask(VWidth); - Type *Int32Ty = Type::getInt32Ty(CV->getContext()); - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elem = cast<Constant>(CV->getOperand(i)); - if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) - Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); - else if (isa<UndefValue>(Elem)) - Mask[i] = UndefValue::get(Int32Ty); - else - return 0; - } - Constant *MaskVal = ConstantVector::get(Mask); - Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); - return ReplaceInstUsesWith(SI, V); - } - if (isa<ConstantAggregateZero>(CondVal)) { return ReplaceInstUsesWith(SI, FalseVal); } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 4f71db1a4b..4301ddb5aa 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return 0; } +// If we have a PHI node with a vector type that has only 2 uses: feed +// itself and be an operand of extractelemnt at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type +Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { + // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. + if (!PN->hasNUses(2)) + return NULL; + + // If so, it's known at this point that one operand is PHI and the other is + // an extractelement node. Find the PHI user that is not the extractelement + // node. + Value::use_iterator iu = PN->use_begin(); + Instruction *PHIUser = dyn_cast<Instruction>(*iu); + if (PHIUser == cast<Instruction>(&EI)) + PHIUser = cast<Instruction>(*(++iu)); + + // Verify that this PHI user has one use, which is the PHI itself, + // and that it is a binary operation which is cheap to scalarize. + // otherwise return NULL. + if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || + !(isa<BinaryOperator>(PHIUser)) || + !CheapToScalarize(PHIUser, true)) + return NULL; + + // Create a scalar PHI node that will replace the vector PHI node + // just before the current PHI node. + PHINode * scalarPHI = cast<PHINode>( + InsertNewInstWith(PHINode::Create(EI.getType(), + PN->getNumIncomingValues(), ""), *PN)); + // Scalarize each PHI operand. + for (unsigned i=0; i < PN->getNumIncomingValues(); i++) { + Value *PHIInVal = PN->getIncomingValue(i); + BasicBlock *inBB = PN->getIncomingBlock(i); + Value *Elt = EI.getIndexOperand(); + // If the operand is the PHI induction variable: + if (PHIInVal == PHIUser) { + // Scalarize the binary operation. Its first operand is the + // scalar PHI and the second operand is extracted from the other + // vector operand. + BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); + unsigned opId = (B0->getOperand(0) == PN) ? 1: 0; + Value *Op = Builder->CreateExtractElement( + B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt"); + Value *newPHIUser = InsertNewInstWith( + BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op), + *B0); + scalarPHI->addIncoming(newPHIUser, inBB); + } else { + // Scalarize PHI input: + Instruction *newEI = + ExtractElementInst::Create(PHIInVal, Elt, ""); + // Insert the new instruction into the predecessor basic block. 
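A rough source-level picture of the pattern scalarizePHI targets (an illustration, not IR; the types and names here are assumptions): a loop-carried vector value whose only uses are feeding itself through one cheap binary operation and a single extract at a constant lane.

  #include <array>
  using Vec4 = std::array<float, 4>;

  // Before: the accumulator is a vector PHI, but only lane 2 is ever read.
  float before(const Vec4 *data, int n) {
    Vec4 acc{};
    for (int i = 0; i < n; ++i)
      for (int lane = 0; lane < 4; ++lane)
        acc[lane] += data[i][lane];
    return acc[2];
  }

  // After: the PHI is rebuilt as a scalar and the extract moves to the other
  // operand of the binary op, so the vector value disappears entirely.
  float after(const Vec4 *data, int n) {
    float acc2 = 0;
    for (int i = 0; i < n; ++i)
      acc2 += data[i][2];
    return acc2;
  }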
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal); + BasicBlock::iterator InsertPos; + if (pos && !isa<PHINode>(pos)) { + InsertPos = pos; + ++InsertPos; + } else { + InsertPos = inBB->getFirstInsertionPt(); + } + + InsertNewInstWith(newEI, *InsertPos); + + scalarPHI->addIncoming(newEI, inBB); + } + } + return ReplaceInstUsesWith(EI, scalarPHI); +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. @@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } + + // If there's a vector PHI feeding a scalar use through this extractelement + // instruction, try to scalarize the PHI. + if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { + Instruction *scalarPHI = scalarizePHI(EI, PN); + if (scalarPHI) + return (scalarPHI); + } } if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { @@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } else if (CastInst *CI = dyn_cast<CastInst>(I)) { // Canonicalize extractelement(cast) -> cast(extractelement) // bitcasts can change the number of vector elements and they cost nothing - if (CI->hasOneUse() && EI.hasOneUse() && - (CI->getOpcode() != Instruction::BitCast)) { + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Value *EE = Builder->CreateExtractElement(CI->getOperand(0), EI.getIndexOperand()); + Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } } @@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Update Mask to reflect that `ScalarOp' has been inserted at + // position `InsertedIdx' within the vector returned by IEI. + Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; + // Everything but the extracted element is replaced with the RHS. 
for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index c6115e3e91..ec10751202 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { Module *M = II->getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - ArrayRef<Value *>(), "", II->getParent()); + None, "", II->getParent()); } return EraseInstFromFunction(MI); } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475b1d..623c470506 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v3"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F); - void createInitializerPoisonCalls(Module &M, - Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); void emitShadowMapping(Module &M, IRBuilder<> &IRB) const; virtual bool doInitialization(Module &M); @@ -315,7 +313,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass { void initializeCallbacks(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); - void createInitializerPoisonCalls(Module &M, Value *FirstAddr, - Value *LastAddr); + void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t RedzoneSize() const { return RedzoneSizeForScale(Mapping.Scale); } @@ -531,9 +528,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. 
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -750,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, } void AddressSanitizerModule::createInitializerPoisonCalls( - Module &M, Value *FirstAddr, Value *LastAddr) { + Module &M, GlobalValue *ModuleName) { // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); // If that function is not present, this TU contains no globals, or they have @@ -762,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls( IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. - IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); + IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); @@ -836,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); @@ -885,11 +886,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -897,9 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - // The addresses of the first and last dynamically initialized globals in - // this TU. Used in initialization order checking. - Value *FirstDynamic = 0, *LastDynamic = 0; + bool HasDynamicallyInitializedGlobals = false; + + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + // We shouldn't merge same module names, as this string serves as unique + // module ID in runtime. 
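Restated as a plain struct, the per-global record built above now carries the module name as a field of its own (the struct and field names are an assumption for illustration; in the IR every field is an IntptrTy, with the two strings pointer-cast into it):

  typedef unsigned long uptr;   // stand-in for the target's pointer-sized integer
  struct AsanGlobalDescriptor {
    uptr beg;                   // address of the instrumented global
    uptr size;                  // original size
    uptr size_with_redzone;     // size including the right redzone
    const char *name;           // now just the global's name
    const char *module_name;    // new field: per-module string, also the unique module ID
    uptr has_dynamic_init;      // nonzero if the global has a dynamic initializer
  };

An array of these records is what gets handed to __asan_register_globals.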
+ ModuleName->setUnnamedAddr(false); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; @@ -930,11 +936,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,15 +960,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); // Populate the first and last globals declared in this TU. - if (CheckInitOrder && GlobalHasDynamicInitializer) { - LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); - if (FirstDynamic == 0) - FirstDynamic = LastDynamic; - } + if (CheckInitOrder && GlobalHasDynamicInitializer) + HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } @@ -977,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (CheckInitOrder && FirstDynamic && LastDynamic) - createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + if (CheckInitOrder && HasDynamicallyInitializedGlobals) + createInitializerPoisonCalls(M, ModuleName); IRB.CreateCall2(AsanRegisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); @@ -1095,6 +1095,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); @@ -1312,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() { ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } - // This string will be parsed by the run-time (DescribeStackAddress). + // This string will be parsed by the run-time (DescribeAddressIfStack). SmallString<2048> StackDescriptionStorage; raw_svector_ostream StackDescription(StackDescriptionStorage); - StackDescription << F.getName() << " " << AllocaVec.size() << " "; + StackDescription << AllocaVec.size() << " "; // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; @@ -1348,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() { } assert(Pos == LocalStackSize); - // Write the Magic value and the frame description constant to the redzone. + // The left-most redzone has enough space for at least 4 pointers. + // Write the Magic value to redzone[0]. 
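Laid out as a record, the frame header that the poisonStack hunk below writes into the left redzone looks roughly like this (the struct and slot names are assumptions; each slot is pointer-sized):

  typedef unsigned long uptr;
  struct AsanStackFrameHeader {
    uptr magic;                 // redzone[0]: kCurrentStackFrameMagic
    const char *description;    // redzone[1]: frame description string, which now starts
                                //             with the alloca count instead of the function name
    uptr pc;                    // redzone[2]: the function's address, standing in for the
                                //             name that was dropped from the string
    uptr spare;                 // redzone[3]: unused; "space for at least 4 pointers"
  };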
Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), BasePlus0); - Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, - ASan.LongSize/8)); - BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); + // Write the frame description constant to redzone[1]. + Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), + IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), StackDescription.str()); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, + 2 * ASan.LongSize/8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); // Poison the stack redzones at the entry. Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index 927982d2af..39de4b0401 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { bool BlackList::isInInit(const GlobalVariable &G) const { return (isIn(*G.getParent()) || inSection("global-init", G.getName()) || - inSection("global-init-type", GetGVTypeString(G))); + inSection("global-init-type", GetGVTypeString(G)) || + inSection("global-init-src", G.getParent()->getModuleIdentifier())); } bool BlackList::inSection(const StringRef Section, diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 095b852d93..2edd151869 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,8 +29,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/PathV2.h" #include "llvm/Support/raw_ostream.h" @@ -39,33 +41,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), - UseExtraChecksum(false), NoRedZone(false), - NoFunctionNamesInData(false) { + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; 
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool Use402Format, - bool UseExtraChecksum, bool NoRedZone, - bool NoFunctionNamesInData) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(Use402Format), UseExtraChecksum(UseExtraChecksum), - NoRedZone(NoRedZone), NoFunctionNamesInData(NoFunctionNamesInData) { - assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -76,6 +102,8 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteWriteoutFunctionListFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -86,23 +114,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - bool Use402Format; - bool UseExtraChecksum; - bool NoRedZone; - bool NoFunctionNamesInData; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -113,13 +140,14 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format, - bool UseExtraChecksum, - bool NoRedZone, - bool NoFunctionNamesInData) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum, - NoRedZone, NoFunctionNamesInData); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); +} + +static std::string getFunctionName(DISubprogram SP) { + if (!SP.getLinkageName().empty()) + return SP.getLinkageName(); + return SP.getName(); } namespace { @@ -257,8 +285,8 @@ namespace { // object users can construct, the blocks and lines will be rooted here. 
class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, - bool Use402Format, bool UseExtraChecksum) { + GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -270,17 +298,16 @@ namespace { ReturnBlock = new GCOVBlock(i++, os); writeBytes(FunctionTag, 4); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); - uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum - writeGCOVString(SP.getName()); + writeGCOVString(getFunctionName(SP)); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); } @@ -355,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { SmallString<128> Filename = CU.getFilename(); sys::path::replace_extension(Filename, NewStem); - return sys::path::filename(Filename.str()); + StringRef FName = sys::path::filename(Filename); + SmallString<128> CurPath; + if (sys::fs::current_path(CurPath)) return FName; + sys::path::append(CurPath, FName.str()); + return CurPath.str(); } bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -380,10 +411,9 @@ void GCOVProfiler::emitGCNO() { std::string ErrorInfo; raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); - if (!Use402Format) - out.write("oncg*404MVLL", 12); - else - out.write("oncg*204MVLL", 12); + out.write("oncg", 4); + out.write(ReversedVersion, 4); + out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { @@ -392,7 +422,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -522,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. 
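The registration described in the comment above amounts to roughly the following at the C level (a sketch; llvm_gcov_init is the run-time hook requested via getOrInsertFunction in the hunk below, and the constructor attribute merely illustrates what appendToGlobalCtors arranges):

  extern "C" void llvm_gcov_init(void (*writeout)(void), void (*flush)(void));
  extern "C" void __llvm_gcov_writeout(void);   // runs at exit, writes the .gcda files
  extern "C" void __llvm_gcov_flush(void);      // runs whenever __gcov_flush() is called
  __attribute__((constructor))
  static void gcov_init_stub(void) {            // emitted as "__llvm_gcov_init", internal linkage
    llvm_gcov_init(__llvm_gcov_writeout, __llvm_gcov_flush);
  }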
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Type *Params[] = { + PointerType::get(FTy, 0), + PointerType::get(FTy, 0) + }; + FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + + // Inialize the environment and register the local writeout and flush + // functions. + Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); + Builder.CreateCall2(GCOVInit, WriteoutF, FlushF); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -581,8 +641,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Type::getInt8PtrTy(*Ctx), false); + Type *Args[] = { + Type::getInt8PtrTy(*Ctx), // const char *orig_filename + Type::getInt8PtrTy(*Ctx), // const char version[4] + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } @@ -598,9 +661,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - Type *Args[2] = { + Type *Args[3] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name + Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); @@ -611,11 +675,20 @@ Constant *GCOVProfiler::getEmitArcsFunc() { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } +Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); +} + +Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -634,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -643,7 +716,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", 
WriteoutF); @@ -659,20 +732,19 @@ void GCOVProfiler::insertCounterWriteout( for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); - Builder.CreateCall(StartFile, - Builder.CreateGlobalStringPtr(FilenameGcda)); - for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator - I = CountersBySP.begin(), E = CountersBySP.end(); - I != E; ++I) { - DISubprogram SP(I->second); - intptr_t ident = reinterpret_cast<intptr_t>(I->second); - Builder.CreateCall2(EmitFunction, - Builder.getInt32(ident), - NoFunctionNamesInData ? - Constant::getNullValue(Builder.getInt8PtrTy()) : - Builder.CreateGlobalStringPtr(SP.getName())); - - GlobalVariable *GV = I->first; + Builder.CreateCall2(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda), + Builder.CreateGlobalStringPtr(ReversedVersion)); + for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { + DISubprogram SP(CountersBySP[j].second); + Builder.CreateCall3( + EmitFunction, Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(getFunctionName(SP)) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); + + GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); Builder.CreateCall2(EmitArcs, @@ -682,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout( Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Builder.getInt32Ty(), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -713,7 +765,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); // Create basic blocks for function. 
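Per compile unit, the generated writeout body above boils down to this call sequence (a sketch; the run-time entry points match the getOrInsertFunction declarations earlier in the file, while the file name, counts, and counter array are illustrative):

  #include <stdint.h>
  extern "C" {
  void llvm_gcda_start_file(const char *filename, const char version[4]);
  void llvm_gcda_emit_function(uint32_t ident, const char *name, uint8_t use_extra_checksum);
  void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters);
  void llvm_gcda_end_file(void);
  }

  static uint64_t counters_main[3];              // one slot per instrumented edge

  static void writeout_sketch(void) {
    llvm_gcda_start_file("foo.gcda", "*204");    // reversed copy of the default "402*" version
    llvm_gcda_emit_function(0, "main", 0);       // ident is now a small pass-assigned index
    llvm_gcda_emit_arcs(3, counters_main);
    llvm_gcda_end_file();
  }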
@@ -758,18 +810,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -794,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 8ba102559b..9f353967f3 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fce6513a97..4e75904ded 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given patter"), cl::Hidden, cl::init(0xff)); +static cl::opt<bool> ClPoisonUndef("msan-poison-undef", + cl::desc("poison undef temps"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), @@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Clean shadow (all zeroes) means all bits of the value are defined /// (initialized). - Value *getCleanShadow(Value *V) { + Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) return 0; @@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return ConstantStruct::get(ST, Vals); } + /// \brief Create a dirty shadow for a given value. + Constant *getPoisonedShadow(Value *V) { + Type *ShadowTy = getShadowTy(V); + if (!ShadowTy) + return 0; + return getPoisonedShadow(ShadowTy); + } + /// \brief Create a clean (zero) origin. 
Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); @@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return Shadow; } if (UndefValue *U = dyn_cast<UndefValue>(V)) { - Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V); DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); (void)U; return AllOnes; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index f93c5ab4c8..299060a42f 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit( static cl::opt<bool> ClInstrumentAtomics( "tsan-instrument-atomics", cl::init(true), cl::desc("Instrument atomics"), cl::Hidden); +static cl::opt<bool> ClInstrumentMemIntrinsics( + "tsan-instrument-memintrinsics", cl::init(true), + cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); +STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads"); STATISTIC(NumOmittedReadsFromConstantGlobals, "Number of reads from constant globals"); STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); @@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass { void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I); bool instrumentAtomic(Instruction *I); + bool instrumentMemIntrinsic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, SmallVectorImpl<Instruction*> &All); bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr); DataLayout *TD; + Type *IntptrTy; SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; IntegerType *OrdTy; @@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanAtomicThreadFence; Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; + Function *TsanVptrLoad; + Function *MemmoveFn, *MemcpyFn, *MemsetFn; }; } // namespace @@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); + + MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL)); + MemcpyFn = 
checkInterfaceFunction(M.getOrInsertFunction( + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL)); + MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy, NULL)); } bool ThreadSanitizer::doInitialization(Module &M) { @@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); + IntptrTy = IRB.getIntPtrTy(TD); Value *TsanInit = M.getOrInsertFunction("__tsan_init", IRB.getVoidTy(), NULL); appendToGlobalCtors(M, cast<Function>(TsanInit), 0); @@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; SmallVector<Instruction*, 8> AtomicAccesses; + SmallVector<Instruction*, 8> MemIntrinCalls; bool Res = false; bool HasCalls = false; @@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { else if (isa<ReturnInst>(BI)) RetVec.push_back(BI); else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { + if (isa<MemIntrinsic>(BI)) + MemIntrinCalls.push_back(BI); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } @@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Res |= instrumentAtomic(AtomicAccesses[i]); } + if (ClInstrumentMemIntrinsics) + for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) { + Res |= instrumentMemIntrinsic(MemIntrinCalls[i]); + } + // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); @@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { NumInstrumentedVtableWrites++; return true; } + if (!IsWrite && isVtableAccess(I)) { + IRB.CreateCall(TsanVptrLoad, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + NumInstrumentedVtableReads++; + return true; + } Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); if (IsWrite) NumInstrumentedWrites++; @@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { return IRB->getInt32(v); } +// If a memset intrinsic gets inlined by the code gen, we will miss races on it. +// So, we either need to ensure the intrinsic is not inlined, or instrument it. +// We do not instrument memset/memmove/memcpy intrinsics (too complicated), +// instead we simply replace them with regular function calls, which are then +// intercepted by the run-time. +// Since tsan is running after everyone else, the calls should not be +// replaced back with intrinsics. If that becomes wrong at some point, +// we will need to call e.g. __tsan_memset to avoid the intrinsics. +bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { + IRBuilder<> IRB(I); + if (MemSetInst *M = dyn_cast<MemSetInst>(I)) { + IRB.CreateCall3(MemsetFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) { + IRB.CreateCall3(isa<MemCpyInst>(M) ? 
MemcpyFn : MemmoveFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } + return false; +} + // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x // standards. For background see C++11 standard. A slightly older, publically // available draft of the standard (not entirely up-to-date, but close enough diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 5aada9c373..8f917aeb37 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, switch (Class) { case IC_Autorelease: case IC_AutoreleaseRV: + case IC_IntrinsicUser: case IC_User: // These operations never directly modify a reference count. return false; diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index 53a31b0de1..373168e898 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -30,6 +30,7 @@ using namespace llvm::objcarc; bool llvm::objcarc::EnableARCOpts; static cl::opt<bool, true> EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), cl::location(EnableARCOpts), cl::init(true)); diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index e062b66555..39670f339e 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_copyWeak") || M.getNamedValue("objc_retainedObject") || M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); + M.getNamedValue("objc_unretainedPointer") || + M.getNamedValue("clang.arc.use"); } /// \enum InstructionClass @@ -89,6 +90,7 @@ enum InstructionClass { IC_CopyWeak, ///< objc_copyWeak (derived) IC_DestroyWeak, ///< objc_destroyWeak (derived) IC_StoreStrong, ///< objc_storeStrong (derived) + IC_IntrinsicUser, ///< clang.arc.use IC_CallOrUser, ///< could call objc_release and/or "use" pointers IC_Call, ///< could call objc_release IC_User, ///< could "use" a pointer @@ -97,6 +99,13 @@ enum InstructionClass { raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class); +/// \brief Test if the given class is a kind of user. +inline static bool IsUser(InstructionClass Class) { + return Class == IC_User || + Class == IC_CallOrUser || + Class == IC_IntrinsicUser; +} + /// \brief Test if the given class is objc_retain or equivalent. static inline bool IsRetain(InstructionClass Class) { return Class == IC_Retain || @@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) { /// \brief Test if the given class represents instructions which return their /// argument verbatim. static inline bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. 
return Class == IC_Retain || Class == IC_RetainRV || Class == IC_Autorelease || Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || Class == IC_NoopCast; } @@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); } -static inline bool isNullOrUndef(const Value *V) { +static inline bool IsNullOrUndef(const Value *V) { return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); } -static inline bool isNoopInstruction(const Instruction *I) { +static inline bool IsNoopInstruction(const Instruction *I) { return isa<BitCastInst>(I) || (isa<GetElementPtrInst>(I) && cast<GetElementPtrInst>(I)->hasAllZeroIndices()); diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 1c13d1cbea..c43f4f4a44 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -66,6 +66,8 @@ namespace { Constant *RetainAutoreleaseCallee; /// Declaration for objc_retainAutoreleaseReturnValue(). Constant *RetainAutoreleaseRVCallee; + /// Declaration for objc_retainAutoreleasedReturnValue(). + Constant *RetainRVCallee; /// The inline asm string to insert between calls and RetainRV calls to make /// the optimization work on targets which need it. @@ -77,9 +79,12 @@ namespace { SmallPtrSet<CallInst *, 8> StoreStrongCalls; Constant *getStoreStrongCallee(Module *M); + Constant *getRetainRVCallee(Module *M); Constant *getRetainAutoreleaseCallee(Module *M); Constant *getRetainAutoreleaseRVCallee(Module *M); + bool OptimizeRetainCall(Function &F, Instruction *Retain); + bool ContractAutorelease(Function &F, Instruction *Autorelease, InstructionClass Class, SmallPtrSet<Instruction *, 4> @@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { return RetainAutoreleaseRVCallee; } +Constant *ObjCARCContract::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attribute); + } + return RetainRVCallee; +} + +/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a +/// return value. We do this late so we do not disrupt the dataflow analysis in +/// ObjCARCOpt. +bool +ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) { + ImmutableCallSite CS(GetObjCArg(Retain)); + const Instruction *Call = CS.getInstruction(); + if (!Call) + return false; + if (Call->getParent() != Retain->getParent()) + return false; + + // Check that the call is next to the retain. + BasicBlock::const_iterator I = Call; + ++I; + while (IsNoopInstruction(I)) ++I; + if (&*I != Retain) + return false; + + // Turn it to an objc_retainAutoreleasedReturnValue. + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Transforming objc_retain => " + "objc_retainAutoreleasedReturnValue since the operand is a " + "return value.\nOld: "<< *Retain << "\n"); + + // We do not have to worry about tail calls/does not throw since + // retain/retainRV have the same properties. 
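In source terms, the pattern OptimizeRetainCall rewrites is a retain applied directly to the return value of the immediately preceding call (a C-level sketch with void* standing in for id; make_object is a hypothetical callee):

  extern "C" void *objc_retain(void *);
  extern "C" void *objc_retainAutoreleasedReturnValue(void *);
  extern "C" void *make_object(void);

  void *pattern_before(void) {
    void *obj = make_object();
    return objc_retain(obj);                         // retain of a just-returned value ...
  }

  void *pattern_after(void) {
    void *obj = make_object();
    return objc_retainAutoreleasedReturnValue(obj);  // ... becomes retainRV; rewritten here in
  }                                                  // ObjCARCContract so ObjCARCOpt's dataflow
                                                     // analysis is left undisturbed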
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); + + DEBUG(dbgs() << "New: " << *Retain << "\n"); + return true; +} + /// Merge an autorelease with a retain into a fused call. bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, @@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) { StoreStrongCallee = 0; RetainAutoreleaseCallee = 0; RetainAutoreleaseRVCallee = 0; + RetainRVCallee = 0; // Initialize RetainRVMarker. RetainRVMarker = 0; @@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) { // objc_retainBlock does not necessarily return its argument. InstructionClass Class = GetBasicInstructionClass(Inst); switch (Class) { - case IC_Retain: case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: break; @@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) continue; break; + case IC_Retain: + // Attempt to convert retains to retainrvs if they are next to function + // calls. + if (!OptimizeRetainCall(F, Inst)) + break; + // If we succeed in our optimization, fall through. + // FALLTHROUGH case IC_RetainRV: { // If we're compiling for a target which needs a special inline-asm // marker to do the retainAutoreleasedReturnValue optimization, @@ -410,7 +473,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { break; } --BBI; - } while (isNoopInstruction(BBI)); + } while (IsNoopInstruction(BBI)); if (&*BBI == GetObjCArg(Inst)) { DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " @@ -429,7 +492,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { case IC_InitWeak: { // objc_initWeak(p, null) => *p = null CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(1))) { Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType())); Changed = true; @@ -453,6 +516,10 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (isa<AllocaInst>(Inst)) TailOkForStoreStrongs = false; continue; + case IC_IntrinsicUser: + // Remove calls to @clang.arc.use(...). + Inst->eraseFromParent(); + continue; default: continue; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 9c14949877..43e2e20035 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -190,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { do { const Value *V = Worklist.pop_back_val(); - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n"); + DEBUG(dbgs() << "Visiting: " << *V << "\n"); for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const User *UUser = *UI; - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n"); + DEBUG(dbgs() << "User: " << *UUser << "\n"); // Special - Use by a call (callee or argument) is not considered // to be an escape. @@ -206,11 +207,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { case IC_StoreStrong: case IC_Autorelease: case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer " - "arguments. Pointer Escapes!\n"); + DEBUG(dbgs() << "User copies pointer arguments. 
Pointer Escapes!\n"); // These special functions make copies of their pointer arguments. return true; } + case IC_IntrinsicUser: + // Use by the use intrinsic is not an escape. + continue; case IC_User: case IC_None: // Use by an instruction which copies the value is an escape if the @@ -219,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { if (VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. " - "Ptr escapes if result escapes. Adding to list.\n"); + DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes." + " Adding to list.\n"); Worklist.push_back(UUser); } else { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node." - "\n"); + DEBUG(dbgs() << "Already visited node.\n"); } continue; } @@ -241,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { continue; } // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n"); + DEBUG(dbgs() << "Assuming ptr escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n"); + DEBUG(dbgs() << "Ptr does not escape.\n"); return false; } @@ -301,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding " "retain+autoreleaes eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +STATISTIC(NumRetainsBeforeOpt, + "Number of retains before optimization."); +STATISTIC(NumReleasesBeforeOpt, + "Number of releases before optimization."); +#ifndef NDEBUG +STATISTIC(NumRetainsAfterOpt, + "Number of retains after optimization."); +STATISTIC(NumReleasesAfterOpt, + "Number of releases after optimization."); +#endif namespace { /// \enum Sequence @@ -371,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { namespace { /// \brief Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverese sequence. + /// reverse sequence. struct RRInfo { /// After an objc_retain, the reference count of the referenced /// object is known to be positive. Similarly, before an objc_release, the @@ -387,10 +399,6 @@ namespace { /// KnownSafe is true when either of these conditions is satisfied. bool KnownSafe; - /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain - /// calls). - bool IsRetainBlock; - /// True of the objc_release calls are all marked with the "tail" keyword. bool IsTailCallRelease; @@ -407,17 +415,18 @@ namespace { SmallPtrSet<Instruction *, 2> ReverseInsertPts; RRInfo() : - KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), - ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); + + bool IsTrackingImpreciseReleases() { + return ReleaseMetadata != 0; + } }; } void RRInfo::clear() { KnownSafe = false; - IsRetainBlock = false; IsTailCallRelease = false; ReleaseMetadata = 0; Calls.clear(); @@ -431,7 +440,7 @@ namespace { /// True if the reference count is known to be incremented. bool KnownPositiveRefCount; - /// True of we've seen an opportunity for partial RR elimination, such as + /// True if we've seen an opportunity for partial RR elimination, such as /// pushing calls into a CFG triangle or into one side of a CFG diamond. 
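// A minimal, standalone sketch of the retain/release bookkeeping above, with
// stand-in types instead of llvm::Instruction/MDNode (illustrative only, not
// the pass's real API). It shows the two points of this hunk: "imprecise"
// (movable) releases are simply those carrying release metadata, and the old
// IsRetainBlock flag is gone now that objc_retainBlock calls are dealt with
// before this bookkeeping ever sees them.
#include <set>

struct InstructionStub;                 // stands in for llvm::Instruction
struct MetadataStub;                    // stands in for llvm::MDNode

struct RRInfoSketch {
  bool KnownSafe;                       // retain/release proven balanced
  bool IsTailCallRelease;               // every release carries "tail"
  const MetadataStub *ReleaseMetadata;  // clang.imprecise_release, if present
  std::set<const InstructionStub *> Calls;            // the retains/releases
  std::set<const InstructionStub *> ReverseInsertPts; // where moved calls go

  RRInfoSketch()
    : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}

  // Mirrors the new predicate: imprecise means "has release metadata".
  bool IsTrackingImpreciseReleases() const { return ReleaseMetadata != 0; }

  void clear() {
    KnownSafe = false;
    IsTailCallRelease = false;
    ReleaseMetadata = 0;
    Calls.clear();
    ReverseInsertPts.clear();
  }
};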
bool Partial; @@ -451,15 +460,16 @@ namespace { KnownPositiveRefCount = true; } - void ClearRefCount() { + void ClearKnownPositiveRefCount() { KnownPositiveRefCount = false; } - bool IsKnownIncremented() const { + bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; } void SetSeq(Sequence NewSeq) { + DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n"); Seq = NewSeq; } @@ -472,7 +482,8 @@ namespace { } void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; + DEBUG(dbgs() << "Resetting sequence progress.\n"); + SetSeq(NewSeq); Partial = false; RRI.clear(); } @@ -486,10 +497,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - // If we're not in a sequence (anymore), drop all associated state. if (Seq == S_None) { Partial = false; @@ -698,6 +705,287 @@ void BBState::MergeSucc(const BBState &Other) { MI->second.Merge(PtrState(), /*TopDown=*/false); } +// Only enable ARC Annotations if we are building a debug version of +// libObjCARCOpts. +#ifndef NDEBUG +#define ARC_ANNOTATIONS +#endif + +// Define some macros along the lines of DEBUG and some helper functions to make +// it cleaner to create annotations in the source code and to no-op when not +// building in debug mode. +#ifdef ARC_ANNOTATIONS + +#include "llvm/Support/CommandLine.h" + +/// Enable/disable ARC sequence annotations. +static cl::opt<bool> +EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false), + cl::desc("Enable emission of arc data flow analysis " + "annotations")); +static cl::opt<bool> +DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false), + cl::desc("Disable check for cfg hazards when " + "annotating")); +static cl::opt<std::string> +ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", + cl::init(""), + cl::desc("filter out all data flow annotations " + "but those that apply to the given " + "target llvm identifier.")); + +/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an +/// instruction so that we can track backwards when post processing via the llvm +/// arc annotation processor tool. If the function is an +static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, + Value *Ptr) { + MDString *Hash = 0; + + // If pointer is a result of an instruction and it does not have a source + // MDNode it, attach a new MDNode onto it. If pointer is a result of + // an instruction and does have a source MDNode attached to it, return a + // reference to said Node. Otherwise just return 0. + if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) { + MDNode *Node; + if (!(Node = Inst->getMetadata(NodeId))) { + // We do not have any node. Generate and attatch the hash MDString to the + // instruction. + + // We just use an MDString to ensure that this metadata gets written out + // of line at the module level and to provide a very simple format + // encoding the information herein. Both of these makes it simpler to + // parse the annotations by a simple external program. 
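// The provenance "hash" attached above is just a readable string of the form
// "(<function>,%<value>)". A standalone sketch of that formatting, using
// plain std::string names in place of llvm::Function/Value (an assumption
// made for illustration; the real code wraps the result in an MDString):
#include <sstream>
#include <string>

std::string makeProvenanceTag(const std::string &FuncName,
                              const std::string &ValueName) {
  std::ostringstream OS;
  OS << "(" << FuncName << ",%" << ValueName << ")";
  return OS.str();  // e.g. "(main,%x)" for a value named %x defined in main
}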
+ std::string Str; + raw_string_ostream os(Str); + os << "(" << Inst->getParent()->getParent()->getName() << ",%" + << Inst->getName() << ")"; + + Hash = MDString::get(Inst->getContext(), os.str()); + Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash)); + } else { + // We have a node. Grab its hash and return it. + assert(Node->getNumOperands() == 1 && + "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand."); + Hash = cast<MDString>(Node->getOperand(0)); + } + } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) { + std::string str; + raw_string_ostream os(str); + os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName() + << ")"; + Hash = MDString::get(Arg->getContext(), os.str()); + } + + return Hash; +} + +static std::string SequenceToString(Sequence A) { + std::string str; + raw_string_ostream os(str); + os << A; + return os.str(); +} + +/// Helper function to change a Sequence into a String object using our overload +/// for raw_ostream so we only have printing code in one location. +static MDString *SequenceToMDString(LLVMContext &Context, + Sequence A) { + return MDString::get(Context, SequenceToString(A)); +} + +/// A simple function to generate a MDNode which describes the change in state +/// for Value *Ptr caused by Instruction *Inst. +static void AppendMDNodeToInstForPtr(unsigned NodeId, + Instruction *Inst, + Value *Ptr, + MDString *PtrSourceMDNodeID, + Sequence OldSeq, + Sequence NewSeq) { + MDNode *Node = 0; + Value *tmp[3] = {PtrSourceMDNodeID, + SequenceToMDString(Inst->getContext(), + OldSeq), + SequenceToMDString(Inst->getContext(), + NewSeq)}; + Node = MDNode::get(Inst->getContext(), + ArrayRef<Value*>(tmp, 3)); + + Inst->setMetadata(NodeId, Node); +} + +/// Add to the beginning of the basic block llvm.ptr.annotations which show the +/// state of a pointer at the entrance to a basic block. +static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before + // continuing. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Add to the end of the basic block llvm.ptr.annotations which show the state +/// of the pointer at the bottom of the basic block. 
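// Both annotation emitters (the entrance one above and the terminator one
// below) use the same lookup-or-create idiom: the pointer name and the
// sequence name become module-level string constants once and are reused by
// every later annotation call. A standalone sketch of that idiom, with a
// std::map standing in for the module's global symbol table (a hypothetical
// helper, not the real GlobalVariable machinery):
#include <map>
#include <string>
#include <utility>

class StringPoolSketch {
  std::map<std::string, std::string> Pool;
public:
  // Returns a stable pointer for Name, creating the entry on first use.
  const std::string *getOrCreate(const std::string &Name) {
    std::map<std::string, std::string>::iterator It = Pool.find(Name);
    if (It == Pool.end())
      It = Pool.insert(std::make_pair(Name, Name)).first;
    return &It->second;  // std::map nodes never move, so the pointer is stable
  }
};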
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before emitting + // an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, llvm::prior(BB->end())); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Adds a source annotation to pointer and a state change annotation to Inst +/// referencing the source annotation and the old/new state of pointer. +static void GenerateARCAnnotation(unsigned InstMDId, + unsigned PtrMDId, + Instruction *Inst, + Value *Ptr, + Sequence OldSeq, + Sequence NewSeq) { + if (EnableARCAnnotations) { + // If we have a target identifier, make sure that we match it before + // emitting an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + + // First generate the source annotation on our pointer. This will return an + // MDString* if Ptr actually comes from an instruction implying we can put + // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), + // then we know that our pointer is from an Argument so we put a reference + // to the argument number. + // + // The point of this is to make it easy for the + // llvm-arc-annotation-processor tool to cross reference where the source + // pointer is in the LLVM IR since the LLVM IR parser does not submit such + // information via debug info for backends to use (since why would anyone + // need such a thing from LLVM IR besides in non standard cases + // [i.e. this]). + MDString *SourcePtrMDNode = + AppendMDNodeToSourcePtr(PtrMDId, Ptr); + AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq, + NewSeq); + } +} + +// The actual interface for accessing the above functionality is defined via +// some simple macros which are defined below. We do this so that the user does +// not need to pass in what metadata id is needed resulting in cleaner code and +// additionally since it provides an easy way to conditionally no-op all +// annotation support in a non-debug build. 
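// The macros that follow are the only interface the rest of the pass touches;
// when annotations are disabled they expand to nothing, so non-debug builds
// pay no cost. A stripped-down, standalone illustration of that conditional
// no-op pattern (ENABLE_TRACING and TRACE_STATE are made-up names used only
// in this sketch):
#include <cstdio>

#ifdef ENABLE_TRACING
#define TRACE_STATE(ptr, oldSeq, newSeq)                                      \
  std::printf("ptr %p: %d -> %d\n", static_cast<const void *>(ptr),          \
              static_cast<int>(oldSeq), static_cast<int>(newSeq))
#else
// With tracing off, the macro compiles away entirely.
#define TRACE_STATE(ptr, oldSeq, newSeq) do { } while (0)
#endif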
+ +/// Use this macro to annotate a sequence state change when processing +/// instructions bottom up, +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) +/// Use this macro to annotate a sequence state change when processing +/// instructions top down. +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) + +#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \ + do { \ + if (EnableARCAnnotations) { \ + for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ + E = (_states)._direction##_ptr_end(); I != E; ++I) { \ + Value *Ptr = const_cast<Value*>(I->first); \ + Sequence Seq = I->second.GetSeq(); \ + GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ + } \ + } \ + } while (0) + +#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \ + Entrance, bottom_up) +#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ + Terminator, bottom_up) +#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ + Entrance, top_down) +#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ + Terminator, top_down) + +#else // !ARC_ANNOTATION +// If annotations are off, noop. +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) +#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock) +#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock) +#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock) +#define ANNOTATE_TOPDOWN_BBEND(states, basicblock) +#endif // !ARC_ANNOTATION + namespace { /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { @@ -711,9 +999,6 @@ namespace { /// them. These are initialized lazily to avoid cluttering up the Module /// with unused declarations. - /// Declaration for ObjC runtime function - /// objc_retainAutoreleasedReturnValue. - Constant *RetainRVCallee; /// Declaration for ObjC runtime function objc_autoreleaseReturnValue. Constant *AutoreleaseRVCallee; /// Declaration for ObjC runtime function objc_release. @@ -738,7 +1023,15 @@ namespace { /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. unsigned NoObjCARCExceptionsMDKind; - Constant *getRetainRVCallee(Module *M); +#ifdef ARC_ANNOTATIONS + /// The Metadata Kind for llvm.arc.annotation.bottomup metadata. + unsigned ARCAnnotationBottomUpMDKind; + /// The Metadata Kind for llvm.arc.annotation.topdown metadata. + unsigned ARCAnnotationTopDownMDKind; + /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata. 
+ unsigned ARCAnnotationProvenanceSourceMDKind; +#endif // ARC_ANNOATIONS + Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); Constant *getRetainCallee(Module *M); @@ -747,10 +1040,11 @@ namespace { bool IsRetainBlockOptimizable(const Instruction *Inst); - void OptimizeRetainCall(Function &F, Instruction *Retain); bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); + bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock, + InstructionClass &Class); void OptimizeIndividualCalls(Function &F); void CheckForCFGHazards(const BasicBlock *BB, @@ -804,6 +1098,10 @@ namespace { void OptimizeReturns(Function &F); +#ifndef NDEBUG + void GatherStatistics(Function &F, bool AfterOptimization = false); +#endif + virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); @@ -851,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { return true; } -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attribute); - } - return RetainRVCallee; -} - Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); @@ -946,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } -/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a -/// return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it to an objc_retainAutoreleasedReturnValue.. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retain => objc_retainAutoreleasedReturnValue" - " since the operand is a return value.\n" - " Old: " - << *Retain << "\n"); - - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *Retain << "\n"); -} - /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is /// not a return value. Or, if it can be paired with an /// objc_autoreleaseReturnValue, delete the pair and return true. 
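// Several of these peepholes reduce to one question: after skipping "no-op"
// instructions (bitcasts and all-zero GEPs), is the retain the very next
// instruction after the call that produced its argument? A standalone sketch
// of that adjacency test over a simplified instruction list; the Inst struct
// and its isNoop flag are stand-ins for llvm::Instruction and
// IsNoopInstruction, not real API:
#include <cstddef>
#include <vector>

struct Inst {
  bool isNoop;  // models IsNoopInstruction: a bitcast or an all-zero GEP
};

// True if the instruction at RetainIdx immediately follows the one at CallIdx
// once intervening no-op instructions are ignored.
bool retainFollowsCall(const std::vector<Inst> &Block,
                       std::size_t CallIdx, std::size_t RetainIdx) {
  std::size_t I = CallIdx + 1;
  while (I < Block.size() && Block[I].isNoop)
    ++I;                      // skip bitcasts / zero GEPs between the two
  return I == RetainIdx;
}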
@@ -990,14 +1240,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { if (Call->getParent() == RetainRV->getParent()) { BasicBlock::const_iterator I = Call; ++I; - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } @@ -1008,15 +1258,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // pointer. In this case, we can delete the pair. BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); + do --I; while (I != Begin && IsNoopInstruction(I)); if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && GetObjCArg(I) == Arg) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n" - << " Erasing " << *RetainRV - << "\n"); + DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" + << "Erasing " << *RetainRV << "\n"); EraseInstruction(I); EraseInstruction(RetainRV); @@ -1028,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming " - "objc_retainAutoreleasedReturnValue => " + DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " "objc_retain since the operand is not a return value.\n" - " Old: " - << *RetainRV << "\n"); + "Old = " << *RetainRV << "\n"); cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - DEBUG(dbgs() << " New: " - << *RetainRV << "\n"); + DEBUG(dbgs() << "New = " << *RetainRV << "\n"); return false; } @@ -1066,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming " - "objc_autoreleaseReturnValue => " + DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => " "objc_autorelease since its operand is not used as a return " "value.\n" - " Old: " - << *AutoreleaseRV << "\n"); + "Old = " << *AutoreleaseRV << "\n"); CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); AutoreleaseRVCI-> @@ -1079,14 +1323,48 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. Class = IC_Autorelease; - DEBUG(dbgs() << " New: " - << *AutoreleaseRV << "\n"); + DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); + +} + +// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain +// calls. +// +// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and +// does not escape (following the rules of block escaping), strength reduce the +// objc_retainBlock to an objc_retain. +// +// TODO: If an objc_retainBlock call is dominated period by a previous +// objc_retainBlock call, strength reduce the objc_retainBlock to an +// objc_retain. +bool +ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, + InstructionClass &Class) { + assert(GetBasicInstructionClass(Inst) == Class); + assert(IC_RetainBlock == Class); + + // If we can not optimize Inst, return false. 
+ if (!IsRetainBlockOptimizable(Inst)) + return false; + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n"); + DEBUG(dbgs() << "Old: " << *Inst << "\n"); + CallInst *RetainBlock = cast<CallInst>(Inst); + RetainBlock->setCalledFunction(getRetainCallee(F.getParent())); + // Remove copy_on_escape metadata. + RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); + Class = IC_Retain; + DEBUG(dbgs() << "New: " << *Inst << "\n"); + return true; } /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); // Reset all the flags in preparation for recomputing them. UsedInThisFunction = 0; @@ -1096,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { InstructionClass Class = GetBasicInstructionClass(Inst); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " - << Class << "; " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); switch (Class) { default: break; @@ -1113,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_NoopCast: Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" - " " << *Inst << "\n"); + DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); EraseInstruction(Inst); continue; @@ -1125,18 +1401,15 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_InitWeak: case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { + if (IsNullOrUndef(CI->getArgOperand(0))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); continue; @@ -1146,8 +1419,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_CopyWeak: case IC_MoveWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(0)) || + IsNullOrUndef(CI->getArgOperand(1))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), @@ -1155,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); @@ -1167,8 +1437,14 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } break; } + case IC_RetainBlock: + // If we strength reduce an objc_retainBlock to an objc_retain, continue + // onto the objc_retain peephole optimizations. Otherwise break. 
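// The strength reduction above comes down to a small gate: an objc_retainBlock
// may be rewritten to a plain objc_retain only when it carries the
// copy_on_escape marker and its block pointer provably does not escape. A
// standalone sketch of that decision; the struct fields are hypothetical
// stand-ins for the pass's metadata and escape queries:
struct RetainBlockCallSketch {
  bool hasCopyOnEscapeMD;  // models Inst->getMetadata(CopyOnEscapeMDKind)
  bool pointerEscapes;     // models an escape analysis of the block pointer
  bool loweredToRetain;    // set once the call has been rewritten
};

// Returns true and marks the call rewritten only when both conditions hold;
// the real code swaps the callee to objc_retain and drops the metadata.
bool tryStrengthReduceRetainBlock(RetainBlockCallSketch &Call) {
  if (!Call.hasCopyOnEscapeMD || Call.pointerEscapes)
    return false;
  Call.loweredToRetain = true;
  Call.hasCopyOnEscapeMD = false;
  return true;
}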
+ if (!OptimizeRetainBlockCall(F, Inst, Class)) + break; + // FALLTHROUGH case IC_Retain: - OptimizeRetainCall(F, Inst); + ++NumRetainsBeforeOpt; break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1177,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; + case IC_Release: + ++NumReleasesBeforeOpt; + break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1193,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *NewCall = CallInst::Create(getReleaseCallee(F.getParent()), Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); + NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None)); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " - "objc_autorelease(x) with objc_release(x) since x is " - "otherwise unused.\n" - " Old: " << *Call << - "\n New: " << - *NewCall << "\n"); + DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " + "since x is otherwise unused.\nOld: " << *Call << "\nNew: " + << *NewCall << "\n"); EraseInstruction(Call); Inst = NewCall; @@ -1213,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // a tail keyword. if (IsAlwaysTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" - " to function since it can never be passed stack args: " << *Inst << - "\n"); + DEBUG(dbgs() << "Adding tail keyword to function since it can never be " + "passed stack args: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(); } @@ -1223,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // semantics of ARC truly do not do so. if (IsNeverTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " - "keyword from function: " << *Inst << + DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(false); } @@ -1232,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Set nounwind as needed. if (IsNoThrow(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" - " class. Setting nounwind on: " << *Inst << "\n"); + DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst + << "\n"); cast<CallInst>(Inst)->setDoesNotThrow(); } @@ -1245,11 +1518,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { const Value *Arg = GetObjCArg(Inst); // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { + if (IsNullOrUndef(Arg)) { Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " - " null are no-ops. Erasing: " << *Inst << "\n"); + DEBUG(dbgs() << "ARC calls with null are no-ops. 
Erasing: " << *Inst + << "\n"); EraseInstruction(Inst); continue; } @@ -1280,7 +1553,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) + if (IsNullOrUndef(Incoming)) HasNull = true; else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) .getNumSuccessors() != 1) { @@ -1334,7 +1607,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { + if (!IsNullOrUndef(Incoming)) { CallInst *Clone = cast<CallInst>(CInst->clone()); Value *Op = PN->getIncomingValue(i); Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); @@ -1343,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Clone->setArgOperand(0, Op); Clone->insertBefore(InsertPos); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + DEBUG(dbgs() << "Cloning " << *CInst << "\n" - " And inserting " - "clone at " << *InsertPos << "\n"); + "And inserting clone at " << *InsertPos << "\n"); Worklist.push_back(std::make_pair(Clone, Incoming)); } } @@ -1358,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } } while (!Worklist.empty()); } - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); +} + +/// If we have a top down pointer in the S_Use state, make sure that there are +/// no CFG hazards by checking the states of various bottom up pointers. +static void CheckForUseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame, + bool &ShouldContinue) { + switch (SuccSSeq) { + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { + S.ClearSequenceProgress(); + break; + } + ShouldContinue = true; + break; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } +} + +/// If we have a Top Down pointer in the S_CanRelease state, make sure that +/// there are no CFG hazards by checking the states of various bottom up +/// pointers. +static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame) { + switch (SuccSSeq) { + case S_CanRelease: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } } /// Check for critical edges, loop boundaries, irreducible control flow, or @@ -1371,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. 
for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) + E = MyStates.top_down_ptr_end(); I != E; ++I) { + PtrState &S = I->second; + const Sequence Seq = I->second.GetSeq(); + + // We only care about S_Retain, S_CanRelease, and S_Use. + if (Seq == S_None) + continue; + + // Make sure that if extra top down states are added in the future that this + // code is updated to handle it. + assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && + "Unknown top down sequence state."); + + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + const DenseMap<const BasicBlock *, BBState>::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + const Sequence SuccSSeq = SuccS.GetSeq(); + + // If bottom up, the pointer is in an S_None state, clear the sequence + // progress since the sequence in the bottom up state finished + // suggesting a mismatch in between retains/releases. This is true for + // all three cases that we are handling here: S_Retain, S_Use, and + // S_CanRelease. + if (SuccSSeq == S_None) { S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. 
- DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } + continue; + } + + // If we have S_Use or S_CanRelease, perform our check for cfg hazard + // checks. + const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + + // *NOTE* We do not use Seq from above here since we are allowing for + // S.GetSeq() to change while we are visiting basic blocks. + switch(S.GetSeq()) { + case S_Use: { + bool ShouldContinue = false; + CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, + SomeSuccHasSame, AllSuccsHaveSame, + ShouldContinue); + if (ShouldContinue) continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + break; + } + case S_CanRelease: { + CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, + S, SomeSuccHasSame, + AllSuccsHaveSame); + break; + } + case S_Retain: + case S_None: + case S_Stop: + case S_Release: + case S_MovableRelease: + break; } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } } + + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + } } bool @@ -1482,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, InstructionClass Class = GetInstructionClass(Inst); const Value *Arg = 0; + DEBUG(dbgs() << "Class: " << Class << "\n"); + switch (Class) { case IC_Release: { Arg = GetObjCArg(Inst); @@ -1496,27 +1804,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { - DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested " - "releases (i.e. a release pair)\n"); + DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n"); NestingDetected = true; } MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); + Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; + ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq); + S.ResetSequenceProgress(NewSeq); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. 
- if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. + break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1524,20 +1831,22 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - switch (S.GetSeq()) { + Sequence OldSeq = S.GetSeq(); + switch (OldSeq) { case S_Stop: case S_Release: case S_MovableRelease: case S_Use: - S.RRI.ReverseInsertPts.clear(); + // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an + // imprecise release, clear our reverse insertion points. + if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases()) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_CanRelease: // Don't do retain+release tracking for IC_RetainRV, because it's // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; + if (Class != IC_RetainRV) Retains[Inst] = S.RRI; - } S.ClearSequenceProgress(); break; case S_None: @@ -1545,7 +1854,9 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } - return NestingDetected; + ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); + // A retain moving bottom up can be a use. + break; } case IC_AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. @@ -1571,10 +1882,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Use: S.SetSeq(S_CanRelease); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq()); continue; case S_CanRelease: case S_Release: @@ -1592,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Release: case S_MovableRelease: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); assert(S.RRI.ReverseInsertPts.empty()); // If this is an invoke instruction, we're scanning it as part of // one of its successor blocks, since we can't insert code after it @@ -1601,10 +1917,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, else S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } else if (Seq == S_Release && IsUser(Class)) { + DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr + << "\n"); // Non-movable releases depend on any possible objc pointer use. S.SetSeq(S_Stop); + ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); assert(S.RRI.ReverseInsertPts.empty()); // As above; handle invoke specially. 
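// Both branches above record a "reverse insertion point": the place where a
// retain would have to be re-inserted if the pair is later moved. For an
// ordinary instruction that is simply the next instruction; an invoke is a
// block terminator, so nothing can be inserted after it and the point is
// taken at the top of the successor block being scanned instead. A standalone
// sketch of that choice, with index-based stand-ins for LLVM's iterators:
#include <cstddef>

struct BlockSketch { std::size_t firstInsertionPt; };
struct InstSketch  { bool isInvoke; std::size_t indexInBlock; };

// Where code would be re-inserted when a retain is moved past Inst.
std::size_t reverseInsertPoint(const InstSketch &Inst,
                               const BlockSketch &SuccBeingScanned) {
  if (Inst.isInvoke)
    return SuccBeingScanned.firstInsertionPt; // cannot insert after a terminator
  return Inst.indexInBlock + 1;               // immediately after the instruction
}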
if (isa<InvokeInst>(Inst)) @@ -1614,8 +1933,12 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, } break; case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } break; case S_CanRelease: case S_Use: @@ -1633,6 +1956,9 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains) { + + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); + bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -1654,6 +1980,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, } } + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. + ANNOTATE_BOTTOMUP_BBEND(MyStates, BB); + // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { Instruction *Inst = llvm::prior(I); @@ -1662,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa<InvokeInst>(Inst)) continue; - DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -1677,6 +2007,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB); + return NestingDetected; } @@ -1690,11 +2024,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (Class) { case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. 
+ break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1714,9 +2047,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, if (S.GetSeq() == S_Retain) NestingDetected = true; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain); S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.Calls.insert(Inst); } @@ -1730,17 +2063,23 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); + + Sequence OldSeq = S.GetSeq(); - switch (S.GetSeq()) { + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + + switch (OldSeq) { case S_Retain: case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); + if (OldSeq == S_Retain || ReleaseMetadata != 0) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); Releases[Inst] = S.RRI; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); S.ClearSequenceProgress(); break; case S_None: @@ -1776,10 +2115,13 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease); assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); @@ -1801,8 +2143,12 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible direct uses. switch (Seq) { case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); + } break; case S_Retain: case S_Use: @@ -1822,6 +2168,7 @@ bool ObjCARCOpt::VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases) { + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n"); bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -1843,15 +2190,26 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, } } + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + ANNOTATE_TOPDOWN_BBSTART(MyStates, BB); + // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. 
+ ANNOTATE_TOPDOWN_BBEND(MyStates, BB); + +#ifdef ARC_ANNOTATIONS + if (!(EnableARCAnnotations && DisableCheckForCFGHazards)) +#endif CheckForCFGHazards(BB, BBStates, MyStates); return NestingDetected; } @@ -1983,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Type *ArgTy = Arg->getType(); Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); + DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); + // Insert the new retain and release calls. for (SmallPtrSet<Instruction *, 2>::const_iterator PI = ReleasesToMove.ReverseInsertPts.begin(), @@ -1991,20 +2351,12 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? - getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); + CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt); Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); + Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator PI = RetainsToMove.ReverseInsertPts.begin(), @@ -2021,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } // Delete the original retain and release calls. @@ -2034,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRetain = *AI; Retains.blot(OrigRetain); DeadInsts.push_back(OrigRetain); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain << - "\n"); + DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator AI = ReleasesToMove.Calls.begin(), @@ -2043,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRelease = *AI; Releases.erase(OrigRelease); DeadInsts.push_back(OrigRelease); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease - << "\n"); + DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); } + } bool @@ -2075,7 +2424,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> unsigned OldCount = 0; unsigned NewCount = 0; bool FirstRelease = true; - bool FirstRetain = true; for (;;) { for (SmallVectorImpl<Instruction *>::const_iterator NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { @@ -2156,16 +2504,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> OldDelta += PathCount; OldCount += PathCount; - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - return false; - // Collect the optimal insertion points. 
if (!KnownSafe) for (SmallPtrSet<Instruction *, 2>::const_iterator @@ -2210,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) return false; +#ifdef ARC_ANNOTATIONS + // Do not move calls if ARC annotations are requested. + if (EnableARCAnnotations) + return false; +#endif // ARC_ANNOTATIONS + Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; @@ -2228,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, Module *M) { + DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); + bool AnyPairsCompletelyEliminated = false; RRInfo RetainsToMove; RRInfo ReleasesToMove; @@ -2243,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> Instruction *Retain = cast<Instruction>(V); - DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain - << "\n"); + DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); Value *Arg = GetObjCArg(Retain); @@ -2295,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> /// Weak pointer optimizations. void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); + // First, do memdep-style RLE and S2L optimizations. We can't use memdep // itself because it uses AliasAnalysis and we need to do provenance // queries instead. for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << - "\n"); + DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); InstructionClass Class = GetBasicInstructionClass(Inst); if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) @@ -2392,6 +2738,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { goto clobbered; case IC_AutoreleasepoolPush: case IC_None: + case IC_IntrinsicUser: case IC_User: // Weak pointers are only modified through the weak entry points // (and arbitrary calls, which could call the weak entry points). @@ -2449,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { done:; } } - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n"); - } /// Identify program paths which execute sequences of retains and releases which @@ -2476,6 +2820,88 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { NestingDetected; } +/// Check if there is a dependent call earlier that does not have anything in +/// between the Retain and the call that can affect the reference count of their +/// shared pointer argument. Note that Retain need not be in BB. +static bool +HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, + SmallPtrSet<Instruction *, 4> &DepInsts, + SmallPtrSet<const BasicBlock *, 4> &Visited, + ProvenanceAnalysis &PA) { + FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, + DepInsts, Visited, PA); + if (DepInsts.size() != 1) + return false; + + CallInst *Call = + dyn_cast_or_null<CallInst>(*DepInsts.begin()); + + // Check that the pointer is the return value of the call. + if (!Call || Arg != Call) + return false; + + // Check that the call is a regular call. 
+ InstructionClass Class = GetBasicInstructionClass(Call); + if (Class != IC_CallOrUser && Class != IC_Call) + return false; + + return true; +} + +/// Find a dependent retain that precedes the given autorelease for which there +/// is nothing in between the two instructions that can affect the ref count of +/// Arg. +static CallInst * +FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, + Instruction *Autorelease, + SmallPtrSet<Instruction *, 4> &DepInsts, + SmallPtrSet<const BasicBlock *, 4> &Visited, + ProvenanceAnalysis &PA) { + FindDependencies(CanChangeRetainCount, Arg, + BB, Autorelease, DepInsts, Visited, PA); + if (DepInsts.size() != 1) + return 0; + + CallInst *Retain = + dyn_cast_or_null<CallInst>(*DepInsts.begin()); + + // Check that we found a retain with the same argument. + if (!Retain || + !IsRetain(GetBasicInstructionClass(Retain)) || + GetObjCArg(Retain) != Arg) { + return 0; + } + + return Retain; +} + +/// Look for an ``autorelease'' instruction dependent on Arg such that there are +/// no instructions dependent on Arg that need a positive ref count in between +/// the autorelease and the ret. +static CallInst * +FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, + ReturnInst *Ret, + SmallPtrSet<Instruction *, 4> &DepInsts, + SmallPtrSet<const BasicBlock *, 4> &V, + ProvenanceAnalysis &PA) { + FindDependencies(NeedsPositiveRetainCount, Arg, + BB, Ret, DepInsts, V, PA); + if (DepInsts.size() != 1) + return 0; + + CallInst *Autorelease = + dyn_cast_or_null<CallInst>(*DepInsts.begin()); + if (!Autorelease) + return 0; + InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); + if (!IsAutorelease(AutoreleaseClass)) + return 0; + if (GetObjCArg(Autorelease) != Arg) + return 0; + + return Autorelease; +} + /// Look for this pattern: /// \code /// %call = call i8* @something(...) @@ -2484,122 +2910,91 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { /// ret i8* %3 /// \endcode /// And delete the retain and autorelease. -/// -/// Otherwise if it's just this: -/// \code -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// convert the autorelease to autoreleaseRV. void ObjCARCOpt::OptimizeReturns(Function &F) { if (!F.getReturnType()->isPointerTy()) return; + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); + SmallPtrSet<Instruction *, 4> DependingInstructions; SmallPtrSet<const BasicBlock *, 4> Visited; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { BasicBlock *BB = FI; ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n"); + DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); - if (!Ret) continue; + if (!Ret) + continue; const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - FindDependencies(NeedsPositiveRetainCount, Arg, - BB, Ret, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - if (!Autorelease) - goto next_block; - InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); - if (!IsAutorelease(AutoreleaseClass)) - goto next_block; - if (GetObjCArg(Autorelease) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference - // count between the autorelease and the retain. 
- FindDependencies(CanChangeRetainCount, Arg, - BB, Autorelease, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that we found a retain with the same argument. - if (!Retain || - !IsRetain(GetBasicInstructionClass(Retain)) || - GetObjCArg(Retain) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Convert the autorelease to an autoreleaseRV, since it's - // returning the value. - if (AutoreleaseClass == IC_Autorelease) { - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease " - "=> autoreleaseRV since it's returning a value.\n" - " In: " << *Autorelease - << "\n"); - Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); - DEBUG(dbgs() << " Out: " << *Autorelease - << "\n"); - Autorelease->setTailCall(); // Always tail call autoreleaseRV. - AutoreleaseClass = IC_AutoreleaseRV; - } - - // Check that there is nothing that can affect the reference - // count between the retain and the call. - // Note that Retain need not be in BB. - FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, - DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - { - CallInst *Call = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + // Look for an ``autorelease'' instruction that is a predecessor of Ret and + // dependent on Arg such that there are no instructions dependent on Arg + // that need a positive ref count in between the autorelease and Ret. + CallInst *Autorelease = + FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret, + DependingInstructions, Visited, + PA); + DependingInstructions.clear(); + Visited.clear(); - // Check that the pointer is the return value of the call. - if (!Call || Arg != Call) - goto next_block; + if (!Autorelease) + continue; - // Check that the call is a regular call. - InstructionClass Class = GetBasicInstructionClass(Call); - if (Class != IC_CallOrUser && Class != IC_Call) - goto next_block; + CallInst *Retain = + FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, + DependingInstructions, Visited, PA); + DependingInstructions.clear(); + Visited.clear(); - // If so, we can zap the retain and autorelease. - Changed = true; - ++NumRets; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain - << "\n Erasing: " - << *Autorelease << "\n"); - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } + if (!Retain) + continue; - next_block: + // Check that there is nothing that can affect the reference count + // between the retain and the call. Note that Retain need not be in BB. + bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain, + DependingInstructions, + Visited, PA); DependingInstructions.clear(); Visited.clear(); + + if (!HasSafePathToCall) + continue; + + // If so, we can zap the retain and autorelease. + Changed = true; + ++NumRets; + DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " + << *Autorelease << "\n"); + EraseInstruction(Retain); + EraseInstruction(Autorelease); } +} - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n"); +#ifndef NDEBUG +void +ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { + llvm::Statistic &NumRetains = + AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt; + llvm::Statistic &NumReleases = + AfterOptimization? 
NumReleasesAfterOpt : NumReleasesBeforeOpt; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + switch (GetBasicInstructionClass(Inst)) { + default: + break; + case IC_Retain: + ++NumRetains; + break; + case IC_Release: + ++NumReleases; + break; + } + } } +#endif bool ObjCARCOpt::doInitialization(Module &M) { if (!EnableARCOpts) @@ -2617,13 +3012,20 @@ bool ObjCARCOpt::doInitialization(Module &M) { M.getContext().getMDKindID("clang.arc.copy_on_escape"); NoObjCARCExceptionsMDKind = M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); +#ifdef ARC_ANNOTATIONS + ARCAnnotationBottomUpMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.bottomup"); + ARCAnnotationTopDownMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.topdown"); + ARCAnnotationProvenanceSourceMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.provenancesource"); +#endif // ARC_ANNOTATIONS // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release // calls finalizers which can have arbitrary side effects. // These are initialized lazily. - RetainRVCallee = 0; AutoreleaseRVCallee = 0; ReleaseCallee = 0; RetainCallee = 0; @@ -2643,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { Changed = false; - DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n"); + DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" + "\n"); PA.setAA(&getAnalysis<AliasAnalysis>()); @@ -2651,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) { // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. - // Preliminary optimizations. This also computs UsedInThisFunction. + // Preliminary optimizations. This also computes UsedInThisFunction. OptimizeIndividualCalls(F); // Optimizations for weak pointers. @@ -2678,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) { (1 << IC_AutoreleaseRV))) OptimizeReturns(F); + // Gather statistics after optimization. +#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, true); + } +#endif + DEBUG(dbgs() << "\n"); return Changed; diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index a841c64a9f..03e12d4fd7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, return OS << "IC_Call"; case IC_User: return OS << "IC_User"; + case IC_IntrinsicUser: + return OS << "IC_IntrinsicUser"; case IC_None: return OS << "IC_None"; } @@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - // No arguments. + // No (mandatory) arguments. if (AI == AE) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Case("clang.arc.use", IC_IntrinsicUser) .Default(IC_CallOrUser); // One argument. @@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { return StringSwitch<InstructionClass>(F->getName()) .Case("objc_moveWeak", IC_MoveWeak) .Case("objc_copyWeak", IC_CopyWeak) + // Ignore annotation calls. 
This is important to stop the + // optimizer from treating annotations as uses which would + // make the state of the pointers they are attempting to + // elucidate to be incorrect. + .Case("llvm.arc.annotation.topdown.bbstart", IC_None) + .Case("llvm.arc.annotation.topdown.bbend", IC_None) + .Case("llvm.arc.annotation.bottomup.bbstart", IC_None) + .Case("llvm.arc.annotation.bottomup.bbend", IC_None) .Default(IC_CallOrUser); } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d71dd5dec6..f0d29c88a8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/DominatorInternals.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -88,7 +89,7 @@ namespace { /// Keeps track of non-local addresses that have been sunk into a block. /// This allows us to avoid inserting duplicate code for blocks with /// multiple load/stores of the same address. - DenseMap<Value*, Value*> SunkAddrs; + ValueMap<Value*, Value*> SunkAddrs; /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to /// be updated. @@ -154,7 +155,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. - if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) @@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // start of the block. CurInstIterator = BB->begin(); SunkAddrs.clear(); - } else { - // This address is now available for reassignment, so erase the table - // entry; we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; } } ++NumMemoryInsts; @@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!DefIsLiveOut) return false; - // Make sure non of the uses are PHI nodes. + // Make sure none of the uses are PHI nodes. for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447f1c..f350b9bbde 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { + class GVN; + struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. + MemIntrin // A memory intrinsic which is loaded from. + }; + + /// V - The value that is live out of the block. + PointerIntPair<Value *, 2, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
+ unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(isMemIntrinValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } + + /// MaterializeAdjustedValue - Emit code into this block to adjust the value + /// defined here to the specified type. This handles various coercion cases. + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; + }; class GVN : public FunctionPass { bool NoLoads; @@ -519,6 +588,11 @@ namespace { BumpPtrAllocator TableAllocator; SmallVector<Instruction*, 8> InstrsToErase; + + typedef SmallVector<NonLocalDepResult, 64> LoadDepVect; + typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect; + typedef SmallVector<BasicBlock*, 64> UnavailBlkVect; + public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) @@ -599,11 +673,17 @@ namespace { } - // Helper fuctions - // FIXME: eliminate or document these better + // Helper fuctions of redundant load elimination bool processLoad(LoadInst *L); - bool processInstruction(Instruction *I); bool processNonLocalLoad(LoadInst *L); + void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + + // Other helper routines + bool processInstruction(Instruction *I); bool processBlock(BasicBlock *BB); void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); @@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } -namespace { - -struct AvailableValueInBlock { - /// BB - The basic block in question. - BasicBlock *BB; - enum ValType { - SimpleVal, // A simple offsetted value that is accessed. - LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. - }; - - /// V - The value that is live out of the block. - PointerIntPair<Value *, 2, ValType> Val; - - /// Offset - The byte offset in Val that is interesting for the load query. 
- unsigned Offset; - - static AvailableValueInBlock get(BasicBlock *BB, Value *V, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(V); - Res.Val.setInt(SimpleVal); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(MI); - Res.Val.setInt(MemIntrin); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(LI); - Res.Val.setInt(LoadVal); - Res.Offset = Offset; - return Res; - } - - bool isSimpleValue() const { return Val.getInt() == SimpleVal; } - bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } - bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - - Value *getSimpleValue() const { - assert(isSimpleValue() && "Wrong accessor"); - return Val.getPointer(); - } - - LoadInst *getCoercedLoadValue() const { - assert(isCoercedLoadValue() && "Wrong accessor"); - return cast<LoadInst>(Val.getPointer()); - } - - MemIntrinsic *getMemIntrinValue() const { - assert(isMemIntrinValue() && "Wrong accessor"); - return cast<MemIntrinsic>(Val.getPointer()); - } - - /// MaterializeAdjustedValue - Emit code into this block to adjust the value - /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { - Value *Res; - if (isSimpleValue()) { - Res = getSimpleValue(); - if (Res->getType() != LoadTy) { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), - *TD); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " - << *getSimpleValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else if (isCoercedLoadValue()) { - LoadInst *Load = getCoercedLoadValue(); - if (Load->getType() == LoadTy && Offset == 0) { - Res = Load; - } else { - Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), - gvn); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " - << *getCoercedLoadValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *TD); - DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset - << " " << *getMemIntrinValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - return Res; - } -}; - -} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. 
This returns the value @@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { + Value *Res; + if (isSimpleValue()) { + Res = getSimpleValue(); + if (Res->getType() != LoadTy) { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), + *TD); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *getSimpleValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *getMemIntrinValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + return Res; +} + static bool isLifetimeStart(const Instruction *Inst) { if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } -/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are -/// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI) { - // Find the non-local dependencies of the load. - SmallVector<NonLocalDepResult, 64> Deps; - AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); - MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); - //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " - // << Deps.size() << *LI << '\n'); - - // If we had to process more than one hundred blocks to find the - // dependencies, this load isn't worth worrying about. Optimizing - // it will be too expensive. - unsigned NumDeps = Deps.size(); - if (NumDeps > 100) - return false; - - // If we had a phi translation failure, we'll have a single entry which is a - // clobber in the current block. Reject this early. - if (NumDeps == 1 && - !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { - DEBUG( - dbgs() << "GVN: non-local load "; - WriteAsOperand(dbgs(), LI); - dbgs() << " has unknown dependencies\n"; - ); - return false; - } +void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). 
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock; - SmallVector<BasicBlock*, 64> UnavailableBlocks; - + unsigned NumDeps = Deps.size(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { } UnavailableBlocks.push_back(DepBB); - continue; } +} - // If we have no predecessors that produce a known value for this load, exit - // early. - if (ValuesPerBlock.empty()) return false; - - // If all of the instructions we depend on produce a known value for this - // load, then it is fully redundant and we can use PHI insertion to compute - // its value. Insert PHIs and remove the fully redundant value now. - if (UnavailableBlocks.empty()) { - DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - - // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); - LI->replaceAllUsesWith(V); - - if (isa<PHINode>(V)) - V->takeName(LI); - if (V->getType()->getScalarType()->isPointerTy()) - MD->invalidateCachedPointerInfo(V); - markInstructionForDeletion(LI); - ++NumGVNLoad; - return true; - } - - if (!EnablePRE || !EnableLoadPRE) - return false; - +bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Okay, we have *some* definitions of the value. This means that the value // is available in some of our (transitive) predecessors. Lets think about // doing PRE of this load. This will involve inserting a new load into the @@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // pointer if it is not available. PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; - if (allSingleSucc) { - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); - } else { - Address.PHITranslateValue(LoadBB, UnavailablePred, DT); - LoadPtr = Address.getAddr(); - } + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. @@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { break; } - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can - // be 0, as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - // FIXME: REEVALUTE THIS. - !isSafeToLoadUnconditionally(LoadPtr, - UnavailablePred->getTerminator(), - LI->getAlignment(), TD)) { - CanDoPRE = false; - break; - } - I->second = LoadPtr; } @@ -1714,7 +1649,73 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// non-local by performing PHI construction. 
+bool GVN::processNonLocalLoad(LoadInst *LI) { + // Step 1: Find the non-local dependencies of the load. + LoadDepVect Deps; + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); + + // If we had to process more than one hundred blocks to find the + // dependencies, this load isn't worth worrying about. Optimizing + // it will be too expensive. + unsigned NumDeps = Deps.size(); + if (NumDeps > 100) + return false; + + // If we had a phi translation failure, we'll have a single entry which is a + // clobber in the current block. Reject this early. + if (NumDeps == 1 && + !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { + DEBUG( + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " has unknown dependencies\n"; + ); + return false; + } + + // Step 2: Analyze the availability of the load + AvailValInBlkVect ValuesPerBlock; + UnavailBlkVect UnavailableBlocks; + AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); + + // If we have no predecessors that produce a known value for this load, exit + // early. + if (ValuesPerBlock.empty()) + return false; + + // Step 3: Eliminate fully redundancy. + // + // If all of the instructions we depend on produce a known value for this + // load, then it is fully redundant and we can use PHI insertion to compute + // its value. Insert PHIs and remove the fully redundant value now. + if (UnavailableBlocks.empty()) { + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + + // Perform PHI construction. + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); + LI->replaceAllUsesWith(V); + + if (isa<PHINode>(V)) + V->takeName(LI); + if (V->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(LI); + ++NumGVNLoad; + return true; + } + + // Step 4: Eliminate partial redundancy. + if (!EnablePRE || !EnableLoadPRE) + return false; + + return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); +} + + +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1757,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1920,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2261,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! 
- patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d646..4796eb2953 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -64,10 +65,16 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +85,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that must not be merged away + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -87,6 +111,7 @@ namespace { virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); + virtual bool doFinalization(Module &M); const char *getPassName() const { return "Merge internal globals"; @@ -169,6 +194,42 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. 
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +237,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +262,10 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +287,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } @@ -234,6 +300,11 @@ bool GlobalMerge::runOnFunction(Function &F) { return false; } +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + Pass *llvm::createGlobalMergePass(const TargetLowering *tli) { return new GlobalMerge(tli); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 97fff7e782..8e76c78f5a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; + // Computing the value outside of the loop brings no benefit if : + // - it is definitely used inside the loop in a way which can not be + // optimized away. 
+ // - no use outside of the loop can take advantage of hoisting the + // computation out of the loop + if (ExitValue->getSCEVType()>=scMulExpr) { + unsigned NumHardInternalUses = 0; + unsigned NumSoftExternalUses = 0; + unsigned NumUses = 0; + for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end(); + IB!=IE && NumUses<=6 ; ++IB) { + Instruction *UseInstr = cast<Instruction>(*IB); + unsigned Opc = UseInstr->getOpcode(); + NumUses++; + if (L->contains(UseInstr)) { + if (Opc == Instruction::Call || Opc == Instruction::Ret) + NumHardInternalUses++; + } else { + if (Opc == Instruction::PHI) { + // Do not count the Phi as a use. LCSSA may have inserted + // plenty of trivial ones. + NumUses--; + for (Value::use_iterator PB=UseInstr->use_begin(), + PE=UseInstr->use_end(); + PB!=PE && NumUses<=6 ; ++PB, ++NumUses) { + unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode(); + if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret) + NumSoftExternalUses++; + } + continue; + } + if (Opc != Instruction::Call && Opc != Instruction::Ret) + NumSoftExternalUses++; + } + } + if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses) + continue; + } + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9c67e327e2..0b62050b17 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -34,13 +34,9 @@ namespace { } // Possibly eliminate loop L if it is dead. - bool runOnLoop(Loop* L, LPPassManager& LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM); - bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, - bool &Changed, BasicBlock *Preheader); - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); @@ -53,6 +49,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } + + private: + bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, + bool &Changed, BasicBlock *Preheader); + }; } @@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) -Pass* llvm::createLoopDeletionPass() { +Pass *llvm::createLoopDeletionPass() { return new LoopDeletion(); } -/// IsLoopDead - Determined if a loop is dead. This assumes that we've already +/// isLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. -bool LoopDeletion::IsLoopDead(Loop* L, - SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, +bool LoopDeletion::isLoopDead(Loop *L, + SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop @@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L, // sufficient to guarantee that no loop-variant values are used outside // of the loop. 
BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]); // Make sure all exiting blocks produce the same incoming value for the exit // block. If there are different incoming values for different exiting // blocks, then it is impossible to statically determine which value should // be used. - for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) { if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - if (Instruction* I = dyn_cast<Instruction>(incoming)) + if (Instruction *I = dyn_cast<Instruction>(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; @@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. - BasicBlock* preheader = L->getLoopPreheader(); + BasicBlock *preheader = L->getLoopPreheader(); if (!preheader) return false; @@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); + ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { SE.forgetLoop(L); // Connect the preheader directly to the exit block. - TerminatorInst* TI = preheader->getTerminator(); + TerminatorInst *TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. - BasicBlock* exitingBlock = exitingBlocks[0]; + BasicBlock *exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { + while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); @@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. 
- DominatorTree& DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { @@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. - LoopInfo& loopInfo = getAnalysis<LoopInfo>(); + LoopInfo &loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index e98ae953e5..14c5655f08 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -56,8 +56,8 @@ namespace { } bool runOnLoop(Loop *L, LPPassManager &LPM); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); + bool simplifyLoopLatch(Loop *L); + bool rotateLoop(Loop *L, bool SimplifiedLatch); private: LoopInfo *LI; @@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the // loop exit. - simplifyLoopLatch(L); + bool SimplifiedLatch = simplifyLoopLatch(L); // One loop can be rotated multiple times. bool MadeChange = false; - while (rotateLoop(L)) + while (rotateLoop(L, SimplifiedLatch)) { MadeChange = true; - + SimplifiedLatch = false; + } return MadeChange; } @@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, /// canonical form so downstream passes can handle it. /// /// I don't believe this invalidates SCEV. -void LoopRotate::simplifyLoopLatch(Loop *L) { +bool LoopRotate::simplifyLoopLatch(Loop *L) { BasicBlock *Latch = L->getLoopLatch(); if (!Latch || Latch->hasAddressTaken()) - return; + return false; BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); if (!Jmp || !Jmp->isUnconditional()) - return; + return false; BasicBlock *LastExit = Latch->getSinglePredecessor(); if (!LastExit || !L->isLoopExiting(LastExit)) - return; + return false; BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); if (!BI) - return; + return false; if (!shouldSpeculateInstrs(Latch->begin(), Jmp)) - return; + return false; DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " << LastExit->getName() << "\n"); @@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) { if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->eraseNode(Latch); Latch->eraseFromParent(); + return true; } /// Rotate loop LP. Return true if the loop is rotated. -bool LoopRotate::rotateLoop(Loop *L) { +/// +/// \param SimplifiedLatch is true if the latch was just folded into the final +/// loop exit. In this case we may want to rotate even though the new latch is +/// now an exiting branch. This rotation would have happened had the latch not +/// been simplified. However, if SimplifiedLatch is false, then we avoid +/// rotating loops in which the latch exits to avoid excessive or endless +/// rotation. LoopRotate should be repeatable and converge to a canonical +/// form. This property is satisfied because simplifying the loop latch can only +/// happen once across multiple invocations of the LoopRotate pass. 
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; @@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. - if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) + if (OrigLatch == 0) + return false; + + // Rotate if either the loop latch does *not* exit the loop, or if the loop + // latch was just simplified. + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) return false; // Check size of original header and reject loop if it is very big or we can't @@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) { ++NumRotated; return true; } - diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4e4cb86464..73e44d7edf 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg, } if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); - if (isLoser()) + if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } @@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (!NewRHS) return Cond; } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) @@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // by LSR. const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); + // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); Value *IVSrc = 0; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 0da3746950..a3c241dc0f 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -110,6 +110,51 @@ namespace { } }; }; + + /// Utility class representing a non-constant Xor-operand. We classify + /// non-constant Xor-Operands into two categories: + /// C1) The operand is in the form "X & C", where C is a constant and C != ~0 + /// C2) + /// C2.1) The operand is in the form of "X | C", where C is a non-zero + /// constant. 
+ /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this + /// operand as "E | 0" + class XorOpnd { + public: + XorOpnd(Value *V); + const XorOpnd &operator=(const XorOpnd &That); + + bool isInvalid() const { return SymbolicPart == 0; } + bool isOrExpr() const { return isOr; } + Value *getValue() const { return OrigVal; } + Value *getSymbolicPart() const { return SymbolicPart; } + unsigned getSymbolicRank() const { return SymbolicRank; } + const APInt &getConstPart() const { return ConstPart; } + + void Invalidate() { SymbolicPart = OrigVal = 0; } + void setSymbolicRank(unsigned R) { SymbolicRank = R; } + + // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. + // The purpose is twofold: + // 1) Cluster together the operands sharing the same symbolic-value. + // 2) Operand having smaller symbolic-value-rank is permuted earlier, which + // could potentially shorten crital path, and expose more loop-invariants. + // Note that values' rank are basically defined in RPO order (FIXME). + // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier + // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", + // "z" in the order of X-Y-Z is better than any other orders. + struct PtrSortFunctor { + bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { + return LHS->getSymbolicRank() < RHS->getSymbolicRank(); + } + }; + private: + Value *OrigVal; + Value *SymbolicPart; + APInt ConstPart; + unsigned SymbolicRank; + bool isOr; + }; } namespace { @@ -137,6 +182,11 @@ namespace { Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, + Value *&Res); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res); bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, SmallVectorImpl<Factor> &Factors); Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, @@ -148,6 +198,42 @@ namespace { }; } +XorOpnd::XorOpnd(Value *V) { + assert(!isa<ConstantInt>(V) && "No ConstantInt"); + OrigVal = V; + Instruction *I = dyn_cast<Instruction>(V); + SymbolicRank = 0; + + if (I && (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And)) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (isa<ConstantInt>(V0)) + std::swap(V0, V1); + + if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) { + ConstPart = C->getValue(); + SymbolicPart = V0; + isOr = (I->getOpcode() == Instruction::Or); + return; + } + } + + // view the operand as "V | 0" + SymbolicPart = V; + ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + isOr = true; +} + +const XorOpnd &XorOpnd::operator=(const XorOpnd &That) { + OrigVal = That.OrigVal; + SymbolicPart = That.SymbolicPart; + ConstPart = That.ConstPart; + SymbolicRank = That.SymbolicRank; + isOr = That.isOr; + return *this; +} + char Reassociate::ID = 0; INITIALIZE_PASS(Reassociate, "reassociate", "Reassociate expressions", false, false) @@ -1040,6 +1126,250 @@ static Value *OptimizeAndOrXor(unsigned Opcode, return 0; } +/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and +/// instruction with the given two operands, and return the resulting +/// instruction. There are two special cases: 1) if the constant operand is 0, +/// it will return NULL. 
2) if the constant is ~0, the symbolic operand will +/// be returned. +static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, + const APInt &ConstOpnd) { + if (ConstOpnd != 0) { + if (!ConstOpnd.isAllOnesValue()) { + LLVMContext &Ctx = Opnd->getType()->getContext(); + Instruction *I; + I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), + "and.ra", InsertBefore); + I->setDebugLoc(InsertBefore->getDebugLoc()); + return I; + } + return Opnd; + } + return 0; +} + +// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" +// into "R ^ C", where C would be 0, and R is a symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively; otherwise, false is returned, +// and both "Res" and "ConstOpnd" remain unchanged. +// +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, + APInt &ConstOpnd, Value *&Res) { + // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 + // = ((x | c1) ^ c1) ^ (c1 ^ c2) + // = (x & ~c1) ^ (c1 ^ c2) + // It is useful only when c1 == c2. + if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) { + if (!Opnd1->getValue()->hasOneUse()) + return false; + + const APInt &C1 = Opnd1->getConstPart(); + if (C1 != ConstOpnd) + return false; + + Value *X = Opnd1->getSymbolicPart(); + Res = createAndInstr(I, X, ~C1); + // ConstOpnd was C2, now C1 ^ C2. + ConstOpnd ^= C1; + + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + return true; + } + return false; +} + + +// Helper function of OptimizeXor(). It tries to simplify +// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a +// symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively (If the entire expression is +// evaluated to a constant, the Res is set to NULL); otherwise, false is +// returned, and both "Res" and "ConstOpnd" remain unchanged. +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res) { + Value *X = Opnd1->getSymbolicPart(); + if (X != Opnd2->getSymbolicPart()) + return false; + + // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) + int DeadInstNum = 1; + if (Opnd1->getValue()->hasOneUse()) + DeadInstNum++; + if (Opnd2->getValue()->hasOneUse()) + DeadInstNum++; + + // Xor-Rule 2: + // (x | c1) ^ (x & c2) + // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1 + // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1 + // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3 + // + if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) { + if (Opnd2->isOrExpr()) + std::swap(Opnd1, Opnd2); + + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); + APInt C3((~C1) ^ C2); + + // Do not increase code size! + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C1; + + } else if (Opnd1->isOrExpr()) { + // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 + // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); + APInt C3 = C1 ^ C2; + + // Do not increase code size + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 
1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C3; + } else { + // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) + // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); + APInt C3 = C1 ^ C2; + Res = createAndInstr(I, X, C3); + } + + // Put the original operands in the Redo list; hope they will be deleted + // as dead code. + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue())) + RedoInsts.insert(T); + + return true; +} + +/// Optimize a series of operands to an 'xor' instruction. If it can be reduced +/// to a single Value, it is returned, otherwise the Ops list is mutated as +/// necessary. +Value *Reassociate::OptimizeXor(Instruction *I, + SmallVectorImpl<ValueEntry> &Ops) { + if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) + return V; + + if (Ops.size() == 1) + return 0; + + SmallVector<XorOpnd, 8> Opnds; + SmallVector<XorOpnd*, 8> OpndPtrs; + Type *Ty = Ops[0].Op->getType(); + APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); + + // Step 1: Convert ValueEntry to XorOpnd + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + Value *V = Ops[i].Op; + if (!isa<ConstantInt>(V)) { + XorOpnd O(V); + O.setSymbolicRank(getRank(O.getSymbolicPart())); + Opnds.push_back(O); + } else + ConstOpnd ^= cast<ConstantInt>(V)->getValue(); + } + + // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". + // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate + // the "OpndPtrs" as well. For the similar reason, do not fuse this loop + // with the previous loop --- the iterator of the "Opnds" may be invalidated + // when new elements are added to the vector. + for (unsigned i = 0, e = Opnds.size(); i != e; ++i) + OpndPtrs.push_back(&Opnds[i]); + + // Step 2: Sort the Xor-Operands in a way such that the operands containing + // the same symbolic value cluster together. For instance, the input operand + // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: + // ("x | 123", "x & 789", "y & 456"). 
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); + + // Step 3: Combine adjacent operands + XorOpnd *PrevOpnd = 0; + bool Changed = false; + for (unsigned i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd *CurrOpnd = OpndPtrs[i]; + // The combined value + Value *CV; + + // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" + if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + Changed = true; + if (CV) + *CurrOpnd = XorOpnd(CV); + else { + CurrOpnd->Invalidate(); + continue; + } + } + + if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) { + PrevOpnd = CurrOpnd; + continue; + } + + // step 3.2: When previous and current operands share the same symbolic + // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" + // + if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { + // Remove previous operand + PrevOpnd->Invalidate(); + if (CV) { + *CurrOpnd = XorOpnd(CV); + PrevOpnd = CurrOpnd; + } else { + CurrOpnd->Invalidate(); + PrevOpnd = 0; + } + Changed = true; + } + } + + // Step 4: Reassemble the Ops + if (Changed) { + Ops.clear(); + for (unsigned int i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd &O = Opnds[i]; + if (O.isInvalid()) + continue; + ValueEntry VE(getRank(O.getValue()), O.getValue()); + Ops.push_back(VE); + } + if (ConstOpnd != 0) { + Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + ValueEntry VE(getRank(C), C); + Ops.push_back(VE); + } + int Sz = Ops.size(); + if (Sz == 1) + return Ops.back().Op; + else if (Sz == 0) { + assert(ConstOpnd == 0); + return ConstantInt::get(Ty->getContext(), ConstOpnd); + } + } + + return 0; +} + /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This /// optimizes based on identities. If it can be reduced to a single Value, it /// is returned, otherwise the Ops list is mutated as necessary. 
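The OptimizeXor additions above rest on the four bitwise identities the comments call Xor-Rules 1 through 4. The following is a minimal standalone C++ sketch, separate from the patch itself and using the illustrative helper name checkXorRules, that exhaustively verifies those identities over 8-bit values; it shows the algebra the pass relies on, not the pass implementation.

// Standalone sanity check of the four xor-reassociation identities,
// checked exhaustively over all 8-bit (x, c1, c2) triples.
#include <cassert>
#include <cstdint>
#include <iostream>

static void checkXorRules() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c1 = 0; c1 < 256; ++c1)
      for (unsigned c2 = 0; c2 < 256; ++c2) {
        uint8_t X = x, C1 = c1, C2 = c2;
        // Rule 1 (used only when c1 == c2): (x | c1) ^ c1 == x & ~c1
        assert(uint8_t((X | C1) ^ C1) == uint8_t(X & uint8_t(~C1)));
        // Rule 2: (x | c1) ^ (x & c2) == (x & (~c1 ^ c2)) ^ c1
        assert(uint8_t((X | C1) ^ (X & C2)) ==
               uint8_t((X & uint8_t(uint8_t(~C1) ^ C2)) ^ C1));
        // Rule 3: (x | c1) ^ (x | c2) == (x & (c1 ^ c2)) ^ (c1 ^ c2)
        assert(uint8_t((X | C1) ^ (X | C2)) ==
               uint8_t((X & uint8_t(C1 ^ C2)) ^ uint8_t(C1 ^ C2)));
        // Rule 4: (x & c1) ^ (x & c2) == x & (c1 ^ c2)
        assert(uint8_t((X & C1) ^ (X & C2)) == uint8_t(X & uint8_t(C1 ^ C2)));
      }
  std::cout << "All four xor-reassociation identities hold for 8-bit values\n";
}

int main() { checkXorRules(); }

Each rule rewrites a pair of xor operands into at most one new AND plus an adjusted constant, which is why the patch guards Rules 2 and 3 with the "do not increase code size" check: the rewrite is only taken when the number of newly created instructions does not exceed the number of operand instructions that become dead.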
@@ -1431,11 +1761,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, default: break; case Instruction::And: case Instruction::Or: - case Instruction::Xor: if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) return Result; break; + case Instruction::Xor: + if (Value *Result = OptimizeXor(I, Ops)) + return Result; + break; + case Instruction::Add: if (Value *Result = OptimizeAdd(I, Ops)) return Result; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e90fe907d5..d073e789dc 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -57,11 +57,15 @@ using namespace llvm; STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); -STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); -STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); +STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); +STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions"); +STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found"); +STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses"); +STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); +STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); -STATISTIC(NumDeleted, "Number of instructions deleted"); -STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumDeleted, "Number of instructions deleted"); +STATISTIC(NumVectorized, "Number of vectorized aggregates"); /// Hidden option to force the pass to not use DomTree and mem2reg, instead /// forming SSA values through the SSAUpdater infrastructure. @@ -69,112 +73,167 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { +/// \brief A custom IRBuilder inserter which prefixes all names if they are +/// preserved. +template <bool preserveNames = true> +class IRBuilderPrefixedInserter : + public IRBuilderDefaultInserter<preserveNames> { + std::string Prefix; + public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; + void SetNamePrefix(const Twine &P) { Prefix = P.str(); } - /// \brief The ending offset, not included in the range. - uint64_t EndOffset; +protected: + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<preserveNames>::InsertHelper( + I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); + } +}; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} +// Specialization for not preserving the name is trivial. 
+template <> +class IRBuilderPrefixedInserter<false> : + public IRBuilderDefaultInserter<false> { +public: + void SetNamePrefix(const Twine &P) {} +}; - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } +/// \brief Provide a typedef for IRBuilder that drops names in release builds. +#ifndef NDEBUG +typedef llvm::IRBuilder<true, ConstantFolder, + IRBuilderPrefixedInserter<true> > IRBuilderTy; +#else +typedef llvm::IRBuilder<false, ConstantFolder, + IRBuilderPrefixedInserter<false> > IRBuilderTy; +#endif +} - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } +namespace { +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief A partition of an alloca. + /// \brief Support for ordering ranges. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. - bool IsSplittable; + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; - } + /// \brief Support comparison with a single offset to allow binary searches. 
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } + + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } + + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. + /// + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } - /// \brief A particular use of a partition of the alloca. + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; + +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} + + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. 
As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. + Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} + +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} + +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +515,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +533,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? 
"load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. - bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +555,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +566,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +858,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. 
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +873,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +889,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -868,7 +929,7 @@ private: uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(); - MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; + const MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && Offsets.DestBegin == Offsets.SourceBegin) return markAsDead(II); // Skip identity transfers without side-effects. @@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) splitAndMergePartitions(); } + // Record how many partitions we end up with. + NumAllocaPartitions += Partitions.size(); + MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca); + // Now build up the user lists for each of these disjoint partitions by // re-walking the recursive users of the alloca. 
Uses.resize(Partitions.size()); @@ -1084,26 +1149,34 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) PtrI = UB.visitPtr(AI); assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); + + unsigned NumUses = 0; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx) + NumUses += Uses[Idx].size(); +#endif + NumAllocaPartitionUses += NumUses; + MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca); } Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); - } else { + else return 0; // Bail if we have weird uses. - } if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter @@ -1140,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1244,12 +1318,12 @@ public: // may be zapped by an optimization pass in future. if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(ZExt->getOperand(0)); - if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(SExt->getOperand(0)); if (!Arg) - Arg = SI->getOperand(0); + Arg = SI->getValueOperand(); } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - Arg = LI->getOperand(0); + Arg = LI->getPointerOperand(); } else { continue; } @@ -1375,11 +1449,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. 
- visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1473,7 +1547,7 @@ private: assert(!Loads.empty()); Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); - IRBuilder<> PHIBuilder(&PN); + IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -1496,7 +1570,7 @@ private: TerminatorInst *TI = Pred->getTerminator(); Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); Value *InVal = PN.getIncomingValue(Idx); - IRBuilder<> PredBuilder(TI); + IRBuilderTy PredBuilder(TI); LoadInst *Load = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + @@ -1523,8 +1597,8 @@ private: // inside the load. AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1571,16 +1645,16 @@ private: void visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // If the select isn't safe to speculate, just use simple logic to emit it. SmallVector<LoadInst *, 4> Loads; if (!isSafeSelectToSpeculate(SI, Loads)) return; + IRBuilderTy IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1591,7 +1665,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1623,8 +1697,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1641,9 +1715,8 @@ private: /// /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { +static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, + SmallVectorImpl<Value *> &Indices) { if (Indices.empty()) return BasePtr; @@ -1652,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); + return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); } /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1664,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, /// TargetTy. If we can't find one with the same type, we at least try to use /// one with the same size. If none of that works, we just produce the GEP as /// indicated by Indices to have the correct offset. 
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, Value *BasePtr, Type *Ty, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Ty == TargetTy) - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); // See if we can descend into a struct and locate a field with the correct // type. @@ -1696,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, if (ElementTy != TargetTy) Indices.erase(Indices.end() - NumLayers, Indices.end()); - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); } /// \brief Recursively compute indices for a natural GEP. /// /// This is the recursive step for getNaturalGEPWithOffset that walks down the /// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Offset == 0) - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); // We can't recurse through pointer types. if (Ty->isPointerTy()) @@ -1729,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, Prefix); + Offset, TargetTy, Indices); } if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { @@ -1742,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } StructType *STy = dyn_cast<StructType>(Ty); @@ -1761,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Get a natural GEP from a base pointer to a particular offset and @@ -1774,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, /// Indices, and setting Ty to the result subtype. /// /// If no natural GEP can be constructed, this function returns null. 
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, APInt Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1796,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the @@ -1814,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. -static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, - Value *Ptr, APInt Offset, Type *PointerTy, - const Twine &Prefix) { +static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, + Value *Ptr, APInt Offset, Type *PointerTy) { // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; @@ -1850,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // See if we can perform a natural GEP here. Indices.clear(); if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, - Indices, Prefix)) { + Indices)) { if (P->getType() == PointerTy) { // Zap any offset pointer that we ended up computing in previous rounds. if (OffsetPtr && OffsetPtr->use_empty()) @@ -1885,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, if (!OffsetPtr) { if (!Int8Ptr) { Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), - Prefix + ".raw_cast"); + "raw_cast"); Int8PtrOffset = Offset; } OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), - Prefix + ".raw_idx"); + "raw_idx"); } Ptr = OffsetPtr; // On the off chance we were targeting i8*, guard the bitcast here. if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); + Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); return Ptr; } @@ -1911,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1933,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { /// This will try various different casting techniques, such as bitcasts, /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test /// two types for viability with this routine. 
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *Ty) { assert(canConvertValue(DL, V->getType(), Ty) && "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1977,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1997,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2063,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. 
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2074,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2089,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2105,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2125,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, return WholeAllocaOp; } -static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { DEBUG(dbgs() << " start: " << *V << "\n"); @@ -2148,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, return V; } -static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, +static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name) { IntegerType *IntTy = cast<IntegerType>(Old->getType()); IntegerType *Ty = cast<IntegerType>(V->getType()); @@ -2179,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } -static Value *extractVector(IRBuilder<> &IRB, Value *V, +static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(V->getType()); @@ -2207,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V, return V; } -static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, +static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(Old->getType()); assert(VecTy && "Can only insert a vector into a vector"); @@ -2243,17 +2322,15 @@ static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".expand"); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); Mask.clear(); for (unsigned i = 0; i != 
VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), - Name + "insert"); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); + + V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); + + DEBUG(dbgs() << " blend: " << *V << "\n"); return V; } @@ -2297,11 +2374,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; - // The name prefix to use when rewriting instructions for this alloca. - std::string NamePrefix; + // Utility IR builder, whose name prefix is setup for each visited use, and + // the insertion point is set to point to the user. + IRBuilderTy IRB; public: AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, @@ -2314,7 +2393,8 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), + IRB(NewAI.getContext(), ConstantFolder()) { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2336,14 +2416,21 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); - NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); + + Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); + IRB.SetInsertPoint(OldUserI); + IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); + + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2365,14 +2452,10 @@ private: llvm_unreachable("No rewrite rule for this instruction!"); } - Twine getName(const Twine &Suffix) { - return NamePrefix + Suffix; - } - - Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { + Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); - return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); + return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); } /// \brief Compute suitable alignment to access an offset into the new alloca. 
@@ -2422,27 +2505,27 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { + Value *rewriteVectorizedLoadInst() { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); + "load"); + return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); } - Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; if (Offset > 0 || EndOffset < NewAllocaEndOffset) V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); + "extract"); return V; } @@ -2450,58 +2533,39 @@ private: DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB); + V = rewriteVectorizedLoadInst(); } else if (IntTy && LI.getType()->isIntegerTy()) { - V = rewriteIntegerLoad(IRB, LI); + V = rewriteIntegerLoad(LI); } else if (BeginOffset == NewAllocaBeginOffset && canConvertValue(TD, NewAllocaTy, LI.getType())) { V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); } else { Type *LTy = TargetTy->getPointerTo(); V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), getPartitionTypeAlign(TargetTy), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); IsPtrAdjusted = true; } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2511,7 +2575,7 @@ private: Value *Placeholder = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, - getName(".insert")); + "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; @@ -2525,7 +2589,7 @@ private: return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); @@ -2540,8 +2604,8 @@ private: // Mix in the existing elements. 
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + "load"); + V = insertVector(IRB, Old, V, BeginIndex, "vec"); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2551,17 +2615,17 @@ private: return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { + bool rewriteIntegerStore(Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, - getName(".insert")); + "insert"); } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); @@ -2575,7 +2639,6 @@ private: DEBUG(dbgs() << " original: " << SI << "\n"); Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); - IRBuilder<> IRB(&SI); Value *V = SI.getValueOperand(); @@ -2588,26 +2651,25 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, - getName(".extract")); + "extract"); } if (VecTy) - return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + return rewriteVectorizedStoreInst(V, SI, OldOp); if (IntTy && V->getType()->isIntegerTy()) - return rewriteIntegerStore(IRB, V, SI); + return rewriteIntegerStore(V, SI); StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset && canConvertValue(TD, V->getType(), NewAllocaTy)) { V = convertValue(TD, IRB, V, NewAllocaTy); NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), @@ -2635,7 +2697,7 @@ private: /// /// \param V The i8 value to splat. /// \param Size The number of bytes in the output (assuming i8 is one byte) - Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + Value *getIntegerSplat(Value *V, unsigned Size) { assert(Size > 0 && "Expected a positive number of bytes."); IntegerType *VTy = cast<IntegerType>(V->getType()); assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); @@ -2643,26 +2705,25 @@ private: return V; Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), ConstantExpr::getUDiv( Constant::getAllOnesValue(SplatIntTy), ConstantExpr::getZExt( Constant::getAllOnesValue(V->getType()), SplatIntTy)), - getName(".isplat")); + "isplat"); return V; } /// \brief Compute a vector splat for a given element value. 
- Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { - V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + Value *getVectorSplat(Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); DEBUG(dbgs() << " splat: " << *V << "\n"); return V; } bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawDest() == OldPtr); // If the memset has a variable size, it cannot be split, just adjust the @@ -2719,31 +2780,31 @@ private: unsigned NumElements = EndIndex - BeginIndex; assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Value *Splat = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ElementTy)/8); + Value *Splat = + getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8); Splat = convertValue(TD, IRB, Splat, ElementTy); if (NumElements > 1) - Splat = getVectorSplat(IRB, Splat, NumElements); + Splat = getVectorSplat(Splat, NumElements); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + "oldload"); + V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); } else if (IntTy) { // If this is a memset on an alloca where we can widen stores, insert the // set integer. assert(!II.isVolatile()); uint64_t Size = EndOffset - BeginOffset; - V = getIntegerSplat(IRB, II.getValue(), Size); + V = getIntegerSplat(II.getValue(), Size); if (IntTy && (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaBeginOffset)) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + V = insertInteger(TD, IRB, Old, V, Offset, "insert"); } else { assert(V->getType() == IntTy && "Wrong type for an alloca wide integer!"); @@ -2754,10 +2815,9 @@ private: assert(BeginOffset == NewAllocaBeginOffset); assert(EndOffset == NewAllocaEndOffset); - V = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ScalarTy)/8); + V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8); if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) - V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + V = getVectorSplat(V, AllocaVecTy->getNumElements()); V = convertValue(TD, IRB, V, AllocaTy); } @@ -2774,7 +2834,6 @@ private: // them into two categories: split intrinsics and unsplit intrinsics. DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); bool IsDest = II.getRawDest() == OldPtr; @@ -2858,8 +2917,7 @@ private: // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() @@ -2902,8 +2960,7 @@ private: OtherPtrTy = SubIntTy->getPointerTo(); } - Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." 
+ OtherPtr->getName())); + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); @@ -2911,31 +2968,31 @@ private: Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); + "load"); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); Src = convertValue(TD, IRB, Src, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract")); + Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract"); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), - getName(".copyload")); + "copyload"); } if (VecTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + "oldload"); + Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert")); + Src = insertInteger(TD, IRB, Old, Src, Offset, "insert"); Src = convertValue(TD, IRB, Src, NewAllocaTy); } @@ -2950,7 +3007,6 @@ private: assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. @@ -2978,7 +3034,9 @@ private: // as local as possible to the PHI. To do that, we re-use the location of // the old pointer, which necessarily must be in the right position to // dominate the PHI. - IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr)); + IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr)); + PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); // Replace the operands which were using the old pointer. @@ -2991,17 +3049,16 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); - - // Find the operand we need to rewrite here. - bool IsTrueVal = SI.getTrueValue() == OldPtr; - if (IsTrueVal) - assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!"); - else - assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!"); + assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && + "Pointer isn't an operand!"); Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType()); - SI.setOperand(IsTrueVal ? 1 : 2, NewPtr); + // Replace the operands which were using the old pointer. 
+ if (SI.getOperand(1) == OldPtr) + SI.setOperand(1, NewPtr); + if (SI.getOperand(2) == OldPtr) + SI.setOperand(2, NewPtr); + DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); return false; @@ -3066,7 +3123,7 @@ private: class OpSplitter { protected: /// The builder used to form new instructions. - IRBuilder<> IRB; + IRBuilderTy IRB; /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector<unsigned, 4> Indices; @@ -3278,12 +3335,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; - if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) + if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) return 0; - if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) + } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) return 0; + } Offset -= NumSkippedElements * ElementSize; // First check if we need to recurse. @@ -3381,7 +3439,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e590a374ea..bfde334c36 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { } // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the alloca instructions in the function, removing them -// if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the entry block, removing +// them if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); - if (!IdxVal) { - // Non constant GEPs are only a problem on arrays, structs, and pointers - // Vectors can be dynamically indexed. - // FIXME: Add support for dynamic indexing on arrays. This should be - // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0] - // isn't. - if (!(*GEPIt)->isVectorTy()) - return MarkUnsafe(Info, GEPI); - NonConstant = true; - NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); - } + if (!IdxVal) + return MarkUnsafe(Info, GEPI); } // Compute the offset due to this GEP and check if the alloca has a diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 916b37d4a8..3514e6c2aa 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! 
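The SROA hunks above rewrite a memset over a promoted alloca into an integer splat (getIntegerSplat) that is widened into a vector splat when the rewritten slice covers several elements. A minimal standalone sketch of the scalar value such a splat carries, assuming a plain helper rather than the pass's actual APInt-based code:

#include <cstdint>

// Hypothetical helper, not LLVM's getIntegerSplat: replicate the memset byte into
// every byte lane of an element that is `Bytes` wide. This is the scalar value the
// rewritten store splats across the alloca's vector elements.
static uint64_t splatByte(uint8_t Byte, unsigned Bytes) {
  uint64_t V = 0;
  for (unsigned I = 0; I != Bytes; ++I)
    V = (V << 8) | Byte;
  return V;
}
// splatByte(0xAB, 4) == 0xABABABAB: the value memset(ptr, 0xAB, ...) leaves in a
// 4-byte element, and the value getVectorSplat then broadcasts per element.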
@@ -35,7 +34,6 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); //===----------------------------------------------------------------------===// // Optimizer Base Class @@ -91,8 +89,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - - bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID) { @@ -104,14 +100,6 @@ namespace { void InitOptimizations(); bool runOnFunction(Function &F); - void setDoesNotAccessMemory(Function &F); - void setOnlyReadsMemory(Function &F); - void setDoesNotThrow(Function &F); - void setDoesNotCapture(Function &F, unsigned n); - void setDoesNotAlias(Function &F, unsigned n); - bool doInitialization(Module &M); - - void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfo>(); } @@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { return Changed; } -// Utility methods for doInitialization. - -void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - Modified = true; - } -} - - -void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - FunctionType *FTy = F.getFunctionType(); - - StringRef Name = F.getName(); - switch (Name[0]) { - case 's': - if (Name == "strlen") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strchr" || - Name == "strrchr") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "strcpy" || - Name == "stpcpy" || - Name == "strcat" || - Name == "strtol" || - Name == "strtod" || - Name == "strtof" || - Name == "strtoul" || - Name == "strtoll" || - Name == "strtold" || - Name == "strncat" || - Name == "strncpy" || - Name == "stpncpy" || - Name == "strtoull") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strxfrm") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strcmp" || - Name == "strspn" || - Name == "strncmp" || - Name == "strcspn" || - Name == "strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - 
setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "read" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - 
Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; these are valid pthread cancellation points. 
- setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return; - // May throw; places call through function pointer. 
- setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } -} - -/// doInitialization - Add attributes to well-known functions. 
-/// -bool SimplifyLibCalls::doInitialization(Module &M) { - Modified = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; - if (F.isDeclaration() && F.hasName()) - inferPrototypeAttributes(F); - } - return Modified; -} - // TODO: // Additional cases that we need to add to this file: // diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 00cda8e034..1f517d038d 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F, Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); // Compare operand values and branch - Value *ZeroV = MainBuilder.getInt32(0); + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); @@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F, // Get bitwidth of div/rem instruction IntegerType *T = cast<IntegerType>(J->getType()); - int bitwidth = T->getBitWidth(); + unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index a309bce544..be8d39e128 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -87,26 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(I) && "No mapping from source argument specified!"); #endif - // Clone any attributes. - if (NewFunc->arg_size() == OldFunc->arg_size()) - NewFunc->copyAttributesFrom(OldFunc); - else { - //Some arguments were deleted with the VMap. Copy arguments one by one - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(VMap[I])) - Anew->addAttr(OldFunc->getAttributes() - .getParamAttributes(I->getArgNo() + 1)); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldFunc->getAttributes())); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldFunc->getAttributes())); + AttributeSet OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); + I != E; ++I) + if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = + OldAttrs.getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } - } + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d2598a221..dabb67b921 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -82,7 +82,8 @@ namespace { /// a simple branch. 
When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. - void forwardResume(ResumeInst *RI); + void forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// block. When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI) { +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. If the code doesn't have calls or unwinds, we know there is - // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls) { - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); - return; - } - + // rewrite. InvokeInliningInfo Invoke(II); - + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { @@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, continue; } + // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI); + Invoke.forwardResume(RI, InlinedLPads); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the + // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. 
InvokeDest->removePredecessor(II->getParent()); } @@ -748,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. @@ -777,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(NewBr, + AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call @@ -840,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; - BranchInst::Create(AfterCallBB, RI); + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); RI->eraseFromParent(); } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. @@ -864,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a54ee08b67..12e5b3e9d2 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// Dbg Intrinsic utilities /// -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. 
+ llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { @@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, SI)) + return true; + Instruction *DbgVal = NULL; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, return true; } -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { @@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, LI)) + return true; + Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); @@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) { E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + // We only remove the dbg.declare intrinsic if all uses are + // converted to dbg.value intrinsics. bool RemoveDDI = true; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) @@ -985,22 +1010,17 @@ bool llvm::removeUnreachableBlocks(Function &F) { if (Reachable.count(I)) continue; - // Remove the block as predecessor of all its reachable successors. - // Unreachable successors don't matter as they'll soon be removed, too. for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(I); + I->dropAllReferences(); + } - // Zap all instructions in this basic block. 
- while (!I->empty()) { - Instruction &Inst = I->back(); - if (!Inst.use_empty()) - Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); - I->getInstList().pop_back(); - } + for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; - --I; - llvm::next(I)->eraseFromParent(); - } return true; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a63d31d5af..052ad85551 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt<bool> +HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store preceeds")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + return 0; + + StoreInst *SI = dyn_cast<StoreInst>(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + /// \brief Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. 
Speculating @@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from + // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { @@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (!HaveRewritablePHIs) + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<true, NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), llvm::prior(ThenBB->end())); @@ -2789,9 +2875,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. + bool InvokeRequiresTableEntry = false; + bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + + if (II->hasFnAttr(Attribute::UWTable)) { + // Don't remove an `invoke' instruction if the ABI requires an entry into + // the table. + InvokeRequiresTableEntry = true; + continue; + } + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2811,11 +2908,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); + Changed = true; } - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - return true; + if (!InvokeRequiresTableEntry) + // The landingpad is now unreachable. Zap it. 
+ BB->eraseFromParent(); + + return Changed; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3059,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); - Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); @@ -3944,11 +4049,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) - return LI->getPointerAddressSpace() == 0; + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; // Store to null is undefined. if (StoreInst *SI = dyn_cast<StoreInst>(Use)) - return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; } return false; } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b25febaa14..cadec21c50 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -15,11 +15,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -798,8 +800,7 @@ struct StrToOpt : public LibCallOptimization { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::get(Callee->getContext(), - Attribute::NoCapture)); + CI->addAttribute(1, Attribute::NoCapture); } return 0; @@ -1517,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization { if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return 0; + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1526,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? 
ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + return EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1543,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } @@ -1673,67 +1677,17 @@ class LibCallSimplifierImpl { const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; bool UnsafeFPShrink; - StringMap<LibCallOptimization*, BumpPtrAllocator> Optimizations; - - // Fortified library call optimizations. - MemCpyChkOpt MemCpyChk; - MemMoveChkOpt MemMoveChk; - MemSetChkOpt MemSetChk; - StrCpyChkOpt StrCpyChk; - StpCpyChkOpt StpCpyChk; - StrNCpyChkOpt StrNCpyChk; - - // String library call optimizations. - StrCatOpt StrCat; - StrNCatOpt StrNCat; - StrChrOpt StrChr; - StrRChrOpt StrRChr; - StrCmpOpt StrCmp; - StrNCmpOpt StrNCmp; - StrCpyOpt StrCpy; - StpCpyOpt StpCpy; - StrNCpyOpt StrNCpy; - StrLenOpt StrLen; - StrPBrkOpt StrPBrk; - StrToOpt StrTo; - StrSpnOpt StrSpn; - StrCSpnOpt StrCSpn; - StrStrOpt StrStr; - - // Memory library call optimizations. - MemCmpOpt MemCmp; - MemCpyOpt MemCpy; - MemMoveOpt MemMove; - MemSetOpt MemSet; - // Math library call optimizations. - UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; - // Integer library call optimizations. - FFSOpt FFS; - AbsOpt Abs; - IsDigitOpt IsDigit; - IsAsciiOpt IsAscii; - ToAsciiOpt ToAscii; - - // Formatting and IO library call optimizations. - PrintFOpt PrintF; - SPrintFOpt SPrintF; - FPrintFOpt FPrintF; - FWriteOpt FWrite; - FPutsOpt FPuts; - PutsOpt Puts; - - void initOptimizations(); - void addOpt(LibFunc::Func F, LibCallOptimization* Opt); - void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + // Math library call optimizations. + CosOpt Cos; + PowOpt Pow; + Exp2Opt Exp2; public: LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, bool UnsafeFPShrink = false) - : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true), - Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { + : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { this->TD = TD; this->TLI = TLI; this->LCS = LCS; @@ -1741,123 +1695,234 @@ public: } Value *optimizeCall(CallInst *CI); + LibCallOptimization *lookupOptimization(CallInst *CI); + bool hasFloatVersion(StringRef FuncName); }; -void LibCallSimplifierImpl::initOptimizations() { - // Fortified library call optimizations. 
- Optimizations["__memcpy_chk"] = &MemCpyChk; - Optimizations["__memmove_chk"] = &MemMoveChk; - Optimizations["__memset_chk"] = &MemSetChk; - Optimizations["__strcpy_chk"] = &StrCpyChk; - Optimizations["__stpcpy_chk"] = &StpCpyChk; - Optimizations["__strncpy_chk"] = &StrNCpyChk; - Optimizations["__stpncpy_chk"] = &StrNCpyChk; - - // String library call optimizations. - addOpt(LibFunc::strcat, &StrCat); - addOpt(LibFunc::strncat, &StrNCat); - addOpt(LibFunc::strchr, &StrChr); - addOpt(LibFunc::strrchr, &StrRChr); - addOpt(LibFunc::strcmp, &StrCmp); - addOpt(LibFunc::strncmp, &StrNCmp); - addOpt(LibFunc::strcpy, &StrCpy); - addOpt(LibFunc::stpcpy, &StpCpy); - addOpt(LibFunc::strncpy, &StrNCpy); - addOpt(LibFunc::strlen, &StrLen); - addOpt(LibFunc::strpbrk, &StrPBrk); - addOpt(LibFunc::strtol, &StrTo); - addOpt(LibFunc::strtod, &StrTo); - addOpt(LibFunc::strtof, &StrTo); - addOpt(LibFunc::strtoul, &StrTo); - addOpt(LibFunc::strtoll, &StrTo); - addOpt(LibFunc::strtold, &StrTo); - addOpt(LibFunc::strtoull, &StrTo); - addOpt(LibFunc::strspn, &StrSpn); - addOpt(LibFunc::strcspn, &StrCSpn); - addOpt(LibFunc::strstr, &StrStr); - - // Memory library call optimizations. - addOpt(LibFunc::memcmp, &MemCmp); - addOpt(LibFunc::memcpy, &MemCpy); - addOpt(LibFunc::memmove, &MemMove); - addOpt(LibFunc::memset, &MemSet); +bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) { + LibFunc::Func Func; + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + if (TLI->getLibFunc(FloatFuncName, Func)) + return TLI->has(Func); + return false; +} - // Math library call optimizations. - addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); - addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); - addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); - addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); - addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); - addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); - addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); - - if(UnsafeFPShrink) { - addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); +// Fortified library call optimizations. 
+static MemCpyChkOpt MemCpyChk; +static MemMoveChkOpt MemMoveChk; +static MemSetChkOpt MemSetChk; +static StrCpyChkOpt StrCpyChk; +static StpCpyChkOpt StpCpyChk; +static StrNCpyChkOpt StrNCpyChk; + +// String library call optimizations. +static StrCatOpt StrCat; +static StrNCatOpt StrNCat; +static StrChrOpt StrChr; +static StrRChrOpt StrRChr; +static StrCmpOpt StrCmp; +static StrNCmpOpt StrNCmp; +static StrCpyOpt StrCpy; +static StpCpyOpt StpCpy; +static StrNCpyOpt StrNCpy; +static StrLenOpt StrLen; +static StrPBrkOpt StrPBrk; +static StrToOpt StrTo; +static StrSpnOpt StrSpn; +static StrCSpnOpt StrCSpn; +static StrStrOpt StrStr; + +// Memory library call optimizations. +static MemCmpOpt MemCmp; +static MemCpyOpt MemCpy; +static MemMoveOpt MemMove; +static MemSetOpt MemSet; + +// Math library call optimizations. +static UnaryDoubleFPOpt UnaryDoubleFP(false); +static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + + // Integer library call optimizations. +static FFSOpt FFS; +static AbsOpt Abs; +static IsDigitOpt IsDigit; +static IsAsciiOpt IsAscii; +static ToAsciiOpt ToAscii; + +// Formatting and IO library call optimizations. +static PrintFOpt PrintF; +static SPrintFOpt SPrintF; +static FPrintFOpt FPrintF; +static FWriteOpt FWrite; +static FPutsOpt FPuts; +static PutsOpt Puts; + +LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + + // Next check for intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return &Pow; + case Intrinsic::exp2: + return &Exp2; + default: + return 0; + } } - addOpt(LibFunc::cosf, &Cos); - addOpt(LibFunc::cos, &Cos); - addOpt(LibFunc::cosl, &Cos); - addOpt(LibFunc::powf, &Pow); - addOpt(LibFunc::pow, &Pow); - addOpt(LibFunc::powl, &Pow); - Optimizations["llvm.pow.f32"] = &Pow; - Optimizations["llvm.pow.f64"] = &Pow; - Optimizations["llvm.pow.f80"] = &Pow; - Optimizations["llvm.pow.f128"] = &Pow; - Optimizations["llvm.pow.ppcf128"] = &Pow; - addOpt(LibFunc::exp2l, &Exp2); - addOpt(LibFunc::exp2, &Exp2); - addOpt(LibFunc::exp2f, &Exp2); - Optimizations["llvm.exp2.ppcf128"] = &Exp2; - Optimizations["llvm.exp2.f128"] = &Exp2; - Optimizations["llvm.exp2.f80"] = &Exp2; - Optimizations["llvm.exp2.f64"] = &Exp2; - Optimizations["llvm.exp2.f32"] = &Exp2; + // Then check for known library functions. 
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + switch (Func) { + case LibFunc::strcat: + return &StrCat; + case LibFunc::strncat: + return &StrNCat; + case LibFunc::strchr: + return &StrChr; + case LibFunc::strrchr: + return &StrRChr; + case LibFunc::strcmp: + return &StrCmp; + case LibFunc::strncmp: + return &StrNCmp; + case LibFunc::strcpy: + return &StrCpy; + case LibFunc::stpcpy: + return &StpCpy; + case LibFunc::strncpy: + return &StrNCpy; + case LibFunc::strlen: + return &StrLen; + case LibFunc::strpbrk: + return &StrPBrk; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + return &StrTo; + case LibFunc::strspn: + return &StrSpn; + case LibFunc::strcspn: + return &StrCSpn; + case LibFunc::strstr: + return &StrStr; + case LibFunc::memcmp: + return &MemCmp; + case LibFunc::memcpy: + return &MemCpy; + case LibFunc::memmove: + return &MemMove; + case LibFunc::memset: + return &MemSet; + case LibFunc::cosf: + case LibFunc::cos: + case LibFunc::cosl: + return &Cos; + case LibFunc::powf: + case LibFunc::pow: + case LibFunc::powl: + return &Pow; + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + return &Exp2; + case LibFunc::ffs: + case LibFunc::ffsl: + case LibFunc::ffsll: + return &FFS; + case LibFunc::abs: + case LibFunc::labs: + case LibFunc::llabs: + return &Abs; + case LibFunc::isdigit: + return &IsDigit; + case LibFunc::isascii: + return &IsAscii; + case LibFunc::toascii: + return &ToAscii; + case LibFunc::printf: + return &PrintF; + case LibFunc::sprintf: + return &SPrintF; + case LibFunc::fprintf: + return &FPrintF; + case LibFunc::fwrite: + return &FWrite; + case LibFunc::fputs: + return &FPuts; + case LibFunc::puts: + return &Puts; + case LibFunc::ceil: + case LibFunc::fabs: + case LibFunc::floor: + case LibFunc::rint: + case LibFunc::round: + case LibFunc::nearbyint: + case LibFunc::trunc: + if (hasFloatVersion(FuncName)) + return &UnaryDoubleFP; + return 0; + case LibFunc::acos: + case LibFunc::acosh: + case LibFunc::asin: + case LibFunc::asinh: + case LibFunc::atan: + case LibFunc::atanh: + case LibFunc::cbrt: + case LibFunc::cosh: + case LibFunc::exp: + case LibFunc::exp10: + case LibFunc::expm1: + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + case LibFunc::sin: + case LibFunc::sinh: + case LibFunc::sqrt: + case LibFunc::tan: + case LibFunc::tanh: + if (UnsafeFPShrink && hasFloatVersion(FuncName)) + return &UnsafeUnaryDoubleFP; + return 0; + case LibFunc::memcpy_chk: + return &MemCpyChk; + default: + return 0; + } + } + + // Finally check for fortified library calls. + if (FuncName.endswith("_chk")) { + if (FuncName == "__memmove_chk") + return &MemMoveChk; + else if (FuncName == "__memset_chk") + return &MemSetChk; + else if (FuncName == "__strcpy_chk") + return &StrCpyChk; + else if (FuncName == "__stpcpy_chk") + return &StpCpyChk; + else if (FuncName == "__strncpy_chk") + return &StrNCpyChk; + else if (FuncName == "__stpncpy_chk") + return &StrNCpyChk; + } + + return 0; - // Integer library call optimizations. - addOpt(LibFunc::ffs, &FFS); - addOpt(LibFunc::ffsl, &FFS); - addOpt(LibFunc::ffsll, &FFS); - addOpt(LibFunc::abs, &Abs); - addOpt(LibFunc::labs, &Abs); - addOpt(LibFunc::llabs, &Abs); - addOpt(LibFunc::isdigit, &IsDigit); - addOpt(LibFunc::isascii, &IsAscii); - addOpt(LibFunc::toascii, &ToAscii); - - // Formatting and IO library call optimizations. 
- addOpt(LibFunc::printf, &PrintF); - addOpt(LibFunc::sprintf, &SPrintF); - addOpt(LibFunc::fprintf, &FPrintF); - addOpt(LibFunc::fwrite, &FWrite); - addOpt(LibFunc::fputs, &FPuts); - addOpt(LibFunc::puts, &Puts); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { - if (Optimizations.empty()) - initOptimizations(); + LibCallOptimization *LCO = lookupOptimization(CI); // @LOCALMOD-BEGIN Function *Caller = CI->getParent()->getParent(); @@ -1866,8 +1931,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { if (TLI->getLibFunc(Caller->getName(), F)) return 0; // @LOCALMOD-END - Function *Callee = CI->getCalledFunction(); - LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); if (LCO) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); @@ -1875,16 +1938,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { return 0; } -void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { - if (TLI->has(F)) - Optimizations[TLI->getName(F)] = Opt; -} - -void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2, - LibCallOptimization* Opt) { - if (TLI->has(F1) && TLI->has(F2)) - Optimizations[TLI->getName(F1)] = Opt; -} LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI, diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 5812d4607d..c3df215c29 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index b5941bdf24..544c5eed7e 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = const_cast<Value*>(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); VM[V] = Dummy; // Check all operands to see if any need to be remapped. 
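(Aside, not part of the diff: the SimplifyLibCalls hunk above replaces the lazily built name-to-optimizer map with a direct lookup keyed on the LibFunc enum. A minimal sketch of that dispatch pattern, with hypothetical optimizer objects StrLenOpt and MemCpyOpt standing in for the real ones, could look like this.)

    struct LibCallOpt { /* optimizer state */ };
    static LibCallOpt StrLenOpt, MemCpyOpt;   // illustrative stand-ins, not LLVM's names

    // Return the optimizer for F, or 0 if the target does not provide F.
    static LibCallOpt *lookupOpt(LibFunc::Func F, const TargetLibraryInfo *TLI) {
      if (!TLI->has(F))
        return 0;
      switch (F) {
      case LibFunc::strlen: return &StrLenOpt;
      case LibFunc::memcpy: return &MemCpyOpt;
      default:              return 0;          // nothing to pre-populate or keep in sync
      }
    }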
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 76365417aa..17900dabbe 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1771,7 +1771,7 @@ namespace { size_t MaxDepth = DAG.lookup(IJ); DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << DAG.size() << "\n"); // At this point the DAG has been constructed, but, may contain @@ -2086,7 +2086,7 @@ namespace { DEBUG(if (DebugPairSelection) dbgs() << "BBV: found pruned DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << PrunedDAG.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt index e64034ab26..7ae082f55e 100644 --- a/lib/Transforms/Vectorize/CMakeLists.txt +++ b/lib/Transforms/Vectorize/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMVectorize BBVectorize.cpp Vectorize.cpp LoopVectorize.cpp + SLPVectorizer.cpp + VecUtils.cpp ) add_dependencies(LLVMVectorize intrinsics_gen) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a696a2ffba..11ee99ddf1 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizer uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. +// and generates target-independent LLVM-IR. +// The vectorizer uses the TargetTransformInfo analysis to estimate the costs +// of instructions in order to estimate the profitability of vectorization. // // The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented @@ -78,7 +78,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -87,6 +89,7 @@ #include <map> using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, @@ -112,9 +115,15 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// When performing a runtime memory check, do not check more than this -/// number of pointers. Notice that the check is quadratic! -static const unsigned RuntimeMemoryCheckThreshold = 4; +/// When performing memory disambiguation checks at runtime do not make more +/// than this number of comparisons. 
+static const unsigned RuntimeMemoryCheckThreshold = 8; + +/// We use a metadata with this name to indicate that a scalar loop was +/// vectorized and that we don't need to re-vectorize it if we run into it +/// again. +static const char* +AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized"; namespace { @@ -208,7 +217,7 @@ private: /// This function adds 0, 1, 2 ... to each vector element, starting at zero. /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). /// The sequence starts at StartIndex. - Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate); + Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); /// When we go over instructions in the basic block we rely on previous /// values within the current basic block or on loop invariant values. @@ -327,7 +336,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0) {} + Induction(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -337,8 +346,10 @@ public: RK_IntegerOr, ///< Bitwise or logical OR of numbers. RK_IntegerAnd, ///< Bitwise or logical AND of numbers. RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). RK_FloatAdd, ///< Sum of floats. - RK_FloatMult ///< Product of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). }; /// This enum represents the kinds of inductions that we support. @@ -350,21 +361,52 @@ public: IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). }; + // This enum represents the kind of minmax reduction. + enum MinMaxReductionKind { + MRK_Invalid, + MRK_UIntMin, + MRK_UIntMax, + MRK_SIntMin, + MRK_SIntMax, + MRK_FloatMin, + MRK_FloatMax + }; + /// This POD struct holds information about reduction variables. struct ReductionDescriptor { ReductionDescriptor() : StartValue(0), LoopExitInstr(0), - Kind(RK_NoReduction) {} + Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K) - : StartValue(Start), LoopExitInstr(Exit), Kind(K) {} + ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, + MinMaxReductionKind MK) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} // The starting value of the reduction. // It does not have to be zero! - Value *StartValue; + TrackingVH<Value> StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr; // The kind of the reduction. ReductionKind Kind; + // If this a min/max reduction the kind of reduction. + MinMaxReductionKind MinMaxKind; + }; + + /// This POD struct holds information about a potential reduction operation. + struct ReductionInstDesc { + ReductionInstDesc(bool IsRedux, Instruction *I) : + IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + + ReductionInstDesc(Instruction *I, MinMaxReductionKind K) : + IsReduction(true), PatternLastInst(I), MinMaxKind(K) {} + + // Is this instruction a reduction candidate. + bool IsReduction; + // The last instruction in a min/max pattern (select of the select(icmp()) + // pattern), or the current reduction instruction otherwise. + Instruction *PatternLastInst; + // If this is a min/max pattern the comparison predicate. 
+ MinMaxReductionKind MinMaxKind; }; // This POD struct holds information about the memory runtime legality @@ -381,16 +423,18 @@ public: } /// Insert a pointer and calculate the start and end SCEVs. - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr); + void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. - SmallVector<Value*, 2> Pointers; + SmallVector<TrackingVH<Value>, 2> Pointers; /// Holds the pointer value at the beginning of the loop. SmallVector<const SCEV*, 2> Starts; /// Holds the pointer value at the end of the loop. SmallVector<const SCEV*, 2> Ends; + /// Holds the information if this pointer is used for writing to memory. + SmallVector<bool, 2> IsWritePtr; }; /// A POD for saving information about induction variables. @@ -398,7 +442,7 @@ public: InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} InductionInfo() : StartValue(0), IK(IK_NoInduction) {} /// Start value. - Value *StartValue; + TrackingVH<Value> StartValue; /// Induction kind. InductionKind IK; }; @@ -413,7 +457,7 @@ public: /// Alias(Multi)Map stores the values (GEPs or underlying objects and their /// respective Store/Load instruction(s) to calculate aliasing. - typedef DenseMap<Value*, Instruction* > AliasMap; + typedef MapVector<Value*, Instruction* > AliasMap; typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap; /// Returns true if it is legal to vectorize this loop. @@ -455,6 +499,10 @@ public: /// Returns the information that we collected about runtime memory check. RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; } + + /// This function returns the identity element (or neutral element) for + /// the operation K. + static Constant *getReductionIdentity(ReductionKind K, Type *Tp); private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -481,9 +529,17 @@ private: /// Returns True, if 'Phi' is the kind of reduction variable for type /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. - bool isReductionInstr(Instruction *I, ReductionKind Kind); + /// Returns a struct describing if the instruction 'I' can be a reduction + /// variable of type 'Kind'. If the reduction is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// 'PatternLastInst' member of the returned struct. + ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind, + ReductionInstDesc &Desc); + /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction + /// pattern corresponding to a min(X, Y) or max(X, Y). + static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev); /// Returns the induction kind of Phi. This function may return NoInduction /// if the PHI is not an induction variable. InductionKind isInductionVariable(PHINode *Phi); @@ -534,6 +590,8 @@ private: /// We need to check that all of the pointers in this list are disjoint /// at runtime. RuntimePointerCheck PtrRtCheck; + /// Can we assume the absence of NaNs. 
+ bool HasFunNoNaNAttr; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -656,6 +714,11 @@ struct LoopVectorize : public LoopPass { AA = getAnalysisIfAvailable<AliasAnalysis>(); TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + if (DL == NULL) { + DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); + return false; + } + DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); @@ -731,7 +794,8 @@ struct LoopVectorize : public LoopPass { void LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, - Loop *Lp, Value *Ptr) { + Loop *Lp, Value *Ptr, + bool WritePtr) { const SCEV *Sc = SE->getSCEV(Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); @@ -740,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, Pointers.push_back(Ptr); Starts.push_back(AR->getStart()); Ends.push_back(ScEnd); + IsWritePtr.push_back(WritePtr); } Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { @@ -765,7 +830,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { return Shuf; } -Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, +Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, bool Negate) { assert(Val->getType()->isVectorTy() && "Must be a vector"); assert(Val->getType()->getScalarType()->isIntegerTy() && @@ -778,8 +843,8 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, // Create a vector of consecutive numbers from zero to VF. for (int i = 0; i < VLen; ++i) { - int Idx = Negate ? (-i): i; - Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx)); + int64_t Idx = Negate ? (-i) : i; + Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate)); } // Add the consecutive indices to the vector value. @@ -899,13 +964,19 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Type *DataTy = VectorType::get(ScalarDataTy, VF); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); + unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); + unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; + + if (ScalarAllocatedSize != VectorElementSize) + return scalarizeInstruction(Instr); // If the pointer is loop invariant or if it is non consecutive, // scalarize the load. 
- int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; bool UniformLoad = LI && Legal->isUniform(Ptr); - if (Stride == 0 || UniformLoad) + if (!ConsecutiveStride || UniformLoad) return scalarizeInstruction(Instr); Constant *Zero = Builder.getInt32(0); @@ -968,7 +1039,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); } - Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); } } @@ -984,7 +1055,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); } - Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); cast<LoadInst>(LI)->setAlignment(Alignment); Entry[Part] = Reverse ? reverseVector(LI) : LI; @@ -1034,10 +1105,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - // For each scalar that we create: - for (unsigned Width = 0; Width < VF; ++Width) { - // For each vector unroll 'part': - for (unsigned Part = 0; Part < UF; ++Part) { + // For each vector unroll 'part': + for (unsigned Part = 0; Part < UF; ++Part) { + // For each scalar that we create: + for (unsigned Width = 0; Width < VF; ++Width) { Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); @@ -1104,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { + // No need to check if two readonly pointers intersect. + if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j]) + continue; + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); @@ -1159,6 +1234,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); + // Mark the old scalar loop with metadata that tells us not to vectorize this + // loop again if we run into it. + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None); + OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); + // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we @@ -1425,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /// This function returns the identity element (or neutral element) for /// the operation K. 
-static Constant* -getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { +Constant* +LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { switch (K) { - case LoopVectorizationLegality:: RK_IntegerXor: - case LoopVectorizationLegality:: RK_IntegerAdd: - case LoopVectorizationLegality:: RK_IntegerOr: + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); - case LoopVectorizationLegality:: RK_IntegerMult: + case RK_IntegerMult: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); - case LoopVectorizationLegality:: RK_IntegerAnd: + case RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); - case LoopVectorizationLegality:: RK_FloatMult: + case RK_FloatMult: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); - case LoopVectorizationLegality:: RK_FloatAdd: + case RK_FloatAdd: // Adding zero to a number does not change it. return ConstantFP::get(Tp, 0.0L); default: @@ -1555,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { } /// This function translates the reduction kind to an LLVM binary operator. -static Instruction::BinaryOps +static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { switch (Kind) { case LoopVectorizationLegality::RK_IntegerAdd: @@ -1572,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { return Instruction::FMul; case LoopVectorizationLegality::RK_FloatAdd: return Instruction::FAdd; + case LoopVectorizationLegality::RK_IntegerMinMax: + return Instruction::ICmp; + case LoopVectorizationLegality::RK_FloatMinMax: + return Instruction::FCmp; default: llvm_unreachable("Unknown reduction operation"); } } +Value *createMinMaxOp(IRBuilder<> &Builder, + LoopVectorizationLegality::MinMaxReductionKind RK, + Value *Left, + Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max reduction kind"); + case LoopVectorizationLegality::MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case LoopVectorizationLegality::MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case LoopVectorizationLegality::MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case LoopVectorizationLegality::MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case LoopVectorizationLegality::MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case LoopVectorizationLegality::MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + Value *Cmp; + if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + void InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// @@ -1632,7 +1754,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // To do so, we need to generate the 'identity' vector and overide // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. 
- Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -1640,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. - Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType()); - Constant *Identity = ConstantVector::getSplat(VF, Iden); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - Value *VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); + Value *Identity; + Value *VectorStart; + if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || + RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { + // MinMax reduction have the start value as their identify. + VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue, + "minmax.ident"); + } else { + Constant *Iden = + LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, + VecTy->getScalarType()); + Identity = ConstantVector::getSplat(VF, Iden); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = Builder.CreateInsertElement(Identity, + RdxDesc.StartValue, Zero); + } // Fix the vector-loop phi. // We created the induction variable so we know that the @@ -1688,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Reduce all of the unrolled parts into a single vector. Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = getReductionBinOp(RdxDesc.Kind); for (unsigned part = 1; part < UF; ++part) { - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx, - "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, + RdxParts[part], ReducedPartRdx, + "bin.rdx"); + else + ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind, + ReducedPartRdx, RdxParts[part]); } // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles @@ -1716,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { ConstantVector::get(ShuffleMask), "rdx.shuf"); - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); + else + TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf); } // The result is in the first element of the vector. @@ -1850,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // We know that all PHIs in non header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. - // At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. 
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0), - P->getParent()); - for (unsigned part = 0; part < UF; ++part) { - VectorParts &In0 = getVectorValue(P->getIncomingValue(0)); - VectorParts &In1 = getVectorValue(P->getIncomingValue(1)); - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part], - "predphi"); + unsigned NumIncoming = P->getNumIncomingValues(); + assert(NumIncoming > 1 && "Invalid PHI"); + + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // ( ...))) + for (unsigned In = 0; In < NumIncoming; In++) { + VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), + P->getParent()); + VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); + + for (unsigned part = 0; part < UF; ++part) { + // We don't need to 'select' the first PHI operand because it is + // the default value if all of the other masks don't match. + if (In == 0) + Entry[part] = In0[part]; + else + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + Entry[part], "predphi"); + } } continue; } @@ -1917,7 +2073,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // After broadcasting the induction variable we need to make the // vector consecutive by adding ... -3, -2, -1, 0. for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true); + Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, + true); continue; } @@ -2077,6 +2234,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case Instruction::Call: { + // Ignore dbg intrinsics. + if (isa<DbgInfoIntrinsic>(it)) + break; + Module *M = BB->getParent()->getParent(); CallInst *CI = cast<CallInst>(it); Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); @@ -2137,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa<BranchInst>(BB->getTerminator())) return false; - // We must have at most two predecessors because we need to convert - // all PHIs to selects. - unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); - if (Preds > 2) - return false; - // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) return false; @@ -2153,7 +2308,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } bool LoopVectorizationLegality::canVectorize() { - assert(TheLoop->getLoopPreheader() && "No preheader!!"); + // We must have a loop in canonical form. Loops with indirectbr in them cannot + // be canonicalized. + if (!TheLoop->getLoopPreheader()) + return false; // We can only vectorize innermost loops. if (TheLoop->getSubLoopsVector().size()) @@ -2220,10 +2378,44 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +/// \brief Check that the instruction has outside loop users and is not an +/// identified reduction variable. +static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, + SmallPtrSet<Value *, 4> &Reductions) { + // Reduction instructions are allowed to have exit users. All other + // instructions must not have external users. + if (!Reductions.count(Inst)) + //Check that all of the users of the loop are inside the BB. + for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); + I != E; ++I) { + Instruction *U = cast<Instruction>(*I); + // This user may be a reduction exit value. 
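(Aside, not part of the diff: the new loop above blends an if-converted PHI with any number of incoming values into a chain of selects. For a hypothetical three-input PHI with vectorized incoming values In0, In1, In2 and edge masks Mask1, Mask2, the sequence emitted per unroll part is roughly the following; all names are illustrative.)

    Value *Blend = In0;                                         // default value, no select needed
    Blend = Builder.CreateSelect(Mask1, In1, Blend, "predphi"); // take In1 where edge 1 is taken
    Blend = Builder.CreateSelect(Mask2, In2, Blend, "predphi"); // take In2 where edge 2 is taken
    // i.e. select(Mask2, In2, select(Mask1, In1, In0))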
+ if (!TheLoop->contains(U)) { + DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + return true; + } + } + return false; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); + // If we marked the scalar loop as "already vectorized" then no need + // to vectorize it again. + if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { + DEBUG(dbgs() << "LV: This loop was vectorized before\n"); + return false; + } + + // Look for the attribute signaling the absence of NaNs. + Function &F = *Header->getParent(); + if (F.hasFnAttribute("no-nans-fp-math")) + HasFunNoNaNAttr = F.getAttributes().getAttribute( + AttributeSet::FunctionIndex, + "no-nans-fp-math").getValueAsString() == "true"; + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2233,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ++it) { if (PHINode *Phi = dyn_cast<PHINode>(it)) { - // This should not happen because the loop should be normalized. - if (Phi->getNumIncomingValues() != 2) { - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } - // Check that this PHI type is allowed. if (!Phi->getType()->isIntegerTy() && !Phi->getType()->isFloatingPointTy() && @@ -2250,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. - if (*bb != Header) - continue; + if (*bb != Header) { + // Check that this instruction has no outside users or is an + // identified reduction value with an outside user. + if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + continue; + return false; + } + + // We only allow if-converted PHIs with more than two incoming values. + if (Phi->getNumIncomingValues() != 2) { + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + return false; + } // This is the value coming from the preheader. Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); @@ -2293,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_IntegerMinMax)) { + DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } if (AddReductionVar(Phi, RK_FloatMult)) { DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n"); continue; @@ -2301,14 +2502,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_FloatMinMax)) { + DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; }// end of PHI handling - // We still don't handle functions. + // We still don't handle functions. However, we can ignore dbg intrinsic + // calls and we do handle certain intrinsic and libm functions. 
CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !getIntrinsicIDForCall(CI, TLI)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -2329,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. - if (!AllowedExit.count(it)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator I = it->use_begin(), E = it->use_end(); - I != E; ++I) { - Instruction *U = cast<Instruction>(*I); - // This user may be a reduction exit value. - if (!TheLoop->contains(U)) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; - } - } + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + return false; + } // next instr. } @@ -2419,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder( bool LoopVectorizationLegality::canVectorizeMemory() { - if (TheLoop->isAnnotatedParallel()) { - DEBUG(dbgs() - << "LV: A loop annotated parallel, ignore memory dependency " - << "checks.\n"); - return true; - } - typedef SmallVector<Value*, 16> ValueVector; typedef SmallPtrSet<Value*, 16> ValueSet; // Holds the Load and Store *instructions*. @@ -2434,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { PtrRtCheck.Pointers.clear(); PtrRtCheck.Need = false; + const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2448,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayReadFromMemory()) { LoadInst *Ld = dyn_cast<LoadInst>(it); if (!Ld) return false; - if (!Ld->isSimple()) { + if (!Ld->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple load.\n"); return false; } @@ -2460,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast<StoreInst>(it); if (!St) return false; - if (!St->isSimple()) { + if (!St->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple store.\n"); return false; } @@ -2507,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() { ReadWrites.insert(std::make_pair(Ptr, ST)); } + if (IsAnnotatedParallel) { + DEBUG(dbgs() + << "LV: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); + return true; + } + for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { LoadInst *LD = cast<LoadInst>(*I); Value* Ptr = LD->getPointerOperand(); @@ -2529,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } + unsigned NumReadPtrs = 0; + unsigned NumWritePtrs = 0; + // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
bool CanDoRT = true; @@ -2536,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, true); + NumWritePtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2546,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, false); + NumReadPtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2556,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // Check that we did not collect too many pointers or found a // unsizeable pointer. - if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) { + unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1)); + DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n"); + if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { PtrRtCheck.reset(); CanDoRT = false; } @@ -2619,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible write-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI + << "\n"); return false; } @@ -2663,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible read-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI + << "\n"); return false; } } @@ -2710,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // used as reduction variables (such as ADD). We may have a single // out-of-block user. The cycle must end with the original PHI. Instruction *Iter = Phi; - while (true) { + + // To recognize min/max patterns formed by a icmp select sequence, we store + // the number of instruction we saw from the recognized min/max pattern, + // such that we don't stop when we see the phi has two uses (one by the select + // and one by the icmp) and to make sure we only see exactly the two + // instructions. + unsigned NumCmpSelectPatternInst = 0; + ReductionInstDesc ReduxDesc(false, 0); + + // Avoid cycles in the chain. + SmallPtrSet<Instruction *, 8> VisitedInsts; + while (VisitedInsts.insert(Iter)) { // If the instruction has no users then this is a broken // chain and can't be a reduction variable. if (Iter->use_empty()) @@ -2749,26 +2967,40 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (isa<PHINode>(Iter) && isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && TheLoop->contains(U) && - Iter->getNumUses() > 1) + Iter->hasNUsesOrMore(2)) continue; - // We can't have multiple inside users. - if (FoundInBlockUser) + // We can't have multiple inside users except for a combination of + // icmp/select both using the phi. + if (FoundInBlockUser && !NumCmpSelectPatternInst) return false; FoundInBlockUser = true; // Any reduction instr must be of one of the allowed kinds. 
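(Aside, not part of the diff: a worked example of the comparison estimate computed above, with illustrative counts. For 2 write pointers and 3 read pointers, NumComparisons = 2 * (3 + 2 - 1) = 8, which sits exactly at the new RuntimeMemoryCheckThreshold; pairs of read-only pointers are skipped when the checks are emitted, so only pairs involving at least one write are compared at runtime.)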
- if (!isReductionInstr(U, Kind)) + ReduxDesc = isReductionInstr(U, Kind, ReduxDesc); + if (!ReduxDesc.IsReduction) return false; + if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + // Reductions of instructions such as Div, and Sub is only // possible if the LHS is the reduction variable. - if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter) + if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) && + !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter) return false; - Iter = U; + Iter = ReduxDesc.PatternLastInst; } + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. + if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) + return false; + // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. @@ -2777,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, AllowedExit.insert(ExitInstruction); // Save the description of this reduction variable. - ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind, + ReduxDesc.MinMaxKind); Reductions[Phi] = RD; // We've ended the cycle. This is a reduction variable if we have an // outside user and it has a binary op. return FoundBinOp && ExitInstruction; } } + + return false; } -bool +/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction +/// pattern corresponding to a min(X, Y) or max(X, Y). +LoopVectorizationLegality::ReductionInstDesc +LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev) { + + assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && + "Expect a select instruction"); + Instruction *Cmp = 0; + SelectInst *Select = 0; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin()))) + return ReductionInstDesc(false, I); + return ReductionInstDesc(Select, Prev.MinMaxKind); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast<SelectInst>(I))) + return ReductionInstDesc(false, I); + if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && + !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) + return ReductionInstDesc(false, I); + if (!Cmp->hasOneUse()) + return ReductionInstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern. 
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + + return ReductionInstDesc(false, I); +} + +LoopVectorizationLegality::ReductionInstDesc LoopVectorizationLegality::isReductionInstr(Instruction *I, - ReductionKind Kind) { + ReductionKind Kind, + ReductionInstDesc &Prev) { bool FP = I->getType()->isFloatingPointTy(); bool FastMath = (FP && I->isCommutative() && I->isAssociative()); - switch (I->getOpcode()) { default: - return false; + return ReductionInstDesc(false, I); case Instruction::PHI: - if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd)) - return false; - // possibly. - return true; + if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd && + Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return ReductionInstDesc(I, Prev.MinMaxKind); case Instruction::Sub: case Instruction::Add: - return Kind == RK_IntegerAdd; - case Instruction::SDiv: - case Instruction::UDiv: + return ReductionInstDesc(Kind == RK_IntegerAdd, I); case Instruction::Mul: - return Kind == RK_IntegerMult; + return ReductionInstDesc(Kind == RK_IntegerMult, I); case Instruction::And: - return Kind == RK_IntegerAnd; + return ReductionInstDesc(Kind == RK_IntegerAnd, I); case Instruction::Or: - return Kind == RK_IntegerOr; + return ReductionInstDesc(Kind == RK_IntegerOr, I); case Instruction::Xor: - return Kind == RK_IntegerXor; + return ReductionInstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: - return Kind == RK_FloatMult && FastMath; + return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I); case Instruction::FAdd: - return Kind == RK_FloatAdd && FastMath; - } + return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } } LoopVectorizationLegality::InductionKind @@ -3238,6 +3530,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip dbg intrinsics. 
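(Aside, not part of the diff: the select(cmp()) patterns matched above correspond to ordinary scalar min/max reduction loops. A hedged example of source whose compare/select pair would be classified as a signed-max reduction, MRK_SIntMax; the function itself is illustrative only.)

    int maxOf(const int *A, int N) {
      int Max = A[0];                     // reduction start value
      for (int i = 1; i < N; ++i)
        Max = (A[i] > Max) ? A[i] : Max;  // icmp sgt + select -> signed max reduction
      return Max;
    }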
+ if (isa<DbgInfoIntrinsic>(it)) + continue; + unsigned C = getInstructionCost(it, VF); Cost += C; DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " << @@ -3297,14 +3593,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::AShr: case Instruction::And: case Instruction::Or: - case Instruction::Xor: - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy); + case Instruction::Xor: { + // Certain instructions can be cheaper to vectorize if they have a constant + // second vector operand. One example of this are shifts on x86. + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_AnyValue; + + if (isa<ConstantInt>(I->getOperand(1))) + Op2VK = TargetTransformInfo::OK_UniformConstantValue; + + return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK); + } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); @@ -3335,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); // Scalarized loads/stores. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (0 == Stride) { + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy); + unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF; + if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) { unsigned Cost = 0; // The cost of extracting from the value vector and pointer vector. Type *PtrTy = ToVectorTy(Ptr->getType(), VF); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp new file mode 100644 index 0000000000..cc30cc9278 --- /dev/null +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -0,0 +1,348 @@ +//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass implements the Bottom Up SLP vectorizer. It detects consecutive +// stores that can be put together into vector-stores. Next, it attempts to +// construct vectorizable tree using the use-def chains. If a profitable tree +// was found, the SLP vectorizer performs vectorization on the tree. +// +// The pass is inspired by the work described in the paper: +// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 
+// +//===----------------------------------------------------------------------===// +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE SV_NAME + +#include "VecUtils.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +using namespace llvm; + +static cl::opt<int> +SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, + cl::desc("Only vectorize trees if the gain is above this " + "number. (gain = -cost of vectorization)")); +namespace { + +/// The SLPVectorizer Pass. +struct SLPVectorizer : public FunctionPass { + typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap; + + /// Pass identification, replacement for typeid + static char ID; + + explicit SLPVectorizer() : FunctionPass(ID) { + initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); + } + + ScalarEvolution *SE; + DataLayout *DL; + TargetTransformInfo *TTI; + AliasAnalysis *AA; + LoopInfo *LI; + + virtual bool runOnFunction(Function &F) { + SE = &getAnalysis<ScalarEvolution>(); + DL = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + LI = &getAnalysis<LoopInfo>(); + + StoreRefs.clear(); + bool Changed = false; + + // Must have DataLayout. We can't require it because some tests run w/o + // triple. + if (!DL) + return false; + + for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { + BasicBlock *BB = it; + bool BBChanged = false; + + // Use the bollom up slp vectorizer to construct chains that start with + // he store instructions. + BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + + // Vectorize trees that end at reductions. + BBChanged |= vectorizeReductions(BB, R); + + // Vectorize trees that end at stores. + if (unsigned count = collectStores(BB, R)) { + (void)count; + DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); + BBChanged |= vectorizeStoreChains(R); + } + + // Try to hoist some of the scalarization code to the preheader. + if (BBChanged) hoistGatherSequence(LI, BB, R); + + Changed |= BBChanged; + } + + if (Changed) { + DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n"); + DEBUG(verifyFunction(F)); + } + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetTransformInfo>(); + AU.addRequired<LoopInfo>(); + } + +private: + + /// \brief Collect memory references and sort them according to their base + /// object. We sort the stores to their base objects to reduce the cost of the + /// quadratic search on the stores. TODO: We can further reduce this cost + /// if we flush the chain creation every time we run into a memory barrier. + unsigned collectStores(BasicBlock *BB, BoUpSLP &R); + + /// \brief Try to vectorize a chain that starts at two arithmetic instrs. + bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); + + /// \brief Try to vectorize a list of operands. 
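(Aside, not part of the diff: the kind of code the new SLP pass targets. Four consecutive stores with isomorphic operand trees, as in the illustrative function below, can be merged into one wide load/add/store sequence when the tree cost beats the slp-threshold.)

    void addFour(int *A, const int *B, const int *C) {  // hypothetical example
      A[0] = B[0] + C[0];
      A[1] = B[1] + C[1];
      A[2] = B[2] + C[2];
      A[3] = B[3] + C[3];                                // -> one <4 x i32> store
    }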
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + + /// \brief Try to vectorize a chain that may start at the operands of \V; + bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); + + /// \brief Vectorize the stores that were collected in StoreRefs. + bool vectorizeStoreChains(BoUpSLP &R); + + /// \brief Try to hoist gather sequences outside of the loop in cases where + /// all of the sources are loop invariant. + void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); + + /// \brief Scan the basic block and look for reductions that may start a + /// vectorization chain. + bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R); + +private: + StoreListMap StoreRefs; +}; + +unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { + unsigned count = 0; + StoreRefs.clear(); + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + StoreInst *SI = dyn_cast<StoreInst>(it); + if (!SI) + continue; + + // Check that the pointer points to scalars. + Type *Ty = SI->getValueOperand()->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + + // Find the base of the GEP. + Value *Ptr = SI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) + Ptr = GEP->getPointerOperand(); + + // Save the store locations. + StoreRefs[Ptr].push_back(SI); + count++; + } + return count; +} + +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { + if (!A || !B) return false; + Value *VL[] = { A, B }; + return tryToVectorizeList(VL, R); +} + +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { + DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); + + // Check that all of the parts are scalar. + for (int i = 0, e = VL.size(); i < e; ++i) { + Type *Ty = VL[i]->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + } + + int Cost = R.getTreeCost(VL); + int ExtrCost = R.getScalarizationCost(VL); + DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << + " Cost of extract:" << ExtrCost << ".\n"); + if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; + DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); + R.vectorizeArith(VL); + return true; +} + +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { + if (!V) return false; + // Try to vectorize V. + if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) + return true; + + BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); + BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); + // Try to skip B. + if (B && B->hasOneUse()) { + BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); + BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); + if (tryToVectorizePair(A, B0, R)) { + B->moveBefore(V); + return true; + } + if (tryToVectorizePair(A, B1, R)) { + B->moveBefore(V); + return true; + } + } + + // Try to skip A. + if (A && A->hasOneUse()) { + BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); + BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); + if (tryToVectorizePair(A0, B, R)) { + A->moveBefore(V); + return true; + } + if (tryToVectorizePair(A1, B, R)) { + A->moveBefore(V); + return true; + } + } + return 0; +} + +bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) { + bool Changed = false; + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + if (isa<DbgInfoIntrinsic>(it)) continue; + + // Try to vectorize reductions that use PHINodes. 
+ if (PHINode *P = dyn_cast<PHINode>(it)) { + // Check that the PHI is a reduction PHI. + if (P->getNumIncomingValues() != 2) return Changed; + Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : + (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : + 0)); + // Check if this is a Binary Operator. + BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); + if (!BI) + continue; + + Value *Inst = BI->getOperand(0); + if (Inst == P) Inst = BI->getOperand(1); + Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); + continue; + } + + // Try to vectorize trees that start at compare instructions. + if (CmpInst *CI = dyn_cast<CmpInst>(it)) { + if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { + Changed |= true; + continue; + } + for (int i = 0; i < 2; ++i) + if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) + Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + continue; + } + } + + return Changed; +} + +bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { + bool Changed = false; + // Attempt to sort and vectorize each of the store-groups. + for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); + it != e; ++it) { + if (it->second.size() < 2) + continue; + + DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << + it->second.size() << ".\n"); + + Changed |= R.vectorizeStores(it->second, -SLPCostThreshold); + } + return Changed; +} + +void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, + BoUpSLP &R) { + // Check if this block is inside a loop. + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + // Check if it has a preheader. + BasicBlock *PreHeader = L->getLoopPreheader(); + if (!PreHeader) + return; + + // Mark the insertion point for the block. + Instruction *Location = PreHeader->getTerminator(); + + BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); + for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); + it != e; ++it) { + InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); + + // The InsertElement sequence can be simplified into a constant. + if (!Insert) + continue; + + // If the vector or the element that we insert into it are + // instructions that are defined in this basic block then we can't + // hoist this instruction. + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); + if (CurrVec && L->contains(CurrVec)) continue; + if (NewElem && L->contains(NewElem)) continue; + + // We can hoist this instruction. Move it to the pre-header. 
+ Insert->moveBefore(Location); + } +} + +} // end anonymous namespace + +char SLPVectorizer::ID = 0; +static const char lv_name[] = "SLP Vectorizer"; +INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) + +namespace llvm { + Pass *createSLPVectorizerPass() { + return new SLPVectorizer(); + } +} + diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp new file mode 100644 index 0000000000..9b9436683b --- /dev/null +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -0,0 +1,730 @@ +//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "SLP" + +#include "VecUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <map> + +using namespace llvm; + +static const unsigned MinVecRegSize = 128; + +static const unsigned RecursionMaxDepth = 6; + +namespace llvm { + +BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : + BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { + numberInstructions(); +} + +void BoUpSLP::numberInstructions() { + int Loc = 0; + InstrIdx.clear(); + InstrVec.clear(); + // Number the instructions in the block. + for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { + InstrIdx[it] = Loc++; + InstrVec.push_back(it); + assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); + } +} + +Value *BoUpSLP::getPointerOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); + return 0; +} + +unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { + if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); + if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); + return -1; +} + +bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + // Check that the address spaces match and that the pointers are valid. 
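+  // (getPointerOperand returns null and getAddressSpaceOperand returns -1 for
+  // anything that is not a load or a store, so such values fail this check.)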
+  if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+  // Check that A and B are of the same type.
+  if (PtrA->getType() != PtrB->getType()) return false;
+
+  // Calculate the distance.
+  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+  // Non-constant distance.
+  if (!ConstOffSCEV) return false;
+
+  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  // The instructions are consecutive if the size of the first load/store is
+  // the same as the offset.
+  int64_t Sz = DL->getTypeStoreSize(Ty);
+  return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
+  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+  unsigned VF = MinVecRegSize / Sz;
+
+  if (!isPowerOf2_32(Sz) || VF < 2) return false;
+
+  bool Changed = false;
+  // Look for profitable vectorizable trees at all offsets, starting at zero.
+  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+    if (i + VF > e) return Changed;
+    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+    ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+    int Cost = getTreeCost(Operands);
+    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+    if (Cost < CostThreshold) {
+      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+      vectorizeTree(Operands, VF);
+      i += VF - 1;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
+  ValueSet Heads, Tails;
+  SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+  // We may run into multiple chains that merge into a single chain. We mark the
+  // stores that we vectorized so that we don't visit the same store twice.
+  ValueSet VectorizedStores;
+  bool Changed = false;
+
+  // Do a quadratic search on all of the given stores and find
+  // all of the pairs of stores that follow each other.
+  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+    for (unsigned j = 0; j < e; ++j) {
+      if (i == j) continue;
+      if (isConsecutiveAccess(Stores[i], Stores[j])) {
+        Tails.insert(Stores[j]);
+        Heads.insert(Stores[i]);
+        ConsecutiveChain[Stores[i]] = Stores[j];
+      }
+    }
+
+  // For stores that start but don't end a link in the chain:
+  for (ValueSet::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) {
+    if (Tails.count(*it)) continue;
+
+    // We found a store instruction that starts a chain. Now follow the chain
+    // and try to vectorize it.
+    ValueList Operands;
+    Value *I = *it;
+    // Collect the chain into a list.
+    while (Tails.count(I) || Heads.count(I)) {
+      if (VectorizedStores.count(I)) break;
+      Operands.push_back(I);
+      // Move to the next value in the chain.
+      I = ConsecutiveChain[I];
+    }
+
+    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+    // Mark the vectorized stores so that we don't vectorize them again.
+    if (Vectorized)
+      VectorizedStores.insert(Operands.begin(), Operands.end());
+    Changed |= Vectorized;
+  }
+
+  return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
+  // Find the type of the operands in VL.
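+  // For stores the relevant type is the type of the stored value, not the
+  // type of the pointer operand.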
+ Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + // Find the cost of inserting/extracting values from the vector. + return getScalarizationCost(VecTy); +} + +int BoUpSLP::getScalarizationCost(Type *Ty) { + int Cost = 0; + for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + return Cost; +} + +AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); + return AliasAnalysis::Location(); +} + +Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { + assert(Src->getParent() == Dst->getParent() && "Not the same BB"); + BasicBlock::iterator I = Src, E = Dst; + /// Scan all of the instruction from SRC to DST and check if + /// the source may alias. + for (++I; I != E; ++I) { + // Ignore store instructions that are marked as 'ignore'. + if (MemBarrierIgnoreList.count(I)) continue; + if (Src->mayWriteToMemory()) /* Write */ { + if (!I->mayReadOrWriteMemory()) continue; + } else /* Read */ { + if (!I->mayWriteToMemory()) continue; + } + AliasAnalysis::Location A = getLocation(&*I); + AliasAnalysis::Location B = getLocation(Src); + + if (!A.Ptr || !B.Ptr || AA->alias(A, B)) + return I; + } + return 0; +} + +void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) { + Value *Vec = vectorizeTree(Operands, Operands.size()); + BasicBlock::iterator Loc = cast<Instruction>(Vec); + IRBuilder<> Builder(++Loc); + // After vectorizing the operands we need to generate extractelement + // instructions and replace all of the uses of the scalar values with + // the values that we extracted from the vectorized tree. + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); + Operands[i]->replaceAllUsesWith(S); + } +} + +int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { + // Get rid of the list of stores that were removed, and from the + // lists of instructions with multiple users. + MemBarrierIgnoreList.clear(); + LaneMap.clear(); + MultiUserVals.clear(); + MustScalarize.clear(); + + // Scan the tree and find which value is used by which lane, and which values + // must be scalarized. + getTreeUses_rec(VL, 0); + + // Check that instructions with multiple users can be vectorized. Mark unsafe + // instructions. + for (ValueSet::iterator it = MultiUserVals.begin(), + e = MultiUserVals.end(); it != e; ++it) { + // Check that all of the users of this instr are within the tree + // and that they are all from the same lane. + int Lane = -1; + for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); + I != E; ++I) { + if (LaneMap.find(*I) == LaneMap.end()) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"SLP: Adding " << **it << + " to MustScalarize because of an out of tree usage.\n"); + break; + } + if (Lane == -1) Lane = LaneMap[*I]; + if (Lane != LaneMap[*I]) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"Adding " << **it << + " to MustScalarize because multiple lane use it: " + << Lane << " and " << LaneMap[*I] << ".\n"); + break; + } + } + } + + // Now calculate the cost of vectorizing the tree. 
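+  // A negative result means that vectorizing the tree is expected to be
+  // profitable compared to the scalar code.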
+ return getTreeCost_rec(VL, 0); +} + +void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { + if (Depth == RecursionMaxDepth) return; + + // Don't handle vectors. + if (VL[0]->getType()->isVectorTy()) return; + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + if (SI->getValueOperand()->getType()->isVectorTy()) return; + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If one of the instructions is out of this BB, we need to scalarize all. + if (I && I->getParent() != BB) return; + } + + // If all of the operands are identical or constant we have a simple solution. + if (AllConst || AllSameScalar) return; + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return; + } + + // Mark instructions with multiple users. + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // Remember to check if all of the users of this instr are vectorized + // within our tree. + if (I && I->getNumUses() > 1) MultiUserVals.insert(I); + } + + for (int i = 0, e = VL.size(); i < e; ++i) { + // Check that the instruction is only used within + // one lane. + if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; + // Make this instruction as 'seen' and remember the lane. + LaneMap[VL[i]] = i; + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + getTreeUses_rec(Operands, Depth+1); + } + return; + } + case Instruction::Store: { + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + getTreeUses_rec(Operands, Depth+1); + return; + } + default: + return; + } +} + +int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { + Type *ScalarTy = VL[0]->getType(); + + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + + /// Don't mess with vectors. 
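+  // Vector-typed scalars would require building vectors of vectors, so such
+  // trees are rejected with max_cost.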
+ if (ScalarTy->isVectorTy()) return max_cost; + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + + if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + bool MustScalarizeFlag = false; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // Must have a single use. + Instruction *I = dyn_cast<Instruction>(VL[i]); + MustScalarizeFlag |= MustScalarize.count(VL[i]); + // This instruction is outside the basic block. + if (I && I->getParent() != BB) + return getScalarizationCost(VecTy); + } + + // Is this a simple vector constant. + if (AllConst) return 0; + + // If all of the operands are identical we can broadcast them. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (AllSameScalar) { + // If we are in a loop, and this is not an instruction (e.g. constant or + // argument) or the instruction is defined outside the loop then assume + // that the cost is zero. + if (L && (!VL0 || !L->contains(VL0))) + return 0; + + // We need to broadcast the scalar. + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); + } + + // If this is not a constant, or a scalar from outside the loop then we + // need to scalarize it. + if (MustScalarizeFlag) + return getScalarizationCost(VecTy); + + if (!VL0) return getScalarizationCost(VecTy); + assert(VL0->getParent() == BB && "Wrong BB"); + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); + } + + // Check if it is safe to sink the loads or the stores. + if (Opcode == Instruction::Load || Opcode == Instruction::Store) { + int MaxIdx = InstrIdx[VL0]; + for (unsigned i = 1, e = VL.size(); i < e; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + + Instruction *Last = InstrVec[MaxIdx]; + for (unsigned i = 0, e = VL.size(); i < e; ++i ) { + if (VL[i] == Last) continue; + Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); + if (Barrier) { + DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << + *Last << "\n because of " << *Barrier << "\n"); + return max_cost; + } + } + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + int Cost = 0; + ValueList Operands; + Type *SrcTy = VL0->getOperand(0)->getType(); + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + // Check that the casted type is the same for all users. + if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) + return getScalarizationCost(VecTy); + } + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + + // Calculate the cost of this instruction. 
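+    // The cost is reported as the difference between one vector cast and
+    // VL.size() scalar casts, added on top of the cost of the operands.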
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), + VL0->getType(), SrcTy); + + VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + int Cost = 0; + // Calculate the cost of all of the operands. + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + } + + // Calculate the cost of this instruction. + int ScalarCost = VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy); + + int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Load: { + // If we are scalarize the loads, add the cost of forming the vector. + for (unsigned i = 0, e = VL.size()-1; i < e; ++i) + if (!isConsecutiveAccess(VL[i], VL[i+1])) + return getScalarizationCost(VecTy); + + // Cost of wide load - cost of scalar loads. + int ScalarLdCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + return VecLdCost - ScalarLdCost; + } + case Instruction::Store: { + // We know that we can merge the stores. Calculate the cost. + int ScalarStCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); + int StoreCost = VecStCost - ScalarStCost; + + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + MemBarrierIgnoreList.insert(VL[j]); + } + + int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); + return TotalCost; + } + default: + // Unable to vectorize unknown instructions. + return getScalarizationCost(VecTy); + } +} + +Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) { + int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; + for (unsigned i = 0; i < VF; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + return InstrVec[MaxIdx + 1]; +} + +Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { + IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); + Value *Vec = UndefValue::get(Ty); + for (unsigned i=0; i < Ty->getNumElements(); ++i) { + // Generate the 'InsertElement' instruction. + Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); + // Remember that this instruction is used as part of a 'gather' sequence. + // The caller of the bottom-up slp vectorizer can try to hoist the sequence + // if the users are outside of the basic block. 
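+    // (SLPVectorizer::hoistGatherSequence is the caller that consumes this
+    // list.)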
+ GatherInstructions.push_back(Vec); + } + + return Vec; +} + +Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { + Value *V = vectorizeTree_rec(VL, VF); + // We moved some instructions around. We have to number them again + // before we can do any analysis. + numberInstructions(); + MustScalarize.clear(); + return V; +} + +Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { + Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VF); + + // Check if all of the operands are constants or identical. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VF; i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // The instruction must be in the same BB, and it must be vectorizable. + Instruction *I = dyn_cast<Instruction>(VL[i]); + if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) + return Scalarize(VL, VecTy); + } + + // Check that this is a simple vector constant. + if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return Scalarize(VL, VecTy); + + if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VF; i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + ValueList INVL; + for (int i = 0; i < VF; ++i) + INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + Value *InVec = vectorizeTree_rec(INVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + CastInst *CI = dyn_cast<CastInst>(VL0); + Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + ValueList LHSVL, RHSVL; + for (int i = 0; i < VF; ++i) { + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + } + + Value *RHS = vectorizeTree_rec(RHSVL, VF); + Value *LHS = vectorizeTree_rec(LHSVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + BinaryOperator *BinOp = cast<BinaryOperator>(VL0); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(VL0); + unsigned Alignment = LI->getAlignment(); + + // Check if all of the loads are consecutive. 
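+    // For example, loads of A[0], A[1], A[2] and A[3] can be replaced below
+    // by one wide load through a pointer bitcast to a vector pointer type.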
+ for (unsigned i = 1, e = VF; i < e; ++i) + if (!isConsecutiveAccess(VL[i-1], VL[i])) + return Scalarize(VL, VecTy); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), + VecTy->getPointerTo()); + LI = Builder.CreateLoad(VecPtr); + LI->setAlignment(Alignment); + VectorizedValues[VL0] = LI; + return LI; + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(VL0); + unsigned Alignment = SI->getAlignment(); + + ValueList ValueOp; + for (int i = 0; i < VF; ++i) + ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); + + Value *VecValue = vectorizeTree_rec(ValueOp, VF); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), + VecTy->getPointerTo()); + Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); + + for (int i = 0; i < VF; ++i) + cast<Instruction>(VL[i])->eraseFromParent(); + return 0; + } + default: + Value *S = Scalarize(VL, VecTy); + VectorizedValues[VL0] = S; + return S; + } +} + +} // end of namespace diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h new file mode 100644 index 0000000000..5456c6c779 --- /dev/null +++ b/lib/Transforms/Vectorize/VecUtils.h @@ -0,0 +1,164 @@ +//===- VecUtils.h - Vectorization Utilities -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of classes and functions manipulate vectors and chains of +// vectors. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H +#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include <vector> + +namespace llvm { + +class BasicBlock; class Instruction; class Type; +class VectorType; class StoreInst; class Value; +class ScalarEvolution; class DataLayout; +class TargetTransformInfo; class AliasAnalysis; +class Loop; + +/// Bottom Up SLP vectorization utility class. +struct BoUpSLP { + typedef SmallVector<Value*, 8> ValueList; + typedef SmallPtrSet<Value*, 16> ValueSet; + typedef SmallVector<StoreInst*, 8> StoreList; + static const int max_cost = 1<<20; + + // \brief C'tor. + BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); + + /// \brief Take the pointer operand from the Load/Store instruction. + /// \returns NULL if this is not a valid Load/Store instruction. + static Value *getPointerOperand(Value *I); + + /// \brief Take the address space operand from the Load/Store instruction. + /// \returns -1 if this is not a valid Load/Store instruction. + static unsigned getAddressSpaceOperand(Value *I); + + /// \returns true if the memory operations A and B are consecutive. + bool isConsecutiveAccess(Value *A, Value *B); + + /// \brief Vectorize the tree that starts with the elements in \p VL. + /// \returns the vectorized value. + Value *vectorizeTree(ArrayRef<Value *> VL, int VF); + + /// \returns the vectorization cost of the subtree that starts at \p VL. + /// A negative number means that this is profitable. 
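+  /// A result of max_cost or above is treated as 'do not vectorize'.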
+  int getTreeCost(ArrayRef<Value *> VL);
+
+  /// \returns the scalarization cost for this list of values. Assuming that
+  /// this subtree gets vectorized, we may need to extract the values from the
+  /// roots. This method calculates the cost of extracting the values.
+  int getScalarizationCost(ArrayRef<Value *> VL);
+
+  /// \brief Attempts to order and vectorize a sequence of stores. This
+  /// function does a quadratic scan of the given stores.
+  /// \returns true if the basic block was modified.
+  bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
+
+  /// \brief Vectorize a group of scalars into a vector tree.
+  void vectorizeArith(ArrayRef<Value *> Operands);
+
+  /// \returns the list of new instructions that were added in order to collect
+  /// scalars into vectors. This list can be used to further optimize the
+  /// gather sequences.
+  ValueList &getGatherSeqInstructions() { return GatherInstructions; }
+
+private:
+  /// \brief This method contains the recursive part of getTreeCost.
+  int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+  /// \brief This recursive method looks for vectorization hazards such as
+  /// values that are used by multiple users and checks that values are used
+  /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+  void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+  /// \brief This method contains the recursive part of vectorizeTree.
+  Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
+
+  /// \brief Number all of the instructions in the block.
+  void numberInstructions();
+
+  /// \brief Vectorize a sorted sequence of stores.
+  bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
+
+  /// \returns the scalarization cost for this type. Scalarization in this
+  /// context means the creation of vectors from a group of scalars.
+  int getScalarizationCost(Type *Ty);
+
+  /// \returns the AA location that is being accessed by the instruction.
+  AliasAnalysis::Location getLocation(Instruction *I);
+
+  /// \brief Checks if it is possible to sink an instruction from
+  /// \p Src to \p Dst.
+  /// \returns the pointer to the barrier instruction if we can't sink.
+  Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+  /// \returns the instruction that appears last in the BB from \p VL.
+  /// Only consider the first \p VF elements.
+  Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns a vector from a collection of scalars in \p VL.
+  Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
+
+private:
+  /// Maps instructions to numbers and back.
+  SmallDenseMap<Value*, int> InstrIdx;
+  /// Maps integers to Instructions.
+  std::vector<Instruction*> InstrVec;
+
+  // -- containers that are used during getTreeCost -- //
+
+  /// Contains values that must be scalarized because they are used
+  /// by multiple lanes, or by users outside the tree.
+  /// NOTICE: The vectorization methods also use this set.
+  ValueSet MustScalarize;
+
+  /// Contains instructions with multiple users; getTreeCost checks that all of
+  /// their users are inside the tree and in the same lane. This set must be
+  /// reset between runs.
+  ValueSet MultiUserVals;
+  /// Maps values in the tree to the vector lanes that use them. This map must
+  /// be reset between runs of getCost.
+  std::map<Value*, int> LaneMap;
+  /// A list of instructions to ignore while sinking
+  /// memory instructions. This set must be reset between runs of getCost.
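+  /// (Stores that belong to the tree currently being costed are added here so
+  /// that isUnsafeToSink does not treat them as aliasing barriers.)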
+  SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+
+  // -- Containers that are used during vectorizeTree -- //
+
+  /// Maps the first scalar of a vectorized bundle to the generated vector.
+  /// This map must be reset between runs.
+  DenseMap<Value*, Value*> VectorizedValues;
+
+  // -- Containers that are used after vectorization by the caller -- //
+
+  /// A list of instructions that are used when gathering scalars into vectors.
+  /// In many cases these instructions can be hoisted outside of the BB.
+  /// Iterating over this list is faster than calling LICM.
+  ValueList GatherInstructions;
+
+  // Analysis and block reference.
+  BasicBlock *BB;
+  ScalarEvolution *SE;
+  DataLayout *DL;
+  TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
+  Loop *L;
+};
+
+} // end of namespace
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2f87..a927fe1451 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
- //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -28,6 +28,7 @@ using namespace llvm;
 void llvm::initializeVectorization(PassRegistry &Registry) {
   initializeBBVectorizePass(Registry);
   initializeLoopVectorizePass(Registry);
+  initializeSLPVectorizerPass(Registry);
 }
 
 void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopVectorizePass());
 }
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createSLPVectorizerPass());
+}
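For context, a minimal, illustrative sketch of how a client could schedule the newly exposed pass through the C API binding defined above. The pass-manager calls are standard llvm-c/Core.h entry points; the declaration of LLVMAddSLPVectorizePass in llvm-c/Transforms/Vectorize.h is assumed here and is not shown in this diff:

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Vectorize.h"

/* Run the SLP vectorizer over a module (illustrative helper, not part of this commit). */
static void runSLPVectorizer(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddSLPVectorizePass(PM);   /* schedule the new pass */
  LLVMRunPassManager(PM, M);     /* run it over the whole module */
  LLVMDisposePassManager(PM);
}

From the opt tool the pass is selected by the SV_NAME string used in the INITIALIZE_PASS macros above (conventionally "slp-vectorizer"; the macro's value is defined earlier in SLPVectorizer.cpp and is not visible in this hunk).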