diff options
Diffstat (limited to 'lib/Transforms')
25 files changed, 2544 insertions, 1323 deletions
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a75212a386..bc5109b4d4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1,4 +1,4 @@ -//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ // to the function does not create any copies of the pointer value that // outlive the call. This more or less means that the pointer is only // dereferenced, and not returned from the function or stored in a global. +// Finally, well-known library call declarations are marked with all +// attributes that are consistent with the function's standard definition. // This pass is implemented as a bottom-up traversal of the call-graph. // //===----------------------------------------------------------------------===// @@ -32,12 +34,14 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); +STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { @@ -62,14 +66,63 @@ namespace { // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const CallGraphSCC &SCC); + // Utility methods used by inferPrototypeAttributes to add attributes + // and maintain annotation statistics. + + void setDoesNotAccessMemory(Function &F) { + if (!F.doesNotAccessMemory()) { + F.setDoesNotAccessMemory(); + ++NumAnnotated; + } + } + + void setOnlyReadsMemory(Function &F) { + if (!F.onlyReadsMemory()) { + F.setOnlyReadsMemory(); + ++NumAnnotated; + } + } + + void setDoesNotThrow(Function &F) { + if (!F.doesNotThrow()) { + F.setDoesNotThrow(); + ++NumAnnotated; + } + } + + void setDoesNotCapture(Function &F, unsigned n) { + if (!F.doesNotCapture(n)) { + F.setDoesNotCapture(n); + ++NumAnnotated; + } + } + + void setDoesNotAlias(Function &F, unsigned n) { + if (!F.doesNotAlias(n)) { + F.setDoesNotAlias(n); + ++NumAnnotated; + } + } + + // inferPrototypeAttributes - Analyze the name and prototype of the + // given function and set any applicable attributes. Returns true + // if any attributes were set and false otherwise. + bool inferPrototypeAttributes(Function &F); + + // annotateLibraryCalls - Adds attributes to well-known standard library + // call declarations. + bool annotateLibraryCalls(const CallGraphSCC &SCC); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); CallGraphSCCPass::getAnalysisUsage(AU); } private: AliasAnalysis *AA; + TargetLibraryInfo *TLI; }; } @@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) @@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } +/// inferPrototypeAttributes - Analyze the name and prototype of the +/// given function and set any applicable attributes. Returns true +/// if any attributes were set and false otherwise. +bool FunctionAttrs::inferPrototypeAttributes(Function &F) { + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) + return false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strcmp: + case LibFunc::strspn: + case LibFunc::strncmp: + case LibFunc::strcspn: + case LibFunc::strcoll: + case LibFunc::strcasecmp: + case LibFunc::strncasecmp: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::scanf: + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat: + case LibFunc::sscanf: + case LibFunc::sprintf: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + break; + case LibFunc::system: + if (FTy->getNumParams() != 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotAlias(F, 0); + break; + case LibFunc::mkdir: + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::read: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::rmdir: + case LibFunc::rewind: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::rename: + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::chmod: + case LibFunc::chown: + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + break; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + break; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + case LibFunc::fread: + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + case LibFunc::fputs: + case LibFunc::fscanf: + case LibFunc::fprintf: + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::gets: + case LibFunc::getchar: + setDoesNotThrow(F); + break; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::uname: + case LibFunc::unlink: + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::pread: + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; these are valid pthread cancellation points. + setDoesNotCapture(F, 2); + break; + case LibFunc::putchar: + setDoesNotThrow(F); + break; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vsscanf: + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + break; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + setDoesNotCapture(F, 4); + break; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + default: + // Didn't mark any attributes. + return false; + } + + return true; +} + +/// annotateLibraryCalls - Adds attributes to well-known standard library +/// call declarations. +bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { + bool MadeChange = false; + + // Check each function in turn annotating well-known library function + // declarations with attributes. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F != 0 && F->isDeclaration()) + MadeChange |= inferPrototypeAttributes(*F); + } + + return MadeChange; +} + bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { AA = &getAnalysis<AliasAnalysis>(); + TLI = &getAnalysis<TargetLibraryInfo>(); - bool Changed = AddReadAttrs(SCC); + bool Changed = annotateLibraryCalls(SCC); + Changed |= AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); return Changed; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..7595da08d3 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -44,7 +44,7 @@ namespace { } void set(const APFloat& C); - + void negate(); bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } @@ -79,6 +79,14 @@ namespace { bool isInt() const { return !IsFp; } + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; @@ -150,7 +158,9 @@ namespace { typedef SmallVector<const FAddend*, 4> AddendVect; Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); @@ -159,6 +169,7 @@ namespace { Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); @@ -203,7 +214,31 @@ void FAddendCoef::set(const APFloat& C) { IsFp = BufHasFpVal = true; } -void FAddendCoef::operator=(const FAddendCoef& That) { +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; +} + +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { if (That.isInt()) set(That.IntVal); else @@ -222,13 +257,13 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); return; } APFloat &T = getFpVal(); - T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } void FAddendCoef::operator-=(const FAddendCoef &That) { @@ -243,13 +278,13 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); return; } APFloat &T = getFpVal(); - T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); } void FAddendCoef::operator*=(const FAddendCoef &That) { @@ -272,11 +307,12 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) - set(APFloat(Semantic, IntVal)); + convertToFpType(Semantic); APFloat &F0 = getFpVal(); if (That.isInt()) - F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); else F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); @@ -388,6 +424,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -471,7 +579,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -627,7 +736,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +749,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d162223a6f..2ee1278d23 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1610,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1631,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } @@ -1653,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bad46b4dab..a96e754f3d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, } } +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). static bool isHighOnes(const ConstantInt *CI) { @@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // If a magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (ArrayElementCount <= 32 || - (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { - Type *Ty; - if (ArrayElementCount <= 32) + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } } return 0; @@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; } + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + break; + } + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; @@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); + // If the shift is NSW and we compare to 0, then it is just shifting out + // sign bits, no need for an AND either. + if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); @@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } + // If this is a signed comparison to 0 and the shift is sign preserving, + // use the shift LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && @@ -1333,13 +1399,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, @@ -1531,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(pred, X, NegX); } } + break; + case Instruction::Mul: + if (RHSV == 0 && BO->hasNoSignedWrap()) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // The trivial case (mul X, 0) is handled by InstSimplify + // General case : (mul X, C) != 0 iff X != 0 + // (mul X, C) == 0 iff X == 0 + if (!BOC->isZero()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Constant::getNullValue(RHS->getType())); + } + } + break; default: break; } } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index a262d711d3..121aa1f8d7 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // If this is a non-volatile load or a cast from the same type, // merge. if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + Type *FIOpndTy = FI->getOperand(0)->getType(); + if (TI->getOperand(0)->getType() != FIOpndTy) return 0; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); - if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != - FI->getOperand(0)->getType()->getVectorNumElements()) + if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || + CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) return 0; } else { return 0; // unknown unary op. diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475b1d..623c470506 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v3"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F); - void createInitializerPoisonCalls(Module &M, - Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); void emitShadowMapping(Module &M, IRBuilder<> &IRB) const; virtual bool doInitialization(Module &M); @@ -315,7 +313,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass { void initializeCallbacks(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); - void createInitializerPoisonCalls(Module &M, Value *FirstAddr, - Value *LastAddr); + void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t RedzoneSize() const { return RedzoneSizeForScale(Mapping.Scale); } @@ -531,9 +528,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -750,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, } void AddressSanitizerModule::createInitializerPoisonCalls( - Module &M, Value *FirstAddr, Value *LastAddr) { + Module &M, GlobalValue *ModuleName) { // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); // If that function is not present, this TU contains no globals, or they have @@ -762,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls( IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. - IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); + IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); @@ -836,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); @@ -885,11 +886,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -897,9 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - // The addresses of the first and last dynamically initialized globals in - // this TU. Used in initialization order checking. - Value *FirstDynamic = 0, *LastDynamic = 0; + bool HasDynamicallyInitializedGlobals = false; + + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + // We shouldn't merge same module names, as this string serves as unique + // module ID in runtime. + ModuleName->setUnnamedAddr(false); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; @@ -930,11 +936,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,15 +960,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); // Populate the first and last globals declared in this TU. - if (CheckInitOrder && GlobalHasDynamicInitializer) { - LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); - if (FirstDynamic == 0) - FirstDynamic = LastDynamic; - } + if (CheckInitOrder && GlobalHasDynamicInitializer) + HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } @@ -977,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (CheckInitOrder && FirstDynamic && LastDynamic) - createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + if (CheckInitOrder && HasDynamicallyInitializedGlobals) + createInitializerPoisonCalls(M, ModuleName); IRB.CreateCall2(AsanRegisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); @@ -1095,6 +1095,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); @@ -1312,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() { ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } - // This string will be parsed by the run-time (DescribeStackAddress). + // This string will be parsed by the run-time (DescribeAddressIfStack). SmallString<2048> StackDescriptionStorage; raw_svector_ostream StackDescription(StackDescriptionStorage); - StackDescription << F.getName() << " " << AllocaVec.size() << " "; + StackDescription << AllocaVec.size() << " "; // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; @@ -1348,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() { } assert(Pos == LocalStackSize); - // Write the Magic value and the frame description constant to the redzone. + // The left-most redzone has enough space for at least 4 pointers. + // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), BasePlus0); - Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, - ASan.LongSize/8)); - BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); + // Write the frame description constant to redzone[1]. + Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), + IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), StackDescription.str()); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, + 2 * ASan.LongSize/8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); // Poison the stack redzones at the entry. Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a79873cbf6..2edd151869 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,8 +29,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/PathV2.h" #include "llvm/Support/raw_ostream.h" @@ -39,35 +41,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), - UseExtraChecksum(false), NoRedZone(false), - NoFunctionNamesInData(false) { - memcpy(Version, DefaultGCovVersion, 4); + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, const char (&Version)[4], - bool UseExtraChecksum, bool NoRedZone, - bool NoFunctionNamesInData) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - UseExtraChecksum(UseExtraChecksum), NoRedZone(NoRedZone), - NoFunctionNamesInData(NoFunctionNamesInData) { - memcpy(this->Version, Version, 4); - assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -78,6 +102,8 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteWriteoutFunctionListFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -88,23 +114,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - char Version[4]; - bool UseExtraChecksum; - bool NoRedZone; - bool NoFunctionNamesInData; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -115,13 +140,14 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - const char (&Version)[4], - bool UseExtraChecksum, - bool NoRedZone, - bool NoFunctionNamesInData) { - return new GCOVProfiler(EmitNotes, EmitData, Version, UseExtraChecksum, - NoRedZone, NoFunctionNamesInData); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); +} + +static std::string getFunctionName(DISubprogram SP) { + if (!SP.getLinkageName().empty()) + return SP.getLinkageName(); + return SP.getName(); } namespace { @@ -260,7 +286,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseExtraChecksum) { + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -272,16 +298,16 @@ namespace { ReturnBlock = new GCOVBlock(i++, os); writeBytes(FunctionTag, 4); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum - writeGCOVString(SP.getName()); + writeGCOVString(getFunctionName(SP)); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); } @@ -356,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { SmallString<128> Filename = CU.getFilename(); sys::path::replace_extension(Filename, NewStem); - return sys::path::filename(Filename.str()); + StringRef FName = sys::path::filename(Filename); + SmallString<128> CurPath; + if (sys::fs::current_path(CurPath)) return FName; + sys::path::append(CurPath, FName.str()); + return CurPath.str(); } bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -382,7 +412,7 @@ void GCOVProfiler::emitGCNO() { raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); out.write("oncg", 4); - out.write(Version, 4); + out.write(ReversedVersion, 4); out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); @@ -392,7 +422,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, i, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -522,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Type *Params[] = { + PointerType::get(FTy, 0), + PointerType::get(FTy, 0) + }; + FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + + // Inialize the environment and register the local writeout and flush + // functions. + Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); + Builder.CreateCall2(GCOVInit, WriteoutF, FlushF); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -619,6 +679,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() { return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } +Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); +} + +Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -637,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -646,7 +716,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); @@ -664,15 +734,15 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(CU, "gcda"); Builder.CreateCall2(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda), - Builder.CreateGlobalStringPtr(Version)); + Builder.CreateGlobalStringPtr(ReversedVersion)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { DISubprogram SP(CountersBySP[j].second); - Builder.CreateCall3(EmitFunction, - Builder.getInt32(j), - NoFunctionNamesInData ? - Constant::getNullValue(Builder.getInt8PtrTy()) : - Builder.CreateGlobalStringPtr(SP.getName()), - Builder.getInt8(UseExtraChecksum)); + Builder.CreateCall3( + EmitFunction, Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(getFunctionName(SP)) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = @@ -684,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout( Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Builder.getInt32Ty(), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -715,7 +765,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); // Create basic blocks for function. @@ -760,18 +810,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -796,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fce6513a97..4e75904ded 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given patter"), cl::Hidden, cl::init(0xff)); +static cl::opt<bool> ClPoisonUndef("msan-poison-undef", + cl::desc("poison undef temps"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), @@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Clean shadow (all zeroes) means all bits of the value are defined /// (initialized). - Value *getCleanShadow(Value *V) { + Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) return 0; @@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return ConstantStruct::get(ST, Vals); } + /// \brief Create a dirty shadow for a given value. + Constant *getPoisonedShadow(Value *V) { + Type *ShadowTy = getShadowTy(V); + if (!ShadowTy) + return 0; + return getPoisonedShadow(ShadowTy); + } + /// \brief Create a clean (zero) origin. Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); @@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return Shadow; } if (UndefValue *U = dyn_cast<UndefValue>(V)) { - Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V); DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); (void)U; return AllOnes; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index f93c5ab4c8..299060a42f 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit( static cl::opt<bool> ClInstrumentAtomics( "tsan-instrument-atomics", cl::init(true), cl::desc("Instrument atomics"), cl::Hidden); +static cl::opt<bool> ClInstrumentMemIntrinsics( + "tsan-instrument-memintrinsics", cl::init(true), + cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); +STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads"); STATISTIC(NumOmittedReadsFromConstantGlobals, "Number of reads from constant globals"); STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); @@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass { void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I); bool instrumentAtomic(Instruction *I); + bool instrumentMemIntrinsic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, SmallVectorImpl<Instruction*> &All); bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr); DataLayout *TD; + Type *IntptrTy; SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; IntegerType *OrdTy; @@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanAtomicThreadFence; Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; + Function *TsanVptrLoad; + Function *MemmoveFn, *MemcpyFn, *MemsetFn; }; } // namespace @@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); + + MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL)); + MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL)); + MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy, NULL)); } bool ThreadSanitizer::doInitialization(Module &M) { @@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); + IntptrTy = IRB.getIntPtrTy(TD); Value *TsanInit = M.getOrInsertFunction("__tsan_init", IRB.getVoidTy(), NULL); appendToGlobalCtors(M, cast<Function>(TsanInit), 0); @@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; SmallVector<Instruction*, 8> AtomicAccesses; + SmallVector<Instruction*, 8> MemIntrinCalls; bool Res = false; bool HasCalls = false; @@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { else if (isa<ReturnInst>(BI)) RetVec.push_back(BI); else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { + if (isa<MemIntrinsic>(BI)) + MemIntrinCalls.push_back(BI); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } @@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Res |= instrumentAtomic(AtomicAccesses[i]); } + if (ClInstrumentMemIntrinsics) + for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) { + Res |= instrumentMemIntrinsic(MemIntrinCalls[i]); + } + // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); @@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { NumInstrumentedVtableWrites++; return true; } + if (!IsWrite && isVtableAccess(I)) { + IRB.CreateCall(TsanVptrLoad, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + NumInstrumentedVtableReads++; + return true; + } Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); if (IsWrite) NumInstrumentedWrites++; @@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { return IRB->getInt32(v); } +// If a memset intrinsic gets inlined by the code gen, we will miss races on it. +// So, we either need to ensure the intrinsic is not inlined, or instrument it. +// We do not instrument memset/memmove/memcpy intrinsics (too complicated), +// instead we simply replace them with regular function calls, which are then +// intercepted by the run-time. +// Since tsan is running after everyone else, the calls should not be +// replaced back with intrinsics. If that becomes wrong at some point, +// we will need to call e.g. __tsan_memset to avoid the intrinsics. +bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { + IRBuilder<> IRB(I); + if (MemSetInst *M = dyn_cast<MemSetInst>(I)) { + IRB.CreateCall3(MemsetFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) { + IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } + return false; +} + // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x // standards. For background see C++11 standard. A slightly older, publically // available draft of the standard (not entirely up-to-date, but close enough diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 5aada9c373..8f917aeb37 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, switch (Class) { case IC_Autorelease: case IC_AutoreleaseRV: + case IC_IntrinsicUser: case IC_User: // These operations never directly modify a reference count. return false; diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index e062b66555..39670f339e 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_copyWeak") || M.getNamedValue("objc_retainedObject") || M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); + M.getNamedValue("objc_unretainedPointer") || + M.getNamedValue("clang.arc.use"); } /// \enum InstructionClass @@ -89,6 +90,7 @@ enum InstructionClass { IC_CopyWeak, ///< objc_copyWeak (derived) IC_DestroyWeak, ///< objc_destroyWeak (derived) IC_StoreStrong, ///< objc_storeStrong (derived) + IC_IntrinsicUser, ///< clang.arc.use IC_CallOrUser, ///< could call objc_release and/or "use" pointers IC_Call, ///< could call objc_release IC_User, ///< could "use" a pointer @@ -97,6 +99,13 @@ enum InstructionClass { raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class); +/// \brief Test if the given class is a kind of user. +inline static bool IsUser(InstructionClass Class) { + return Class == IC_User || + Class == IC_CallOrUser || + Class == IC_IntrinsicUser; +} + /// \brief Test if the given class is objc_retain or equivalent. static inline bool IsRetain(InstructionClass Class) { return Class == IC_Retain || @@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) { /// \brief Test if the given class represents instructions which return their /// argument verbatim. static inline bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. return Class == IC_Retain || Class == IC_RetainRV || Class == IC_Autorelease || Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || Class == IC_NoopCast; } @@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); } -static inline bool isNullOrUndef(const Value *V) { +static inline bool IsNullOrUndef(const Value *V) { return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); } -static inline bool isNoopInstruction(const Instruction *I) { +static inline bool IsNoopInstruction(const Instruction *I) { return isa<BitCastInst>(I) || (isa<GetElementPtrInst>(I) && cast<GetElementPtrInst>(I)->hasAllZeroIndices()); diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 1c13d1cbea..b96c64fe81 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -410,7 +410,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { break; } --BBI; - } while (isNoopInstruction(BBI)); + } while (IsNoopInstruction(BBI)); if (&*BBI == GetObjCArg(Inst)) { DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " @@ -429,7 +429,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { case IC_InitWeak: { // objc_initWeak(p, null) => *p = null CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(1))) { Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType())); Changed = true; @@ -453,6 +453,10 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (isa<AllocaInst>(Inst)) TailOkForStoreStrongs = false; continue; + case IC_IntrinsicUser: + // Remove calls to @clang.arc.use(...). + Inst->eraseFromParent(); + continue; default: continue; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 9c14949877..924fb0a9da 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -211,6 +212,9 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { // These special functions make copies of their pointer arguments. return true; } + case IC_IntrinsicUser: + // Use by the use intrinsic is not an escape. + continue; case IC_User: case IC_None: // Use by an instruction which copies the value is an escape if the @@ -387,10 +391,6 @@ namespace { /// KnownSafe is true when either of these conditions is satisfied. bool KnownSafe; - /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain - /// calls). - bool IsRetainBlock; - /// True of the objc_release calls are all marked with the "tail" keyword. bool IsTailCallRelease; @@ -407,9 +407,7 @@ namespace { SmallPtrSet<Instruction *, 2> ReverseInsertPts; RRInfo() : - KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), - ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); }; @@ -417,7 +415,6 @@ namespace { void RRInfo::clear() { KnownSafe = false; - IsRetainBlock = false; IsTailCallRelease = false; ReleaseMetadata = 0; Calls.clear(); @@ -451,11 +448,11 @@ namespace { KnownPositiveRefCount = true; } - void ClearRefCount() { + void ClearKnownPositiveRefCount() { KnownPositiveRefCount = false; } - bool IsKnownIncremented() const { + bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; } @@ -486,10 +483,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - // If we're not in a sequence (anymore), drop all associated state. if (Seq == S_None) { Partial = false; @@ -698,6 +691,228 @@ void BBState::MergeSucc(const BBState &Other) { MI->second.Merge(PtrState(), /*TopDown=*/false); } +// Only enable ARC Annotations if we are building a debug version of +// libObjCARCOpts. +#ifndef NDEBUG +#define ARC_ANNOTATIONS +#endif + +// Define some macros along the lines of DEBUG and some helper functions to make +// it cleaner to create annotations in the source code and to no-op when not +// building in debug mode. +#ifdef ARC_ANNOTATIONS + +#include "llvm/Support/CommandLine.h" + +/// Enable/disable ARC sequence annotations. +static cl::opt<bool> +EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false)); + +/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an +/// instruction so that we can track backwards when post processing via the llvm +/// arc annotation processor tool. If the function is an +static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, + Value *Ptr) { + MDString *Hash = 0; + + // If pointer is a result of an instruction and it does not have a source + // MDNode it, attach a new MDNode onto it. If pointer is a result of + // an instruction and does have a source MDNode attached to it, return a + // reference to said Node. Otherwise just return 0. + if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) { + MDNode *Node; + if (!(Node = Inst->getMetadata(NodeId))) { + // We do not have any node. Generate and attatch the hash MDString to the + // instruction. + + // We just use an MDString to ensure that this metadata gets written out + // of line at the module level and to provide a very simple format + // encoding the information herein. Both of these makes it simpler to + // parse the annotations by a simple external program. + std::string Str; + raw_string_ostream os(Str); + os << "(" << Inst->getParent()->getParent()->getName() << ",%" + << Inst->getName() << ")"; + + Hash = MDString::get(Inst->getContext(), os.str()); + Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash)); + } else { + // We have a node. Grab its hash and return it. + assert(Node->getNumOperands() == 1 && + "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand."); + Hash = cast<MDString>(Node->getOperand(0)); + } + } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) { + std::string str; + raw_string_ostream os(str); + os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName() + << ")"; + Hash = MDString::get(Arg->getContext(), os.str()); + } + + return Hash; +} + +static std::string SequenceToString(Sequence A) { + std::string str; + raw_string_ostream os(str); + os << A; + return os.str(); +} + +/// Helper function to change a Sequence into a String object using our overload +/// for raw_ostream so we only have printing code in one location. +static MDString *SequenceToMDString(LLVMContext &Context, + Sequence A) { + return MDString::get(Context, SequenceToString(A)); +} + +/// A simple function to generate a MDNode which describes the change in state +/// for Value *Ptr caused by Instruction *Inst. +static void AppendMDNodeToInstForPtr(unsigned NodeId, + Instruction *Inst, + Value *Ptr, + MDString *PtrSourceMDNodeID, + Sequence OldSeq, + Sequence NewSeq) { + MDNode *Node = 0; + Value *tmp[3] = {PtrSourceMDNodeID, + SequenceToMDString(Inst->getContext(), + OldSeq), + SequenceToMDString(Inst->getContext(), + NewSeq)}; + Node = MDNode::get(Inst->getContext(), + ArrayRef<Value*>(tmp, 3)); + + Inst->setMetadata(NodeId, Node); +} + +/// Add to the beginning of the basic block llvm.ptr.annotations which show the +/// state of a pointer at the entrance to a basic block. +static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Add to the end of the basic block llvm.ptr.annotations which show the state +/// of the pointer at the bottom of the basic block. +static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, llvm::prior(BB->end())); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Adds a source annotation to pointer and a state change annotation to Inst +/// referencing the source annotation and the old/new state of pointer. +static void GenerateARCAnnotation(unsigned InstMDId, + unsigned PtrMDId, + Instruction *Inst, + Value *Ptr, + Sequence OldSeq, + Sequence NewSeq) { + if (EnableARCAnnotations) { + // First generate the source annotation on our pointer. This will return an + // MDString* if Ptr actually comes from an instruction implying we can put + // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), + // then we know that our pointer is from an Argument so we put a reference + // to the argument number. + // + // The point of this is to make it easy for the + // llvm-arc-annotation-processor tool to cross reference where the source + // pointer is in the LLVM IR since the LLVM IR parser does not submit such + // information via debug info for backends to use (since why would anyone + // need such a thing from LLVM IR besides in non standard cases + // [i.e. this]). + MDString *SourcePtrMDNode = + AppendMDNodeToSourcePtr(PtrMDId, Ptr); + AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq, + NewSeq); + } +} + +// The actual interface for accessing the above functionality is defined via +// some simple macros which are defined below. We do this so that the user does +// not need to pass in what metadata id is needed resulting in cleaner code and +// additionally since it provides an easy way to conditionally no-op all +// annotation support in a non-debug build. + +/// Use this macro to annotate a sequence state change when processing +/// instructions bottom up, +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) +/// Use this macro to annotate a sequence state change when processing +/// instructions top down. +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) + +#else // !ARC_ANNOTATION +// If annotations are off, noop. +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) +#endif // !ARC_ANNOTATION + namespace { /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { @@ -738,6 +953,15 @@ namespace { /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. unsigned NoObjCARCExceptionsMDKind; +#ifdef ARC_ANNOTATIONS + /// The Metadata Kind for llvm.arc.annotation.bottomup metadata. + unsigned ARCAnnotationBottomUpMDKind; + /// The Metadata Kind for llvm.arc.annotation.topdown metadata. + unsigned ARCAnnotationTopDownMDKind; + /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata. + unsigned ARCAnnotationProvenanceSourceMDKind; +#endif // ARC_ANNOATIONS + Constant *getRetainRVCallee(Module *M); Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); @@ -751,6 +975,8 @@ namespace { bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); + bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock, + InstructionClass &Class); void OptimizeIndividualCalls(Function &F); void CheckForCFGHazards(const BasicBlock *BB, @@ -958,7 +1184,7 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { // Check that the call is next to the retain. BasicBlock::const_iterator I = Call; ++I; - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I != Retain) return; @@ -990,14 +1216,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { if (Call->getParent() == RetainRV->getParent()) { BasicBlock::const_iterator I = Call; ++I; - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } @@ -1008,7 +1234,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // pointer. In this case, we can delete the pair. BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); + do --I; while (I != Begin && IsNoopInstruction(I)); if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && GetObjCArg(I) == Arg) { Changed = true; @@ -1084,6 +1310,35 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, } +// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain +// calls. +// +// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and +// does not escape (following the rules of block escaping), strength reduce the +// objc_retainBlock to an objc_retain. +// +// TODO: If an objc_retainBlock call is dominated period by a previous +// objc_retainBlock call, strength reduce the objc_retainBlock to an +// objc_retain. +bool +ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, + InstructionClass &Class) { + assert(GetBasicInstructionClass(Inst) == Class); + assert(IC_RetainBlock == Class); + + // If we can not optimize Inst, return false. + if (!IsRetainBlockOptimizable(Inst)) + return false; + + CallInst *RetainBlock = cast<CallInst>(Inst); + RetainBlock->setCalledFunction(getRetainCallee(F.getParent())); + // Remove copy_on_escape metadata. + RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); + Class = IC_Retain; + + return true; +} + /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { @@ -1125,7 +1380,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_InitWeak: case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { + if (IsNullOrUndef(CI->getArgOperand(0))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), @@ -1146,8 +1401,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_CopyWeak: case IC_MoveWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(0)) || + IsNullOrUndef(CI->getArgOperand(1))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), @@ -1167,6 +1422,12 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } break; } + case IC_RetainBlock: + // If we strength reduce an objc_retainBlock to amn objc_retain, continue + // onto the objc_retain peephole optimizations. Otherwise break. + if (!OptimizeRetainBlockCall(F, Inst, Class)) + break; + // FALLTHROUGH case IC_Retain: OptimizeRetainCall(F, Inst); break; @@ -1245,7 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { const Value *Arg = GetObjCArg(Inst); // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { + if (IsNullOrUndef(Arg)) { Changed = true; ++NumNoops; DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " @@ -1280,7 +1541,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) + if (IsNullOrUndef(Incoming)) HasNull = true; else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) .getNumSuccessors() != 1) { @@ -1334,7 +1595,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { + if (!IsNullOrUndef(Incoming)) { CallInst *Clone = cast<CallInst>(CInst->clone()); Value *Op = PN->getIncomingValue(i); Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); @@ -1502,21 +1763,21 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, } MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); + Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; + ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq); + S.ResetSequenceProgress(NewSeq); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. + break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1524,7 +1785,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - switch (S.GetSeq()) { + Sequence OldSeq = S.GetSeq(); + switch (OldSeq) { case S_Stop: case S_Release: case S_MovableRelease: @@ -1534,10 +1796,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_CanRelease: // Don't do retain+release tracking for IC_RetainRV, because it's // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; + if (Class != IC_RetainRV) Retains[Inst] = S.RRI; - } S.ClearSequenceProgress(); break; case S_None: @@ -1545,6 +1805,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); return NestingDetected; } case IC_AutoreleasepoolPop: @@ -1571,10 +1832,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Use: S.SetSeq(S_CanRelease); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq()); continue; case S_CanRelease: case S_Release: @@ -1601,10 +1863,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, else S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } else if (Seq == S_Release && IsUser(Class)) { // Non-movable releases depend on any possible objc pointer use. S.SetSeq(S_Stop); + ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); assert(S.RRI.ReverseInsertPts.empty()); // As above; handle invoke specially. if (isa<InvokeInst>(Inst)) @@ -1614,8 +1877,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, } break; case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { S.SetSeq(S_Use); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } break; case S_CanRelease: case S_Use: @@ -1654,6 +1919,21 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, } } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. + for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(), + E = MyStates.bottom_up_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.bottomup.bbend", + BB, Ptr, Seq); + } + } +#endif + + // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { Instruction *Inst = llvm::prior(I); @@ -1677,6 +1957,20 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(), + E = MyStates.bottom_up_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBEntranceAnnotation("llvm.arc.annotation.bottomup.bbstart", + BB, Ptr, Seq); + } + } +#endif + return NestingDetected; } @@ -1690,11 +1984,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (Class) { case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. + break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1714,9 +2007,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, if (S.GetSeq() == S_Retain) NestingDetected = true; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain); S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.Calls.insert(Inst); } @@ -1730,7 +2023,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -1741,6 +2034,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); Releases[Inst] = S.RRI; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); S.ClearSequenceProgress(); break; case S_None: @@ -1776,10 +2070,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease); assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); @@ -1801,8 +2096,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible direct uses. switch (Seq) { case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { S.SetSeq(S_Use); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); + } break; case S_Retain: case S_Use: @@ -1843,6 +2140,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, } } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBEntranceAnnotation("llvm.arc.annotation.topdown.bbstart", + BB, Ptr, Seq); + } + } +#endif + // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; @@ -1852,6 +2163,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. + for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.topdown.bbend", + BB, Ptr, Seq); + } + } +#endif + CheckForCFGHazards(BB, BBStates, MyStates); return NestingDetected; } @@ -1991,15 +2316,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? - getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); + CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt); Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); + Call->setTailCall(); DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call << "\n" @@ -2075,7 +2394,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> unsigned OldCount = 0; unsigned NewCount = 0; bool FirstRelease = true; - bool FirstRetain = true; for (;;) { for (SmallVectorImpl<Instruction *>::const_iterator NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { @@ -2156,16 +2474,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> OldDelta += PathCount; OldCount += PathCount; - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - return false; - // Collect the optimal insertion points. if (!KnownSafe) for (SmallPtrSet<Instruction *, 2>::const_iterator @@ -2271,6 +2579,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> ReleasesToMove, Arg, KnownSafe, AnyPairsCompletelyEliminated); +#ifdef ARC_ANNOTATIONS + // Do not move calls if ARC annotations are requested. If we were to move + // calls in this case, we would not be able + PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations; +#endif // ARC_ANNOTATIONS + if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some // code! @@ -2392,6 +2706,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { goto clobbered; case IC_AutoreleasepoolPush: case IC_None: + case IC_IntrinsicUser: case IC_User: // Weak pointers are only modified through the weak entry points // (and arbitrary calls, which could call the weak entry points). @@ -2617,6 +2932,14 @@ bool ObjCARCOpt::doInitialization(Module &M) { M.getContext().getMDKindID("clang.arc.copy_on_escape"); NoObjCARCExceptionsMDKind = M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); +#ifdef ARC_ANNOTATIONS + ARCAnnotationBottomUpMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.bottomup"); + ARCAnnotationTopDownMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.topdown"); + ARCAnnotationProvenanceSourceMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.provenancesource"); +#endif // ARC_ANNOTATIONS // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index a841c64a9f..03e12d4fd7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, return OS << "IC_Call"; case IC_User: return OS << "IC_User"; + case IC_IntrinsicUser: + return OS << "IC_IntrinsicUser"; case IC_None: return OS << "IC_None"; } @@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - // No arguments. + // No (mandatory) arguments. if (AI == AE) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Case("clang.arc.use", IC_IntrinsicUser) .Default(IC_CallOrUser); // One argument. @@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { return StringSwitch<InstructionClass>(F->getName()) .Case("objc_moveWeak", IC_MoveWeak) .Case("objc_copyWeak", IC_CopyWeak) + // Ignore annotation calls. This is important to stop the + // optimizer from treating annotations as uses which would + // make the state of the pointers they are attempting to + // elucidate to be incorrect. + .Case("llvm.arc.annotation.topdown.bbstart", IC_None) + .Case("llvm.arc.annotation.topdown.bbend", IC_None) + .Case("llvm.arc.annotation.bottomup.bbstart", IC_None) + .Case("llvm.arc.annotation.bottomup.bbend", IC_None) .Default(IC_CallOrUser); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447f1c..129af8d45d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! - patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d646..5d02c68a7a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -64,10 +65,16 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +85,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that must not be merged away + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -87,6 +111,7 @@ namespace { virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); + virtual bool doFinalization(Module &M); const char *getPassName() const { return "Merge internal globals"; @@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } @@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) { return false; } +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + Pass *llvm::createGlobalMergePass(const TargetLowering *tli) { return new GlobalMerge(tli); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 97fff7e782..8e76c78f5a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; + // Computing the value outside of the loop brings no benefit if : + // - it is definitely used inside the loop in a way which can not be + // optimized away. + // - no use outside of the loop can take advantage of hoisting the + // computation out of the loop + if (ExitValue->getSCEVType()>=scMulExpr) { + unsigned NumHardInternalUses = 0; + unsigned NumSoftExternalUses = 0; + unsigned NumUses = 0; + for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end(); + IB!=IE && NumUses<=6 ; ++IB) { + Instruction *UseInstr = cast<Instruction>(*IB); + unsigned Opc = UseInstr->getOpcode(); + NumUses++; + if (L->contains(UseInstr)) { + if (Opc == Instruction::Call || Opc == Instruction::Ret) + NumHardInternalUses++; + } else { + if (Opc == Instruction::PHI) { + // Do not count the Phi as a use. LCSSA may have inserted + // plenty of trivial ones. + NumUses--; + for (Value::use_iterator PB=UseInstr->use_begin(), + PE=UseInstr->use_end(); + PB!=PE && NumUses<=6 ; ++PB, ++NumUses) { + unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode(); + if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret) + NumSoftExternalUses++; + } + continue; + } + if (Opc != Instruction::Call && Opc != Instruction::Ret) + NumSoftExternalUses++; + } + } + if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses) + continue; + } + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9c67e327e2..0b62050b17 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -34,13 +34,9 @@ namespace { } // Possibly eliminate loop L if it is dead. - bool runOnLoop(Loop* L, LPPassManager& LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM); - bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, - bool &Changed, BasicBlock *Preheader); - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); @@ -53,6 +49,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } + + private: + bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, + bool &Changed, BasicBlock *Preheader); + }; } @@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) -Pass* llvm::createLoopDeletionPass() { +Pass *llvm::createLoopDeletionPass() { return new LoopDeletion(); } -/// IsLoopDead - Determined if a loop is dead. This assumes that we've already +/// isLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. -bool LoopDeletion::IsLoopDead(Loop* L, - SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, +bool LoopDeletion::isLoopDead(Loop *L, + SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop @@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L, // sufficient to guarantee that no loop-variant values are used outside // of the loop. BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]); // Make sure all exiting blocks produce the same incoming value for the exit // block. If there are different incoming values for different exiting // blocks, then it is impossible to statically determine which value should // be used. - for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) { if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - if (Instruction* I = dyn_cast<Instruction>(incoming)) + if (Instruction *I = dyn_cast<Instruction>(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; @@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. - BasicBlock* preheader = L->getLoopPreheader(); + BasicBlock *preheader = L->getLoopPreheader(); if (!preheader) return false; @@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); + ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { SE.forgetLoop(L); // Connect the preheader directly to the exit block. - TerminatorInst* TI = preheader->getTerminator(); + TerminatorInst *TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. - BasicBlock* exitingBlock = exitingBlocks[0]; + BasicBlock *exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { + while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); @@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. - DominatorTree& DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { @@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. - LoopInfo& loopInfo = getAnalysis<LoopInfo>(); + LoopInfo &loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4e4cb86464..73e44d7edf 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg, } if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); - if (isLoser()) + if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } @@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (!NewRHS) return Cond; } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) @@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // by LSR. const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); + // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); Value *IVSrc = 0; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 0da3746950..1f343136e5 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -110,6 +110,51 @@ namespace { } }; }; + + /// Utility class representing a non-constant Xor-operand. We classify + /// non-constant Xor-Operands into two categories: + /// C1) The operand is in the form "X & C", where C is a constant and C != ~0 + /// C2) + /// C2.1) The operand is in the form of "X | C", where C is a non-zero + /// constant. + /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this + /// operand as "E | 0" + class XorOpnd { + public: + XorOpnd(Value *V); + const XorOpnd &operator=(const XorOpnd &That); + + bool isInvalid() const { return SymbolicPart == 0; } + bool isOrExpr() const { return isOr; } + Value *getValue() const { return OrigVal; } + Value *getSymbolicPart() const { return SymbolicPart; } + unsigned getSymbolicRank() const { return SymbolicRank; } + const APInt &getConstPart() const { return ConstPart; } + + void Invalidate() { SymbolicPart = OrigVal = 0; } + void setSymbolicRank(unsigned R) { SymbolicRank = R; } + + // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. + // The purpose is twofold: + // 1) Cluster together the operands sharing the same symbolic-value. + // 2) Operand having smaller symbolic-value-rank is permuted earlier, which + // could potentially shorten crital path, and expose more loop-invariants. + // Note that values' rank are basically defined in RPO order (FIXME). + // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier + // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", + // "z" in the order of X-Y-Z is better than any other orders. + struct PtrSortFunctor { + bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { + return LHS->getSymbolicRank() < RHS->getSymbolicRank(); + } + }; + private: + Value *OrigVal; + Value *SymbolicPart; + APInt ConstPart; + unsigned SymbolicRank; + bool isOr; + }; } namespace { @@ -137,6 +182,11 @@ namespace { Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, + Value *&Res); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res); bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, SmallVectorImpl<Factor> &Factors); Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, @@ -148,6 +198,42 @@ namespace { }; } +XorOpnd::XorOpnd(Value *V) { + assert(!isa<ConstantInt>(V) && "No ConstantInt"); + OrigVal = V; + Instruction *I = dyn_cast<Instruction>(V); + SymbolicRank = 0; + + if (I && (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And)) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (isa<ConstantInt>(V0)) + std::swap(V0, V1); + + if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) { + ConstPart = C->getValue(); + SymbolicPart = V0; + isOr = (I->getOpcode() == Instruction::Or); + return; + } + } + + // view the operand as "V | 0" + SymbolicPart = V; + ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + isOr = true; +} + +const XorOpnd &XorOpnd::operator=(const XorOpnd &That) { + OrigVal = That.OrigVal; + SymbolicPart = That.SymbolicPart; + ConstPart = That.ConstPart; + SymbolicRank = That.SymbolicRank; + isOr = That.isOr; + return *this; +} + char Reassociate::ID = 0; INITIALIZE_PASS(Reassociate, "reassociate", "Reassociate expressions", false, false) @@ -1040,6 +1126,240 @@ static Value *OptimizeAndOrXor(unsigned Opcode, return 0; } +/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and +/// instruction with the given two operands, and return the resulting +/// instruction. There are two special cases: 1) if the constant operand is 0, +/// it will return NULL. 2) if the constant is ~0, the symbolic operand will +/// be returned. +static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, + const APInt &ConstOpnd) { + if (ConstOpnd != 0) { + if (!ConstOpnd.isAllOnesValue()) { + LLVMContext &Ctx = Opnd->getType()->getContext(); + Instruction *I; + I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), + "and.ra", InsertBefore); + I->setDebugLoc(InsertBefore->getDebugLoc()); + return I; + } + return Opnd; + } + return 0; +} + +// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" +// into "R ^ C", where C would be 0, and R is a symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively; otherwise, false is returned, +// and both "Res" and "ConstOpnd" remain unchanged. +// +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, + APInt &ConstOpnd, Value *&Res) { + // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 + // = ((x | c1) ^ c1) ^ (c1 ^ c2) + // = (x & ~c1) ^ (c1 ^ c2) + // It is useful only when c1 == c2. + if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) { + if (!Opnd1->getValue()->hasOneUse()) + return false; + + const APInt &C1 = Opnd1->getConstPart(); + if (C1 != ConstOpnd) + return false; + + Value *X = Opnd1->getSymbolicPart(); + Res = createAndInstr(I, X, ~C1); + // ConstOpnd was C2, now C1 ^ C2. + ConstOpnd ^= C1; + + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + return true; + } + return false; +} + + +// Helper function of OptimizeXor(). It tries to simplify +// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a +// symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively (If the entire expression is +// evaluated to a constant, the Res is set to NULL); otherwise, false is +// returned, and both "Res" and "ConstOpnd" remain unchanged. +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res) { + Value *X = Opnd1->getSymbolicPart(); + if (X != Opnd2->getSymbolicPart()) + return false; + + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); + + // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) + int DeadInstNum = 1; + if (Opnd1->getValue()->hasOneUse()) + DeadInstNum++; + if (Opnd2->getValue()->hasOneUse()) + DeadInstNum++; + + // Xor-Rule 2: + // (x | c1) ^ (x & c2) + // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1 + // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1 + // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3 + // + if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) { + if (Opnd2->isOrExpr()) + std::swap(Opnd1, Opnd2); + + APInt C3((~C1) ^ C2); + + // Do not increase code size! + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C1; + + } else if (Opnd1->isOrExpr()) { + // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 + // + APInt C3 = C1 ^ C2; + + // Do not increase code size + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C3; + } else { + // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) + // + APInt C3 = C1 ^ C2; + Res = createAndInstr(I, X, C3); + } + + // Put the original operands in the Redo list; hope they will be deleted + // as dead code. + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue())) + RedoInsts.insert(T); + + return true; +} + +/// Optimize a series of operands to an 'xor' instruction. If it can be reduced +/// to a single Value, it is returned, otherwise the Ops list is mutated as +/// necessary. +Value *Reassociate::OptimizeXor(Instruction *I, + SmallVectorImpl<ValueEntry> &Ops) { + if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) + return V; + + if (Ops.size() == 1) + return 0; + + SmallVector<XorOpnd, 8> Opnds; + SmallVector<XorOpnd*, 8> OpndPtrs; + Type *Ty = Ops[0].Op->getType(); + APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); + + // Step 1: Convert ValueEntry to XorOpnd + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + Value *V = Ops[i].Op; + if (!isa<ConstantInt>(V)) { + XorOpnd O(V); + O.setSymbolicRank(getRank(O.getSymbolicPart())); + Opnds.push_back(O); + OpndPtrs.push_back(&Opnds.back()); + } else + ConstOpnd ^= cast<ConstantInt>(V)->getValue(); + } + + // Step 2: Sort the Xor-Operands in a way such that the operands containing + // the same symbolic value cluster together. For instance, the input operand + // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: + // ("x | 123", "x & 789", "y & 456"). + std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); + + // Step 3: Combine adjacent operands + XorOpnd *PrevOpnd = 0; + bool Changed = false; + for (unsigned i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd *CurrOpnd = OpndPtrs[i]; + // The combined value + Value *CV; + + // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" + if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + Changed = true; + if (CV) + *CurrOpnd = XorOpnd(CV); + else { + CurrOpnd->Invalidate(); + continue; + } + } + + if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) { + PrevOpnd = CurrOpnd; + continue; + } + + // step 3.2: When previous and current operands share the same symbolic + // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" + // + if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { + // Remove previous operand + PrevOpnd->Invalidate(); + if (CV) { + *CurrOpnd = XorOpnd(CV); + PrevOpnd = CurrOpnd; + } else { + CurrOpnd->Invalidate(); + PrevOpnd = 0; + } + Changed = true; + } + } + + // Step 4: Reassemble the Ops + if (Changed) { + Ops.clear(); + for (unsigned int i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd &O = Opnds[i]; + if (O.isInvalid()) + continue; + ValueEntry VE(getRank(O.getValue()), O.getValue()); + Ops.push_back(VE); + } + if (ConstOpnd != 0) { + Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + ValueEntry VE(getRank(C), C); + Ops.push_back(VE); + } + int Sz = Ops.size(); + if (Sz == 1) + return Ops.back().Op; + else if (Sz == 0) { + assert(ConstOpnd == 0); + return ConstantInt::get(Ty->getContext(), ConstOpnd); + } + } + + return 0; +} + /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This /// optimizes based on identities. If it can be reduced to a single Value, it /// is returned, otherwise the Ops list is mutated as necessary. @@ -1431,11 +1751,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, default: break; case Instruction::And: case Instruction::Or: - case Instruction::Xor: if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) return Result; break; + case Instruction::Xor: + if (Value *Result = OptimizeXor(I, Ops)) + return Result; + break; + case Instruction::Add: if (Value *Result = OptimizeAdd(I, Ops)) return Result; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 810a553c74..f6bb365216 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -57,11 +57,15 @@ using namespace llvm; STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); -STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); -STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); +STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); +STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions"); +STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found"); +STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses"); +STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); +STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); -STATISTIC(NumDeleted, "Number of instructions deleted"); -STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumDeleted, "Number of instructions deleted"); +STATISTIC(NumVectorized, "Number of vectorized aggregates"); /// Hidden option to force the pass to not use DomTree and mem2reg, instead /// forming SSA values through the SSAUpdater infrastructure. @@ -69,112 +73,167 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { +/// \brief A custom IRBuilder inserter which prefixes all names if they are +/// preserved. +template <bool preserveNames = true> +class IRBuilderPrefixedInserter : + public IRBuilderDefaultInserter<preserveNames> { + std::string Prefix; + public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; + void SetNamePrefix(const Twine &P) { Prefix = P.str(); } - /// \brief The ending offset, not included in the range. - uint64_t EndOffset; +protected: + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<preserveNames>::InsertHelper( + I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); + } +}; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} +// Specialization for not preserving the name is trivial. +template <> +class IRBuilderPrefixedInserter<false> : + public IRBuilderDefaultInserter<false> { +public: + void SetNamePrefix(const Twine &P) {} +}; - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } +/// \brief Provide a typedef for IRBuilder that drops names in release builds. +#ifndef NDEBUG +typedef llvm::IRBuilder<true, ConstantFolder, + IRBuilderPrefixedInserter<true> > IRBuilderTy; +#else +typedef llvm::IRBuilder<false, ConstantFolder, + IRBuilderPrefixedInserter<false> > IRBuilderTy; +#endif +} - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } +namespace { +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief A partition of an alloca. + /// \brief Support for ordering ranges. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. - bool IsSplittable; + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; - } + /// \brief Support comparison with a single offset to allow binary searches. + friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } + + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; + +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. + /// + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } + + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; + +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} - /// \brief A particular use of a partition of the alloca. + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. + Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} + +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} + +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +515,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +533,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. - bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +555,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +566,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +858,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. - iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +873,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +889,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -868,7 +929,7 @@ private: uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(); - MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; + const MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && Offsets.DestBegin == Offsets.SourceBegin) return markAsDead(II); // Skip identity transfers without side-effects. @@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) splitAndMergePartitions(); } + // Record how many partitions we end up with. + NumAllocaPartitions += Partitions.size(); + MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca); + // Now build up the user lists for each of these disjoint partitions by // re-walking the recursive users of the alloca. Uses.resize(Partitions.size()); @@ -1084,22 +1149,31 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) PtrI = UB.visitPtr(AI); assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); + + unsigned NumUses = 0; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx) + NumUses += Uses[Idx].size(); +#endif + NumAllocaPartitionUses += NumUses; + MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca); } Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); else return 0; // Bail if we have weird uses. @@ -1139,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1243,12 +1318,12 @@ public: // may be zapped by an optimization pass in future. if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(ZExt->getOperand(0)); - if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(SExt->getOperand(0)); if (!Arg) - Arg = SI->getOperand(0); + Arg = SI->getValueOperand(); } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - Arg = LI->getOperand(0); + Arg = LI->getPointerOperand(); } else { continue; } @@ -1374,11 +1449,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. - visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1472,7 +1547,7 @@ private: assert(!Loads.empty()); Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); - IRBuilder<> PHIBuilder(&PN); + IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -1495,7 +1570,7 @@ private: TerminatorInst *TI = Pred->getTerminator(); Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); Value *InVal = PN.getIncomingValue(Idx); - IRBuilder<> PredBuilder(TI); + IRBuilderTy PredBuilder(TI); LoadInst *Load = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + @@ -1522,8 +1597,8 @@ private: // inside the load. AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1576,10 +1651,10 @@ private: if (!isSafeSelectToSpeculate(SI, Loads)) return; - IRBuilder<> IRB(&SI); + IRBuilderTy IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1590,7 +1665,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1622,8 +1697,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1640,9 +1715,8 @@ private: /// /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { +static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, + SmallVectorImpl<Value *> &Indices) { if (Indices.empty()) return BasePtr; @@ -1651,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); + return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); } /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1663,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, /// TargetTy. If we can't find one with the same type, we at least try to use /// one with the same size. If none of that works, we just produce the GEP as /// indicated by Indices to have the correct offset. -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, Value *BasePtr, Type *Ty, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Ty == TargetTy) - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); // See if we can descend into a struct and locate a field with the correct // type. @@ -1695,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, if (ElementTy != TargetTy) Indices.erase(Indices.end() - NumLayers, Indices.end()); - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); } /// \brief Recursively compute indices for a natural GEP. /// /// This is the recursive step for getNaturalGEPWithOffset that walks down the /// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Offset == 0) - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); // We can't recurse through pointer types. if (Ty->isPointerTy()) @@ -1728,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, Prefix); + Offset, TargetTy, Indices); } if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { @@ -1741,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } StructType *STy = dyn_cast<StructType>(Ty); @@ -1760,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Get a natural GEP from a base pointer to a particular offset and @@ -1773,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, /// Indices, and setting Ty to the result subtype. /// /// If no natural GEP can be constructed, this function returns null. -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, APInt Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1795,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the @@ -1813,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. -static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, - Value *Ptr, APInt Offset, Type *PointerTy, - const Twine &Prefix) { +static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, + Value *Ptr, APInt Offset, Type *PointerTy) { // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; @@ -1849,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // See if we can perform a natural GEP here. Indices.clear(); if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, - Indices, Prefix)) { + Indices)) { if (P->getType() == PointerTy) { // Zap any offset pointer that we ended up computing in previous rounds. if (OffsetPtr && OffsetPtr->use_empty()) @@ -1884,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, if (!OffsetPtr) { if (!Int8Ptr) { Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), - Prefix + ".raw_cast"); + "raw_cast"); Int8PtrOffset = Offset; } OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), - Prefix + ".raw_idx"); + "raw_idx"); } Ptr = OffsetPtr; // On the off chance we were targeting i8*, guard the bitcast here. if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); + Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); return Ptr; } @@ -1910,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1932,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { /// This will try various different casting techniques, such as bitcasts, /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test /// two types for viability with this routine. -static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *Ty) { assert(canConvertValue(DL, V->getType(), Ty) && "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1976,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1996,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2062,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2073,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2088,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2104,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2124,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, return WholeAllocaOp; } -static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { DEBUG(dbgs() << " start: " << *V << "\n"); @@ -2147,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, return V; } -static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, +static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name) { IntegerType *IntTy = cast<IntegerType>(Old->getType()); IntegerType *Ty = cast<IntegerType>(V->getType()); @@ -2178,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } -static Value *extractVector(IRBuilder<> &IRB, Value *V, +static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(V->getType()); @@ -2206,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V, return V; } -static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, +static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(Old->getType()); assert(VecTy && "Can only insert a vector into a vector"); @@ -2296,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; - // The name prefix to use when rewriting instructions for this alloca. - std::string NamePrefix; + // Utility IR builder, whose name prefix is setup for each visited use, and + // the insertion point is set to point to the user. + IRBuilderTy IRB; public: AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, @@ -2313,7 +2395,8 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), + IRB(NewAI.getContext(), ConstantFolder()) { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2335,14 +2418,21 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); - NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); + + Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); + IRB.SetInsertPoint(OldUserI); + IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); + + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2364,14 +2454,10 @@ private: llvm_unreachable("No rewrite rule for this instruction!"); } - Twine getName(const Twine &Suffix) { - return NamePrefix + Suffix; - } - - Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { + Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); - return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); + return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); } /// \brief Compute suitable alignment to access an offset into the new alloca. @@ -2421,27 +2507,27 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { + Value *rewriteVectorizedLoadInst() { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); + "load"); + return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); } - Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; if (Offset > 0 || EndOffset < NewAllocaEndOffset) V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); + "extract"); return V; } @@ -2451,56 +2537,37 @@ private: assert(OldOp == OldPtr); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - - IRBuilder<> IRB(&LI); - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB); + V = rewriteVectorizedLoadInst(); } else if (IntTy && LI.getType()->isIntegerTy()) { - V = rewriteIntegerLoad(IRB, LI); + V = rewriteIntegerLoad(LI); } else if (BeginOffset == NewAllocaBeginOffset && canConvertValue(TD, NewAllocaTy, LI.getType())) { V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); } else { Type *LTy = TargetTy->getPointerTo(); V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), getPartitionTypeAlign(TargetTy), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); IsPtrAdjusted = true; } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2510,7 +2577,7 @@ private: Value *Placeholder = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, - getName(".insert")); + "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; @@ -2524,7 +2591,7 @@ private: return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); @@ -2539,8 +2606,8 @@ private: // Mix in the existing elements. Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + "load"); + V = insertVector(IRB, Old, V, BeginIndex, "vec"); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2550,17 +2617,17 @@ private: return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { + bool rewriteIntegerStore(Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, - getName(".insert")); + "insert"); } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); @@ -2574,7 +2641,6 @@ private: DEBUG(dbgs() << " original: " << SI << "\n"); Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); - IRBuilder<> IRB(&SI); Value *V = SI.getValueOperand(); @@ -2587,23 +2653,21 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, - getName(".extract")); + "extract"); } if (VecTy) - return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + return rewriteVectorizedStoreInst(V, SI, OldOp); if (IntTy && V->getType()->isIntegerTy()) - return rewriteIntegerStore(IRB, V, SI); + return rewriteIntegerStore(V, SI); StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && @@ -2634,7 +2698,7 @@ private: /// /// \param V The i8 value to splat. /// \param Size The number of bytes in the output (assuming i8 is one byte) - Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + Value *getIntegerSplat(Value *V, unsigned Size) { assert(Size > 0 && "Expected a positive number of bytes."); IntegerType *VTy = cast<IntegerType>(V->getType()); assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); @@ -2642,26 +2706,25 @@ private: return V; Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), ConstantExpr::getUDiv( Constant::getAllOnesValue(SplatIntTy), ConstantExpr::getZExt( Constant::getAllOnesValue(V->getType()), SplatIntTy)), - getName(".isplat")); + "isplat"); return V; } /// \brief Compute a vector splat for a given element value. - Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { - V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + Value *getVectorSplat(Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); DEBUG(dbgs() << " splat: " << *V << "\n"); return V; } bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawDest() == OldPtr); // If the memset has a variable size, it cannot be split, just adjust the @@ -2718,31 +2781,31 @@ private: unsigned NumElements = EndIndex - BeginIndex; assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Value *Splat = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ElementTy)/8); + Value *Splat = + getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8); Splat = convertValue(TD, IRB, Splat, ElementTy); if (NumElements > 1) - Splat = getVectorSplat(IRB, Splat, NumElements); + Splat = getVectorSplat(Splat, NumElements); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + "oldload"); + V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); } else if (IntTy) { // If this is a memset on an alloca where we can widen stores, insert the // set integer. assert(!II.isVolatile()); uint64_t Size = EndOffset - BeginOffset; - V = getIntegerSplat(IRB, II.getValue(), Size); + V = getIntegerSplat(II.getValue(), Size); if (IntTy && (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaBeginOffset)) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + V = insertInteger(TD, IRB, Old, V, Offset, "insert"); } else { assert(V->getType() == IntTy && "Wrong type for an alloca wide integer!"); @@ -2753,10 +2816,9 @@ private: assert(BeginOffset == NewAllocaBeginOffset); assert(EndOffset == NewAllocaEndOffset); - V = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ScalarTy)/8); + V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8); if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) - V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + V = getVectorSplat(V, AllocaVecTy->getNumElements()); V = convertValue(TD, IRB, V, AllocaTy); } @@ -2773,7 +2835,6 @@ private: // them into two categories: split intrinsics and unsplit intrinsics. DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); bool IsDest = II.getRawDest() == OldPtr; @@ -2857,8 +2918,7 @@ private: // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() @@ -2901,8 +2961,7 @@ private: OtherPtrTy = SubIntTy->getPointerTo(); } - Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); @@ -2910,31 +2969,31 @@ private: Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); + "load"); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); Src = convertValue(TD, IRB, Src, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract")); + Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract"); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), - getName(".copyload")); + "copyload"); } if (VecTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + "oldload"); + Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert")); + Src = insertInteger(TD, IRB, Old, Src, Offset, "insert"); Src = convertValue(TD, IRB, Src, NewAllocaTy); } @@ -2949,7 +3008,6 @@ private: assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. @@ -2977,7 +3035,9 @@ private: // as local as possible to the PHI. To do that, we re-use the location of // the old pointer, which necessarily must be in the right position to // dominate the PHI. - IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr)); + IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr)); + PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); // Replace the operands which were using the old pointer. @@ -2990,7 +3050,6 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // Find the operand we need to rewrite here. bool IsTrueVal = SI.getTrueValue() == OldPtr; @@ -3065,7 +3124,7 @@ private: class OpSplitter { protected: /// The builder used to form new instructions. - IRBuilder<> IRB; + IRBuilderTy IRB; /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector<unsigned, 4> Indices; @@ -3277,12 +3336,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; - if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) + if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) return 0; - if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) + } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) return 0; + } Offset -= NumSkippedElements * ElementSize; // First check if we need to recurse. @@ -3380,7 +3440,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 916b37d4a8..3514e6c2aa 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! @@ -35,7 +34,6 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); //===----------------------------------------------------------------------===// // Optimizer Base Class @@ -91,8 +89,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - - bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID) { @@ -104,14 +100,6 @@ namespace { void InitOptimizations(); bool runOnFunction(Function &F); - void setDoesNotAccessMemory(Function &F); - void setOnlyReadsMemory(Function &F); - void setDoesNotThrow(Function &F); - void setDoesNotCapture(Function &F, unsigned n); - void setDoesNotAlias(Function &F, unsigned n); - bool doInitialization(Module &M); - - void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfo>(); } @@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { return Changed; } -// Utility methods for doInitialization. - -void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - Modified = true; - } -} - - -void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - FunctionType *FTy = F.getFunctionType(); - - StringRef Name = F.getName(); - switch (Name[0]) { - case 's': - if (Name == "strlen") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strchr" || - Name == "strrchr") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "strcpy" || - Name == "stpcpy" || - Name == "strcat" || - Name == "strtol" || - Name == "strtod" || - Name == "strtof" || - Name == "strtoul" || - Name == "strtoll" || - Name == "strtold" || - Name == "strncat" || - Name == "strncpy" || - Name == "stpncpy" || - Name == "strtoull") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strxfrm") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strcmp" || - Name == "strspn" || - Name == "strncmp" || - Name == "strcspn" || - Name == "strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; these are valid pthread cancellation points. - setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } -} - -/// doInitialization - Add attributes to well-known functions. -/// -bool SimplifyLibCalls::doInitialization(Module &M) { - Modified = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; - if (F.isDeclaration() && F.hasName()) - inferPrototypeAttributes(F); - } - return Modified; -} - // TODO: // Additional cases that we need to add to this file: // diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d2598a221..e9828d60cd 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -82,7 +82,8 @@ namespace { /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. - void forwardResume(ResumeInst *RI); + void forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// block. When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI) { +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. If the code doesn't have calls or unwinds, we know there is - // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls) { - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); - return; - } - + // rewrite. InvokeInliningInfo Invoke(II); - + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { @@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, continue; } + // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI); + Invoke.forwardResume(RI, InlinedLPads); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the + // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. InvokeDest->removePredecessor(II->getParent()); } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a54ee08b67..be80d34d96 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -985,22 +985,17 @@ bool llvm::removeUnreachableBlocks(Function &F) { if (Reachable.count(I)) continue; - // Remove the block as predecessor of all its reachable successors. - // Unreachable successors don't matter as they'll soon be removed, too. for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(I); + I->dropAllReferences(); + } - // Zap all instructions in this basic block. - while (!I->empty()) { - Instruction &Inst = I->back(); - if (!Inst.use_empty()) - Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); - I->getInstList().pop_back(); - } + for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; - --I; - llvm::next(I)->eraseFromParent(); - } return true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 07dd453424..930d9c412f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); |