author    Eli Bendersky <eliben@chromium.org>  2013-07-15 16:09:15 -0700
committer Eli Bendersky <eliben@chromium.org>  2013-07-15 16:09:15 -0700
commit    c6cf05cb5108f356dde97c01ee4188b0671d4542 (patch)
tree      436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/Transforms
parent    c75199c649c739aade160289d93f257edc798cde (diff)
parent    7dfcb84fc16b3bf6b2379713b53090757f0a45f9 (diff)
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
	docs/LangRef.rst
	include/llvm/CodeGen/CallingConvLower.h
	include/llvm/IRReader/IRReader.h
	include/llvm/Target/TargetMachine.h
	lib/CodeGen/CallingConvLower.cpp
	lib/IRReader/IRReader.cpp
	lib/IRReader/LLVMBuild.txt
	lib/IRReader/Makefile
	lib/LLVMBuild.txt
	lib/Makefile
	lib/Support/MemoryBuffer.cpp
	lib/Support/Unix/PathV2.inc
	lib/Target/ARM/ARMBaseInstrInfo.cpp
	lib/Target/ARM/ARMISelLowering.cpp
	lib/Target/ARM/ARMInstrInfo.td
	lib/Target/ARM/ARMSubtarget.cpp
	lib/Target/ARM/ARMTargetMachine.cpp
	lib/Target/Mips/CMakeLists.txt
	lib/Target/Mips/MipsDelaySlotFiller.cpp
	lib/Target/Mips/MipsISelLowering.cpp
	lib/Target/Mips/MipsInstrInfo.td
	lib/Target/Mips/MipsSubtarget.cpp
	lib/Target/Mips/MipsSubtarget.h
	lib/Target/X86/X86FastISel.cpp
	lib/Target/X86/X86ISelDAGToDAG.cpp
	lib/Target/X86/X86ISelLowering.cpp
	lib/Target/X86/X86InstrControl.td
	lib/Target/X86/X86InstrFormats.td
	lib/Transforms/IPO/ExtractGV.cpp
	lib/Transforms/InstCombine/InstCombineCompares.cpp
	lib/Transforms/Utils/SimplifyLibCalls.cpp
	test/CodeGen/X86/fast-isel-divrem.ll
	test/MC/ARM/data-in-code.ll
	tools/Makefile
	tools/llvm-extract/llvm-extract.cpp
	tools/llvm-link/CMakeLists.txt
	tools/opt/CMakeLists.txt
	tools/opt/LLVMBuild.txt
	tools/opt/Makefile
	tools/opt/opt.cpp
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 15
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 8
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 741
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 13
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 110
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 24
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 29
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 7
-rw-r--r--  lib/Transforms/InstCombine/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 1
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 279
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 3
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 16
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 176
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 82
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 145
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 192
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 103
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 85
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 76
-rw-r--r--  lib/Transforms/Instrumentation/BlackList.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 248
-rw-r--r--  lib/Transforms/Instrumentation/Instrumentation.cpp | 1
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 15
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 62
-rw-r--r--  lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 1
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARC.cpp | 1
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARC.h | 18
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCContract.cpp | 73
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1218
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 401
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp | 81
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 39
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 54
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 43
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 336
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 722
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 703
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp | 4
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 38
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 67
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 48
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 127
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 425
-rw-r--r--  lib/Transforms/Utils/Utils.cpp | 1
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 2
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 4
-rw-r--r--  lib/Transforms/Vectorize/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 591
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 348
-rw-r--r--  lib/Transforms/Vectorize/VecUtils.cpp | 730
-rw-r--r--  lib/Transforms/Vectorize/VecUtils.h | 164
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp | 7
59 files changed, 6156 insertions, 2616 deletions
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 8336d3ad34..a7bf18896b 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
- for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
- dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
- UsedValues.insert(GV);
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ GlobalValue *GV = cast<GlobalValue>(Operand);
+ UsedValues.insert(GV);
+ }
}
// True if A is better than B.
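
The rewritten FindUsedValues assumes the @llvm.used initializer is always a ConstantArray (hence cast<> instead of dyn_cast<>), and it strips pointer casts without looking through aliases, so an alias listed in @llvm.used no longer marks its aliasee as used. A minimal standalone sketch of the same traversal, assuming LLVM-3.3-era headers; the helper name is hypothetical:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void collectUsedGlobals(Module &M,
                                   SmallPtrSet<const GlobalValue*, 8> &Used) {
      GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used");
      if (!LLVMUsed) return;
      // Each element is a global, possibly wrapped in a bitcast to i8*.
      ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
      for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
        // NoFollowAliases: an alias listed here stays an alias.
        Value *V = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
        Used.insert(cast<GlobalValue>(V));
      }
    }
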
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 09f9d7c788..b5a05a7f1c 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -69,7 +69,7 @@ namespace {
// @LOCALMOD-END
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -95,8 +95,8 @@ namespace {
continue;
// @LOCALMOD-END
}
-
- bool Local = I->hasLocalLinkage();
+
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -113,7 +113,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->hasLocalLinkage()) {
+ if (CurI->isDiscardableIfUnused()) {
CurI->setVisibility(GlobalValue::HiddenVisibility);
CurI->setLinkage(GlobalValue::ExternalLinkage);
}
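
The linkage test above changes meaning: hasLocalLinkage() is true only for internal/private symbols, while isDiscardableIfUnused() additionally covers linkonce/linkonce_odr definitions that the linker may drop when unreferenced. A hedged illustration of the predicate the pass now keys on (helper name hypothetical):

    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;

    // Rough truth table, for orientation only:
    //   internal / private       -> local: yes, discardable: yes
    //   linkonce / linkonce_odr  -> local: no,  discardable: yes
    //   external                 -> local: no,  discardable: no
    static bool shouldHideWhenExtracting(const GlobalValue &GV) {
      // Hide-and-externalize anything that could otherwise be dropped
      // when unused, not just symbols with local linkage.
      return GV.isDiscardableIfUnused();
    }
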
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index a75212a386..bc5109b4d4 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,4 +1,4 @@
-//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
// to the function does not create any copies of the pointer value that
// outlive the call. This more or less means that the pointer is only
// dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
// This pass is implemented as a bottom-up traversal of the call-graph.
//
//===----------------------------------------------------------------------===//
@@ -32,12 +34,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
@@ -62,14 +66,63 @@ namespace {
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+ // Utility methods used by inferPrototypeAttributes to add attributes
+ // and maintain annotation statistics.
+
+ void setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ }
+ }
+
+ // inferPrototypeAttributes - Analyze the name and prototype of the
+ // given function and set any applicable attributes. Returns true
+ // if any attributes were set and false otherwise.
+ bool inferPrototypeAttributes(Function &F);
+
+ // annotateLibraryCalls - Adds attributes to well-known standard library
+ // call declarations.
+ bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
private:
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
};
}
@@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes. Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+ return false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strcmp:
+ case LibFunc::strspn:
+ case LibFunc::strncmp:
+ case LibFunc::strcspn:
+ case LibFunc::strcoll:
+ case LibFunc::strcasecmp:
+ case LibFunc::strncasecmp:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::scanf:
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat:
+ case LibFunc::sscanf:
+ case LibFunc::sprintf:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::mkdir:
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::rmdir:
+ case LibFunc::rewind:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rename:
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ break;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+    setDoesNotCapture(F, 3);
+    break;
+ case LibFunc::fread:
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 4);
+    break;
+ case LibFunc::fputs:
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::uname:
+ case LibFunc::unlink:
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::pread:
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vsscanf:
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ break;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+    // May throw; calls through a user-supplied function pointer.
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ default:
+ // Didn't mark any attributes.
+ return false;
+ }
+
+ return true;
+}
+
+/// annotateLibraryCalls - Adds attributes to well-known standard library
+/// call declarations.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+ bool MadeChange = false;
+
+ // Check each function in turn annotating well-known library function
+ // declarations with attributes.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F != 0 && F->isDeclaration())
+ MadeChange |= inferPrototypeAttributes(*F);
+ }
+
+ return MadeChange;
+}
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
- bool Changed = AddReadAttrs(SCC);
+ bool Changed = annotateLibraryCalls(SCC);
+ Changed |= AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
return Changed;
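
Two details of the annotation machinery worth noting: the pass only touches declarations that TargetLibraryInfo recognizes as available library functions, and the indices given to setDoesNotCapture/setDoesNotAlias are 1-based argument positions, with 0 denoting the return value (which is why allocator-like functions use setDoesNotAlias(F, 0)). A hedged sketch of the recognition step, assuming 3.3-era headers; the helper is hypothetical and, inside the pass, TLI comes from getAnalysis<TargetLibraryInfo>():

    #include "llvm/IR/Function.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    static bool isRecognizedLibCall(const Function &F,
                                    const TargetLibraryInfo &TLI) {
      LibFunc::Func LF;
      // getLibFunc maps the symbol name to an enum value; has() additionally
      // checks that the function is considered available for this target.
      return TLI.getLibFunc(F.getName(), LF) && TLI.has(LF);
    }
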
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index dc99492990..201f320c43 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -42,6 +42,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+ SmallPtrSet<Constant *, 8> SeenConstants;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
+ SeenConstants.clear();
return Changed;
}
@@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GlobalIsNeeded(GV);
-
+
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
- if (Constant *OpC = dyn_cast<Constant>(*I))
- MarkUsedGlobalsAsNeeded(OpC);
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ // If we've already processed this constant there's no need to do it again.
+ Constant *Op = dyn_cast<Constant>(*I);
+ if (Op && SeenConstants.insert(Op))
+ MarkUsedGlobalsAsNeeded(Op);
+ }
}
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
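
The new SeenConstants set is a memoization guard: constants are uniqued and heavily shared, so without it the recursive walk can revisit the same subexpression many times over. The pattern in isolation (hypothetical helper, 3.3-era SmallPtrSet whose insert() returns false for an element already present):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    static void walkConstantOnce(Constant *C,
                                 SmallPtrSet<Constant*, 8> &Seen) {
      for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
        if (Constant *Op = dyn_cast<Constant>(*I))
          if (Seen.insert(Op))   // false => already visited, prune here
            walkConstantOnce(Op, Seen);
    }
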
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 700d0dfb5c..6cab6ed0ff 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -473,8 +473,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
- User *U = *UI++;
+ SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ while (!WorkList.empty()) {
+ User *U = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
@@ -537,7 +538,6 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// us, and if they are all dead, nuke them without remorse.
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
- // This could have invalidated UI, start over from scratch.
CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
}
@@ -3062,6 +3062,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
+static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) {
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ Use *U = &I.getUse();
+ if (Tried.count(U))
+ continue;
+
+ User *Usr = *I;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr);
+ if (!GV || !GV->hasName()) {
+ Tried.insert(U);
+ return I;
+ }
+
+ StringRef Name = GV->getName();
+ if (Name != "llvm.used" && Name != "llvm.compiler_used") {
+ Tried.insert(U);
+ return I;
+ }
+ }
+ return V->use_end();
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New);
+
+static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ Constant *Op = CA->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = ConstantArray::get(CA->getType(), NewOps);
+ assert(Replacement != CA && "CA didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CA->use_empty())
+ CA->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Constant *Op = CE->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = CE->getWithOperands(NewOps);
+ assert(Replacement != CE && "CE didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CE->use_empty())
+ CE->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To,
+ Use *U) {
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ return replaceUsesOfWithOnConstant(CA, From, To, U);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return replaceUsesOfWithOnConstant(CE, From, To, U);
+ C->replaceUsesOfWithOnConstant(From, To, U);
+ return true;
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) {
+ SmallPtrSet<Use*, 8> Tried;
+ bool Ret = false;
+ for (;;) {
+ Value::use_iterator I = getFirst(Old, Tried);
+ if (I == Old->use_end())
+ break;
+ Use &U = I.getUse();
+
+    // Must handle Constants specially; we cannot call replaceUsesOfWith on a
+    // constant because constants are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ Ret = true;
+ }
+ return Ret;
+}
+
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
@@ -3081,11 +3180,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
// Make all users of the alias use the aliasee instead.
- if (!J->use_empty()) {
- J->replaceAllUsesWith(Aliasee);
+ if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) {
++NumAliasesResolved;
Changed = true;
}
+ if (!J->use_empty())
+ continue;
// If the alias is externally visible, we may still be able to simplify it.
if (!J->hasLocalLinkage()) {
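
The reason replaceAllNonLLVMUsedUsesWith rebuilds constants rather than editing them: constants are uniqued per LLVMContext, so two structurally identical constant expressions are one object, and mutating it in place would silently rewrite every unrelated user as well. A tiny illustration of the uniquing, as a hypothetical demo:

    #include <cassert>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    static void uniquingDemo(Constant *GV, Type *I8PtrTy) {
      Constant *A = ConstantExpr::getBitCast(GV, I8PtrTy);
      Constant *B = ConstantExpr::getBitCast(GV, I8PtrTy);
      // Uniqued: both casts are the same object, so the only safe way to
      // "replace" an operand is to build a new constant with new operands,
      // which is exactly what getWithOperands() above does.
      assert(A == B);
    }
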
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 892100f058..4ce749cfec 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
+/// Returns the type id for a type to be hashed. We turn pointer types into
+/// integers here because the actual compare logic below considers pointers and
+/// integers of the same size as equal.
+static Type::TypeID getTypeIDForHash(Type *Ty) {
+ if (Ty->isPointerTy())
+ return Type::IntegerTyID;
+ return Ty->getTypeID();
+}
+
/// Creates a hash-code for the function which is the same for any two
/// functions that will compare equal, without looking at the instructions
/// inside the function.
@@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) {
ID.AddInteger(F->getCallingConv());
ID.AddBoolean(F->hasGC());
ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
return ID.ComputeHash();
}
@@ -200,8 +209,7 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1,
- Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ Type *RetTy = NewG->getReturnType();
+ if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
+ Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
+ else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
+ Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
+ else
+ Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
}
NewG->copyAttributesFrom(G);
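
getTypeIDForHash preserves the invariant that the hash never distinguishes functions the comparator would call equal: since isEquivalentType treats pointers and same-sized integers as interchangeable, both must land in the same hash bucket or mergeable functions are never even compared. The writeThunk change is the same idea at the value level: a pointer-returning thunk for an integer-returning function needs inttoptr/ptrtoint, because bitcast cannot convert between the two. A sketch of the hashing side (hypothetical helper):

    #include "llvm/ADT/FoldingSet.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    static unsigned hashParamTypes(FunctionType *FTy) {
      FoldingSetNodeID ID;
      for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
        Type *Ty = FTy->getParamType(i);
        // Fold every pointer type into IntegerTyID, mirroring the comparator.
        ID.AddInteger(Ty->isPointerTy() ? Type::IntegerTyID : Ty->getTypeID());
      }
      return ID.ComputeHash();
    }
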
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 47b2b51899..986c0b8928 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops",
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp",
+ cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive",
+ cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- Vectorize = RunBBVectorization;
+ BBVectorize = RunBBVectorization;
+ SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
}
@@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
- if (Vectorize) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
if (OptLevel > 1 && UseGVNAfterVectorization)
@@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalDCEPass());
}
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+ return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+ return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
@@ -391,9 +408,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM,
- bool Internalize,
- bool RunInliner) {
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
+ Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
}
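
After this split, -vectorize-slp toggles the new SLP vectorizer and -vectorize-slp-aggressive the older basic-block vectorizer, and the same knobs are exposed as PassManagerBuilder members. A hedged usage sketch, assuming 3.3-era headers:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static void buildO2Pipeline(PassManager &PM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.SLPVectorize = true;   // straight-line (superword) vectorization
      PMB.BBVectorize = false;   // aggressive basic-block vectorization
      PMB.populateModulePassManager(PM);
    }
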
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 5f8681ff45..3396f7929e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
UsedValues.insert(LLVMUsed);
-
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
+
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index 72cfe2c985..a25696ec03 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine
InstCombineMulDivRem.cpp
InstCombinePHI.cpp
InstCombineSelect.cpp
- InstCombineShifts.cpp
+ InstCombineShifts.cpp
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5e33..2a36074750 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ private:
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
+ Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6f00..166f8dfdb4 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
/// Class representing coefficient of floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of this writing, the cost of the default
- /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
/// perform write-merging).
- ///
+ ///
class FAddendCoef {
public:
// The constructor has to initialize an APFloat, which is unnecessary for
@@ -37,31 +37,31 @@ namespace {
//
FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
~FAddendCoef();
-
+
void set(short C) {
assert(!insaneIntVal(C) && "Insane coefficient");
IsFp = false; IntVal = C;
}
-
+
void set(const APFloat& C);
-
+
void negate();
-
+
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
Value *getValue(Type *) const;
-
+
// If possible, don't define operator+/operator- etc because these
// operators inevitably call FAddendCoef's constructor which is not cheap.
void operator=(const FAddendCoef &A);
void operator+=(const FAddendCoef &A);
void operator-=(const FAddendCoef &A);
void operator*=(const FAddendCoef &S);
-
+
bool isOne() const { return isInt() && IntVal == 1; }
bool isTwo() const { return isInt() && IntVal == 2; }
bool isMinusOne() const { return isInt() && IntVal == -1; }
bool isMinusTwo() const { return isInt() && IntVal == -2; }
-
+
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
@@ -74,18 +74,28 @@ namespace {
return *getFpValPtr();
}
- APFloat &getFpVal(void)
- { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-
+ APFloat &getFpVal(void) {
+    assert(IsFp && BufHasFpVal && "Incorrect state");
+ return *getFpValPtr();
+ }
+
bool isInt() const { return !IsFp; }
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+  // from a *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
-
+
// True iff FpValBuf contains an instance of APFloat.
bool BufHasFpVal;
-
+
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from neighboring at most
// two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -94,7 +104,7 @@ namespace {
AlignedCharArrayUnion<APFloat> FpValBuf;
};
-
+
/// FAddend is used to represent floating-point addend. An addend is
/// represented as <C, V>, where the V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
@@ -102,10 +112,10 @@ namespace {
class FAddend {
public:
FAddend() { Val = 0; }
-
+
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
-
+
bool isConstant() const { return Val == 0; }
bool isZero() const { return Coeff.isZero(); }
@@ -114,17 +124,17 @@ namespace {
{ Coeff.set(Coefficient); Val = V; }
void set(const ConstantFP* Coefficient, Value *V)
{ Coeff.set(Coefficient->getValueAPF()); Val = V; }
-
+
void negate() { Coeff.negate(); }
-
+
/// Drill down the U-D chain one step to find the definition of V, and
/// try to break the definition into one or two addends.
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-
+
/// Similar to FAddend::drillDownOneStep() except that the value being
/// split is the addend itself.
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-
+
void operator+=(const FAddend &T) {
assert((Val == T.Val) && "Symbolic-values disagree");
Coeff += T.Coeff;
@@ -132,12 +142,12 @@ namespace {
private:
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-
+
// This addend has the value of "Coeff * Val".
Value *Val;
FAddendCoef Coeff;
};
-
+
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with at most two of its neighboring instructions.
///
@@ -145,27 +155,30 @@ namespace {
public:
FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
Value *simplify(Instruction *FAdd);
-
+
private:
typedef SmallVector<const FAddend*, 4> AddendVect;
-
+
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-
+
/// Return the number of instructions needed to emit the N-ary addition.
unsigned calcInstrNumber(const AddendVect& Vect);
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
-
+
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
-
+
private:
// Debugging stuff is clustered here.
#ifndef NDEBUG
@@ -177,7 +190,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+}
//===----------------------------------------------------------------------===//
//
@@ -200,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) {
} else
*P = C;
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
}
-void FAddendCoef::operator=(const FAddendCoef& That) {
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
if (That.isInt())
set(That.IntVal);
else
@@ -219,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
getFpVal().add(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
return;
}
-
+
APFloat &T = getFpVal();
- T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
void FAddendCoef::operator-=(const FAddendCoef &That) {
@@ -240,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
getFpVal().subtract(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
return;
}
APFloat &T = getFpVal();
- T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
}
void FAddendCoef::operator*=(const FAddendCoef &That) {
@@ -268,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
return;
}
- const fltSemantics &Semantic =
+ const fltSemantics &Semantic =
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
- set(APFloat(Semantic, IntVal));
+ convertToFpType(Semantic);
APFloat &F0 = getFpVal();
if (That.isInt())
- F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
else
F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
@@ -302,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
// A - B <1, A>, <1,B>
// 0 - B <-1, B>
// C * A, <C, A>
-// A + C <1, A> <C, NULL>
+// A + C <1, A> <C, NULL>
// 0 +/- 0 <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
-//
+//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
Instruction *I = 0;
@@ -377,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
return 0;
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
- if (!BreakNum || Coeff.isOne())
+ if (!BreakNum || Coeff.isOne())
return BreakNum;
Addend0.Scale(Coeff);
@@ -388,6 +426,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to perform the following optimization on the input instruction I. Return
+// the simplified expression if it was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+  // Create expression "NewAddSub = AddSub0 +/- AddSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -398,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
- // Save the instruction before calling other member-functions.
+ // Save the instruction before calling other member-functions.
Instr = I;
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -409,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
unsigned Opnd0_ExpNum = 0;
unsigned Opnd1_ExpNum = 0;
- if (!Opnd0.isConstant())
+ if (!Opnd0.isConstant())
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -431,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -471,7 +581,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+  // Step 6: Try factorization as the last resort.
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -479,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
- // For saving intermediate results;
+ // For saving intermediate results;
unsigned NextTmpIdx = 0;
FAddend TmpResult[3];
@@ -495,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
AddendVect SimpVect;
// The outer loop works on one symbolic-value at a time. Suppose the input
- // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
// The symbolic-values will be processed in this order: x, y, z.
//
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -522,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = 0;
SimpVect.push_back(T);
}
}
@@ -535,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
R += *SimpVect[Idx];
// Pop all addends being folded and push the resulting folded addend.
- SimpVect.resize(StartIdx);
+ SimpVect.resize(StartIdx);
if (Val != 0) {
if (!R.isZero()) {
SimpVect.push_back(&R);
@@ -548,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -570,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
assert(!Opnds.empty() && "Expect at least one addend");
// Step 1: Check if the # of instructions needed exceeds the quota.
- //
+ //
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
return 0;
@@ -591,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
// Iterate the addends, creating fadd/fsub using adjacent two addends.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
I != E; I++) {
- bool NeedNeg;
+ bool NeedNeg;
Value *V = createAddendVal(**I, NeedNeg);
if (!LastVal) {
LastVal = V;
@@ -617,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
}
#ifndef NDEBUG
- assert(CreateInstrNum == InstrNeeded &&
+ assert(CreateInstrNum == InstrNeeded &&
"Inconsistent in instruction numbers");
#endif
@@ -627,7 +738,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +751,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -665,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
// Adjust the number of instructions needed to emit the N-ary add.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -853,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
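
The (X ^ signbit) + C fold above undoes a prior canonicalization. A small worked
example with i32 and constants chosen for illustration: signbit = 0x80000000 and
C = 42 give signbit ^ 42 = 0x8000002A = -2147483606, so:

    define i32 @xor_signbit_add(i32 %x) {
      %t = xor i32 %x, -2147483648     ; X ^ signbit
      %r = add i32 %t, 42              ; (X ^ signbit) + C
      ret i32 %r                       ; expected: add i32 %x, -2147483606
    }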
@@ -1111,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);
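
A sketch of the new select-pair fold on visitFAdd's input (function name invented;
the nsz flag stands in for the I.hasNoSignedZeros() condition, which the fold needs
here because the zero arms are +0.0 rather than -0.0):

    define float @fadd_selects(i1 %c, float %a, float %b) {
      %s1 = select i1 %c, float 0.000000e+00, float %b   ; select C, 0, B
      %s2 = select i1 %c, float %a, float 0.000000e+00   ; select C, A, 0
      %r  = fadd nsz float %s1, %s2
      ret float %r                     ; expected: select i1 %c, float %a, float %b
    }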
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4332467371..ec75dd2e04 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -22,8 +22,8 @@ using namespace PatternMatch;
/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return 0;
}
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat V, Lo, and Hi as signed or not. IB is the location to
/// insert new instructions.
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return 0;
+
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
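
The added type check guards the (fcmp ord x, c) & (fcmp ord y, c) fold below it:
the fold builds a single fcmp from x and y, which is only valid when both have the
same type. A case where it still fires (same types, non-NaN constants; names are
illustrative):

    define i1 @and_ord(float %x, float %y) {
      %a = fcmp ord float %x, 0.000000e+00
      %b = fcmp ord float %y, 0.000000e+00
      %r = and i1 %a, %b
      ret i1 %r                        ; expected: fcmp ord float %x, %y
    }

With %x float and %y double, the function now returns 0 instead of creating an
ill-typed compare.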
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return Builder->CreateICmpULT(Add, AddCST);
- }
-
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
// (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
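
The moved block is the comment's own example. Reordering it after the one-bit-apart
fold only changes which transform wins when both could apply; for adjacent constants
such as 13 and 14 (xor 0b0011, not a power of two), the one-bit fold cannot fire and
the relocated range fold still does. Sketch, untested against this exact revision:

    define i1 @or_eq_adjacent(i32 %x) {
      %a = icmp eq i32 %x, 13
      %b = icmp eq i32 %x, 14
      %o = or i1 %a, %b
      ret i1 %o            ; expected: %off = add i32 %x, -13
    }                      ;           %o   = icmp ult i32 %off, 2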
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd278..78b4a2c6c9 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+ const AttributeSet &NewPAL =
+ AttributeSet::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a960ab2499..2ee1278d23 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
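
The new store-size guard in PromoteCastOfAllocation: when the alloca has more than
one use, rewriting it to the (smaller) cast type could shrink the memory backing the
other uses. A case that is now left alone, assuming a DataLayout where i64 has store
size 8 and i32 store size 4 (two uses of %a, 4 < 8):

    define i32 @promote(i64 %v) {
      %a = alloca i64                  ; two uses: the store and the bitcast
      store i64 %v, i64* %a
      %p = bitcast i64* %a to i32*
      %l = load i32* %p
      ret i32 %l
    }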
@@ -1604,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1625,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
@@ -1647,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6eca399a40..518a8323b6 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
}
}
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(pred);
+
+ if (RHS->isOne()) {
+ if (pred == ICmpInst::ICMP_SLT) {
+ pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (pred == ICmpInst::ICMP_SGT) {
+ pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
// isHighOnes - Return true if the constant is of the form 1+0+.
// This is the same as lowones(~X).
static bool isHighOnes(const ConstantInt *CI) {
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
- // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
- if (ArrayElementCount <= 32 ||
- (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
- Type *Ty;
- if (ArrayElementCount <= 32)
+ {
+ Type *Ty = 0;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else if (TD)
+ Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- else
- Ty = Type::getInt64Ty(Init->getContext());
- Value *V = Builder->CreateIntCast(Idx, Ty, false);
- V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
- V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
- return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+ if (Ty != 0) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
}
return 0;
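
A worked instance of the generalized magic-bitvector path, with values picked for
illustration: the elements equal to 1 sit at indices 0 and 2, so the bitvector is
0b0101 = 5, and the compare-of-load becomes a shift-and-mask of that constant in
%i's own type (4 <= 32, so no cast is needed):

    @tbl = internal constant [4 x i32] [i32 1, i32 2, i32 1, i32 4]

    define i1 @lookup(i32 %i) {
      %p = getelementptr inbounds [4 x i32]* @tbl, i32 0, i32 %i
      %v = load i32* %p
      %c = icmp eq i32 %v, 1
      ret i1 %c            ; expected: ((5 >> %i) & 1) != 0
    }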
@@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
break;
}
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
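
The new Mul case pairs with isSignTest above: a signed compare of an nsw multiply
against 0, 1, or -1 is really a sign test of the multiply's LHS. Sketch (a negative
multiplier would additionally swap the predicate):

    define i1 @mul_pos(i32 %x) {
      %m = mul nsw i32 %x, 3
      %c = icmp sgt i32 %m, 0
      ret i1 %c                        ; expected: icmp sgt i32 %x, 0
    }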
@@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
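
Likewise for the Shl case just above: an nsw shift only discards copies of the sign
bit, so a sign test of the shift is a sign test of its input. Sketch:

    define i1 @shl_neg(i32 %x) {
      %s = shl nsw i32 %x, 3
      %c = icmp slt i32 %s, 0
      ret i1 %c                        ; expected: icmp slt i32 %x, 0
    }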
@@ -1333,18 +1399,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and i(M-N) is a usable width.
// This enables us to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- // @LOCALMOD-BEGIN
- // We don't want to introduce non-power-of-two integer sizes for PNaCl's
- // stable wire format, so modify this transformation for NaCl.
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt &&
- isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8) {
- // @LOCALMOD-END
+ if (LHSI->hasOneUse() &&
+ // @LOCALMOD-BEGIN
+ // We don't want to introduce non-power-of-two integer sizes for PNaCl's
+ // stable wire format, so modify this transformation for NaCl.
+ isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8 &&
+ // @LOCALMOD-END
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
@@ -1536,6 +1603,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(pred, X, NegX);
}
}
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
default: break;
}
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
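
The equality variant of the same idea: with nsw, a multiply by a non-zero constant
cannot wrap to zero, so comparing the product against zero reduces to comparing X.
Sketch:

    define i1 @mul_ne0(i32 %x) {
      %m = mul nsw i32 %x, 5
      %c = icmp ne i32 %m, 0
      ret i1 %c                        ; expected: icmp ne i32 %x, 0
    }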
@@ -2416,6 +2496,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, Y, Z);
}
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
+ }
+
+
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
A = 0; B = 0; C = 0; D = 0;
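
For the constant-vs-constant add fold just added: with C1 = 5 and C2 = 3 (same sign,
|C1| >= |C2|, nsw on both adds, signed predicate), C3 = 5 - 3 = 2 and one of the two
adds disappears. Sketch:

    define i1 @two_adds(i32 %x, i32 %y) {
      %a = add nsw i32 %x, 5
      %b = add nsw i32 %y, 3
      %c = icmp slt i32 %a, %b
      ret i1 %c            ; expected: icmp slt (add nsw i32 %x, 2), %y
    }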
@@ -2549,6 +2678,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
{ Value *A, *B;
+ // Transform (A & ~B) == 0 --> (A & B) != 0
+ // and (A & ~B) != 0 --> (A & B) == 0
+ // if A is a power of 2.
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+ return new ICmpInst(I.getInversePredicate(),
+ Builder->CreateAnd(A, B),
+ Op1);
+
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
if (match(Op0, m_Not(m_Value(A)))) {
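
For the power-of-two and-not fold above: if A has exactly one bit set, (A & ~B) == 0
holds exactly when that bit is set in B, i.e. (A & B) != 0. A shl-of-1 is one way
isKnownToBeAPowerOfTwo can prove the precondition (names invented):

    define i1 @bit_set(i32 %n, i32 %b) {
      %a   = shl i32 1, %n             ; known power of two
      %nb  = xor i32 %b, -1            ; ~B
      %and = and i32 %a, %nb           ; A & ~B
      %c   = icmp eq i32 %and, 0
      ret i1 %c            ; expected: icmp ne (and i32 %a, %b), 0
    }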
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe32a8..e2d7966cb3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
- IsOffset || !GEP->hasAllZeroIndices()))
+ if (!isOnlyCopiedFromConstantGlobal(
+ GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
return false;
continue;
}
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
+ SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come from identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// it's only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case: there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// If the two stores had the same TBAA tag, preserve it.
if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
OtherStore->getMetadata(LLVMContext::MD_tbaa))))
NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
// Nuke the old stores.
EraseInstFromFunction(SI);
EraseInstFromFunction(*OtherStore);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8e4267f898..ecc9fc3e45 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// if this is safe. For example, the use could be in dynamically unreached
// code.
if (!V->hasOneUse()) return 0;
-
+
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
-
+
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
I->setOperand(0, V2);
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
I->setIsExact();
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
I->setHasNoUnsignedWrap();
MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// TODO: Lots more we could do here:
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
-
+
return MadeChange ? V : 0;
}
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
LHSExt = LHSExt.zext(W * 2);
RHSExt = RHSExt.zext(W * 2);
}
-
+
APInt MulExt = LHSExt * RHSExt;
-
+
if (!sign)
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-
+
APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
+
// ((X << C1)*C2) == (X * (C2 << C1))
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
-
+
const APInt &Val = CI->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
return Shl;
}
-
+
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
}
-
+
// Simplify mul instructions with a constant RHS.
- if (isa<Constant>(Op1)) {
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *Op1C = Op1;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
+ (BO->getOpcode() != Instruction::UDiv &&
BO->getOpcode() != Instruction::SDiv)) {
Op1C = Op0;
BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_Shl(m_One(), m_Value(Y))))
return BinaryOperator::CreateShl(Op0, Y);
}
-
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-
+
Value *BoolCast = 0, *OtherOp = 0;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
return;
if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
return;
-
+
ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
if (CFP && CFP->isExactlyValue(0.5)) {
Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
CFP = dyn_cast<ConstantFP>(I->getOperand(1));
if (CFP && CFP->isExactlyValue(0.5))
Y = I->getOperand(0);
-}
+}
/// Helper function of InstCombiner::visitFMul(BinaryOperator &I). It returns
/// true iff the given value is FMul or FDiv with one and only one operand
/// being a normal constant (i.e. not Zero/NaN/Infinity).
static bool isFMulOrFDivWithConstant(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || (I->getOpcode() != Instruction::FMul &&
+ if (!I || (I->getOpcode() != Instruction::FMul &&
I->getOpcode() != Instruction::FDiv))
return false;
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the
+/// This function is to simplify "FMulOrDiv * C" and returns the
/// resulting expression. Note that this function could return NULL in
/// case the constants cannot be folded into a normal floating-point.
-///
+///
Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
Instruction *InsertBefore) {
assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
if (isNormalFp(F)) {
R = BinaryOperator::CreateFMul(Opnd0, F);
} else {
- // (X / C1) * C => X / (C1/C)
+ // (X / C1) * C => X / (C1/C)
Constant *F = ConstantExpr::getFDiv(C1, C);
if (isNormalFp(cast<ConstantFP>(F)))
R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2)
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
Instruction *FAddSub = dyn_cast<Instruction>(Op0);
if (FAddSub &&
(FAddSub->getOpcode() == Instruction::FAdd ||
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C0) {
std::swap(C0, C1);
std::swap(Opnd0, Opnd1);
- Swap = true;
+ Swap = true;
}
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
- Value *M0 = ConstantExpr::getFMul(C1, C);
- Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ?
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
+ // The purpose is two-fold:
// 1) to form a power expression (of X).
// 2) potentially shorten the critical path: After transformation, the
// latency of the instruction Y is amortized by the expression of X*X,
@@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // B * (uitofp i1 C) -> select C, B, 0
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *B, *C;
+ if (!match(RHS, m_UIToFp(m_Value(C))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ B = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(B->getType());
+ return SelectInst::Create(C, B, Zero);
+ }
+ }
+
+ // A * (1 - uitofp i1 C) -> select C, 0, A
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *A, *C;
+ if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+ C->getType()->isIntegerTy(1)) {
+ A = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(A->getType());
+ return SelectInst::Create(C, Zero, A);
+ }
+ }
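
A sketch of the first of the two new uitofp folds; the fast-math flags mirror the
hasNoNaNs/hasNoInfs/hasNoSignedZeros checks, and the false arm is -0.0, matching
ConstantFP::getNegativeZero (function name invented):

    define float @mask_by_bool(float %b, i1 %c) {
      %f = uitofp i1 %c to float       ; 0.0 or 1.0
      %m = fmul nnan ninf nsz float %b, %f
      ret float %m         ; expected: select i1 %c, float %b, float -0.000000e+00
    }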
+
if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);
else
@@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-
+
// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
int NonNullOperand = -1;
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
if (ST->isNullValue())
NonNullOperand = 1;
-
+
if (NonNullOperand == -1)
return false;
-
+
Value *SelectCond = SI->getOperand(0);
-
+
// Change the div/rem to use 'Y' instead of the select.
I.setOperand(1, SI->getOperand(NonNullOperand));
-
+
// Okay, we know we replace the operand of the div/rem with 'Y' with no
// problem. However, the select, or the condition of the select may have
// multiple uses. Based on our knowledge that the operand must be non-zero,
// propagate the known value for the select into other uses of it, and
// propagate a known value of the condition into its other users.
-
+
// If the select and condition only have a single use, don't bother with this;
// early exit.
if (SI->use_empty() && SelectCond->hasOneUse())
return true;
-
+
// Scan the current block backward, looking for other uses of SI.
BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
+
while (BBI != BBFront) {
--BBI;
// If we found a call to a function, we can't assume it will return, so
// information from below it cannot be propagated above it.
if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
break;
-
+
// Replace uses of the select or its condition with the known values.
for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
I != E; ++I) {
@@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
Worklist.Add(BBI);
}
}
-
+
// If we're past the instruction, quit looking for it.
if (&*BBI == SI)
SI = 0;
if (&*BBI == SelectCond)
SelectCond = 0;
-
+
// If we ran out of things to eliminate, break out of the loop.
if (SelectCond == 0 && SI == 0)
break;
-
+
}
return true;
}
@@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
I.setOperand(1, V);
return &I;
}
-
+
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
const APInt *C;
if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(),
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
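
This hunk is whitespace-only, but the transform it touches is worth a sketch: an
exact power-of-two udiv is a logical shift right by log2 of the divisor:

    define i32 @div16(i32 %x) {
      %r = udiv i32 %x, 16
      ret i32 %r                       ; expected: lshr i32 %x, 4
    }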
@@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, N);
}
}
-
+
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
{ Value *Cond; const APInt *C1, *C2;
@@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Construct the "on true" case of the select
Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
I.isExact());
-
+
// Construct the "on false" case of the select
Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
I.isExact());
-
+
// construct the select instruction and return it.
return SelectInst::Create(Cond, TSI, FSI);
}
@@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
-
+
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
}
-
+
return 0;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a special
/// FP value and:
-/// 1) 1/C is exact, or
+/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
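
For CvtFDivConstToReciprocal, a divisor whose inverse is exactly representable
presumably needs no reciprocal flag at all; 8.0 is such a divisor (1/8 = 0.125
exactly). Sketch, not verified against this exact revision:

    define float @recip(float %x) {
      %r = fdiv float %x, 8.000000e+00
      ret float %r                     ; expected: fmul float %x, 1.250000e-01
    }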
@@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
const APFloat &FpVal = Divisor->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
-
+
if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
(void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Constant *C = ConstantExpr::getFMul(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
if (F.isNormal() && !F.isDenormal()) {
- Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
AllowReciprocal);
if (!Res)
- Res = BinaryOperator::CreateFDiv(X, C);
+ Res = BinaryOperator::CreateFDiv(X, C);
}
}
@@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Fold) {
const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
if (FoldC.isNormal() && !FoldC.isDenormal()) {
- Instruction *R = CreateDiv ?
+ Instruction *R = CreateDiv ?
BinaryOperator::CreateFDiv(Fold, X) :
BinaryOperator::CreateFMul(X, Fold);
R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
-
+
// X urem C -> X and C-1, where C is a power of 2
{ const APInt *C;
if (match(Op1, m_Power2(C)))
@@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
ConstantInt::get(I.getType(), *C-1));
}
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
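
The shifted-power-of-two urem fold in IR form: the divisor 4 << %n is a power of
two for any in-range %n, so the remainder is a mask by divisor minus one:

    define i32 @rem_shl(i32 %a, i32 %n) {
      %d = shl i32 4, %n
      %r = urem i32 %a, %d
      ret i32 %r           ; expected: and i32 %a, (add i32 %d, -1)
    }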
@@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
-
+
if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<Constant>(RHSNeg) ||
(isa<ConstantInt>(RHSNeg) &&
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998cca7..bd14e81c3f 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
unsigned Opc = FirstInst->getOpcode();
Value *LHSVal = FirstInst->getOperand(0);
Value *RHSVal = FirstInst->getOperand(1);
-
+
Type *LHSType = LHSVal->getType();
Type *RHSType = RHSVal->getType();
-
+
bool isNUW = false, isNSW = false, isExact = false;
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
} else if (PossiblyExactOperator *PEO =
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
if (isExact)
isExact = cast<PossiblyExactOperator>(I)->isExact();
-
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
return 0;
-
+
// Otherwise, this is safe to transform!
-
+
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewLHS, PN);
LHSVal = NewLHS;
}
-
+
if (RHSVal == 0) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewRHS, PN);
RHSVal = NewRHS;
}
-
+
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
}
-
+
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-
- SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
// This is true if all GEP bases are allocas and if all indices into them are
// constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// more than one phi, which leads to higher register pressure. This is
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
-
+
bool AllInBounds = true;
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
return 0;
AllInBounds &= GEP->isInBounds();
-
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
!GEP->hasAllConstantIndices()))
AllBasePointersAreAllocas = false;
-
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
continue;
-
+
// Don't merge two GEPs when two operands differ (introducing phi nodes)
// if one of the PHIs has a constant for the index. The index may be
// substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
return 0;
-
+
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
return 0;
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
NeededPhi = true;
}
}
-
+
// If all of the base pointers of the PHI'd GEPs are from allocas, don't
// bother doing this transformation. At best, this will just save a bit of
// offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
return 0;
-
+
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
+
bool HasAnyPHIs = false;
for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
if (FixedOperands[i]) continue; // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
FirstOp->getName()+".pn");
InsertNewInstBefore(NewPN, PN);
-
+
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[i] = NewPN;
FixedOperands[i] = NewPN;
HasAnyPHIs = true;
}
-
+
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
BasicBlock *InBB = PN.getIncomingBlock(i);
-
+
for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
if (PHINode *OpPhi = OperandPhis[op])
OpPhi->addIncoming(InGEP->getOperand(op), InBB);
}
}
-
+
Value *Base = FixedOperands[0];
- GetElementPtrInst *NewGEP =
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
+
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
isAddressTaken = true;
break;
}
-
+
if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
-
+
// If this load is a load from a GEP with a constant offset from an alloca,
// then we don't want to sink it. In its present form, it will be
// load [constant stack offset]. Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
return false;
-
+
return true;
}
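
isSafeAndProfitableToSinkLoad feeds FoldPHIArgLoadIntoPHI, whose effect is easiest
to see in IR: a phi of one-use loads becomes a phi of the pointers plus one load.
A sketch with invented names; both incoming blocks here trivially pass the safety
scan, since nothing after either load may write to memory:

    define i32 @merge_loads(i1 %c, i32* %p, i32* %q) {
    entry:
      br i1 %c, label %then, label %else
    then:
      %lp = load i32* %p
      br label %join
    else:
      %lq = load i32* %q
      br label %join
    join:
      %r = phi i32 [ %lp, %then ], [ %lq, %else ]
      ret i32 %r           ; expected: %ptr = phi i32* ...; %r = load i32* %ptr
    }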
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
bool isVolatile = FirstLI->isVolatile();
unsigned LoadAlignment = FirstLI->getAlignment();
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-
+
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
return 0;
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
+
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
return 0;
-
- // We can't sink the load if the loaded value could be modified between
+
+ // We can't sink the load if the loaded value could be modified between
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
return 0;
-
+
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
return 0;
-
+
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
}
-
+
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
PN.getNumIncomingValues(),
PN.getName()+".in");
-
+
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
+
// Add all operands to the new PHI.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InVal = 0;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
-
+
Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewPN, PN);
PhiVal = NewPN;
}
-
+
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-
+
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return FoldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
return FoldPHIArgLoadIntoPHI(PN);
-
+
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Constant *ConstantOp = 0;
Type *CastSrcTy = 0;
bool isNUW = false, isNSW = false, isExact = false;
-
+
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return 0;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
- // Can fold binop, compare or shift here if the RHS is a constant,
+ // Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
-
+
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
NewCI->setDebugLoc(FirstInst->getDebugLoc());
return NewCI;
}
-
+
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
BinOp->setDebugLoc(FirstInst->getDebugLoc());
return BinOp;
}
-
+
CmpInst *CIOp = cast<CmpInst>(FirstInst);
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
// Remember this node, and if we find the cycle, return.
if (!PotentiallyDeadPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (PotentiallyDeadPHIs.size() == 16)
return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
/// PHIsEqualValue - Return true if this phi node is always equal to
/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
// See if we already saw this PHI node.
if (!ValueEqualPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (ValueEqualPHIs.size() == 16)
return false;
-
+
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
} else if (Op != NonPhiInVal)
return false;
}
-
+
return true;
}
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
-
+
PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
: PHIId(pn), Shift(Sh), Inst(User) {}
-
+
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
RHS.Inst->getType()->getPrimitiveSizeInBits();
}
};
-
+
struct LoweredPHIRecord {
PHINode *PN; // The PHI that was lowered.
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
-
+
LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
: PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
+
// Ctor form used by DenseMap.
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHIUsers - Keep track of all of the truncated values extracted from a set
// of PHIs, along with their offset. These are the things we want to rewrite.
SmallVector<PHIUsageRecord, 16> PHIUsers;
-
+
// PHIs are often mutually cyclic, so we keep track of a whole set of PHI
// nodes which are extracted from. PHIsInspected is a set we use to avoid
// revisiting PHIs; PHIsToSlice is an ordered list of PHIs whose uses we need
// to check (to ensure they are all extracts).
SmallVector<PHINode*, 8> PHIsToSlice;
SmallPtrSet<PHINode*, 8> PHIsInspected;
-
+
PHIsToSlice.push_back(&FirstPhi);
PHIsInspected.insert(&FirstPhi);
-
+
for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
PHINode *PN = PHIsToSlice[PHIId];
-
+
// Scan the input list of the PHI. If any input is an invoke, and if the
// input is defined in the predecessor, then we won't be able to split the
// critical edge which is required to insert a truncate. Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (II == 0) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
-
+
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
return 0;
}
-
-
+
+
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
// If the user is a PHI, inspect its uses recursively.
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (PHIsInspected.insert(UserPN))
PHIsToSlice.push_back(UserPN);
continue;
}
-
+
// Truncates are always ok.
if (isa<TruncInst>(User)) {
PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
continue;
}
-
+
// Otherwise it must be an lshr which can only be used by one trunc.
if (User->getOpcode() != Instruction::LShr ||
!User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
!isa<ConstantInt>(User->getOperand(1)))
return 0;
-
+
unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
}
}
-
+
// If we have no users, they must all be self uses; just nuke the PHI.
if (PHIUsers.empty())
return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
+
// If this phi node is transformable, create new PHIs for all the pieces
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
+
DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
);
-
+
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
DenseMap<BasicBlock*, Value*> PredValues;
-
+
// ExtractedVals - Each new PHI we introduce is saved here so we don't
// introduce redundant PHIs.
DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
+
for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
unsigned PHIId = PHIUsers[UserI].PHIId;
PHINode *PN = PHIsToSlice[PHIId];
unsigned Offset = PHIUsers[UserI].Shift;
Type *Ty = PHIUsers[UserI].Inst->getType();
-
+
PHINode *EltPHI;
-
+
// If we've already lowered a user like this, reuse the previously lowered
// value.
if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
+
// Otherwise, create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
PN->getName()+".off"+Twine(Offset), PN);
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
Value *&PredVal = PredValues[Pred];
-
+
// If we already have a value for this predecessor, reuse it.
if (PredVal) {
EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
EltPHI->addIncoming(PredVal, Pred);
continue;
}
-
+
if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
// If the incoming value was a PHI, and if it was one of the PHIs we
// already rewrote, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
continue;
}
}
-
+
// Otherwise, do an extract in the predecessor.
Builder->SetInsertPoint(Pred, Pred->getTerminator());
Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Res = Builder->CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
-
+
// If the incoming value was a PHI, and if it was one of the PHIs we are
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
cast<Instruction>(Res)));
++UserE;
}
}
PredValues.clear();
-
+
DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
-
+
// Replace the use of this piece with the PHI node.
ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
}
-
+
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with undefs.
Value *Undef = UndefValue::get(FirstPhi.getType());
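
The per-predecessor extract built above ends in the "extract.t" truncate; the shift that precedes it falls between the hunks shown. In plain C++ the sequence computes roughly this (a sketch with illustrative names, hard-coding a 32-bit piece):

    #include <cstdint>

    static uint32_t extractPiece(uint64_t WideVal, unsigned Offset) {
      if (Offset)
        WideVal >>= Offset;        // the lshr inserted in the predecessor
      return (uint32_t)WideVal;    // the "extract.t" trunc
    }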
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (DeadPHICycle(PU, PotentiallyDeadPHIs))
return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
-
+
// If this phi has a single use, and if that use just computes a value for
// the next iteration of a loop, delete the phi. This occurs with unused
// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (InValNo != NumIncomingVals) {
Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-
+
// Scan the rest of the operands to see if there are any conflicts, if so
// there is no need to recursively scan other phis.
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
}
-
+
// If we scanned over all operands, then we have one unique value plus
// phi values. Scan PHI nodes to see if they all merge in each other or
// the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
-
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a262d711d3..59502fb988 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// If this is a non-volatile load or a cast from the same type,
// merge.
if (TI->isCast()) {
- if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
return 0;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
- FI->getOperand(0)->getType()->getVectorNumElements())
+ if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+ CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
return 0;
} else {
return 0; // unknown unary op.
@@ -349,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return 0;
}
+/// foldSelectICmpAndOr - We want to turn:
+/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+/// (or (shl (and X, C1), C3), Y)
+/// iff:
+/// C1 and C2 are both powers of 2
+/// where:
+/// C3 = Log(C2) - Log(C1)
+///
+/// This transform handles cases where:
+/// 1. The icmp predicate is inverted
+/// 2. The select operands are reversed
+/// 3. The magnitudes of C2 and C1 are flipped
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+
+ if (!match(CmpRHS, m_Zero()))
+ return 0;
+
+ Value *X;
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ return 0;
+
+ const APInt *C2;
+ bool OrOnTrueVal = false;
+ bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+ if (!OrOnFalseVal)
+ OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+ if (!OrOnFalseVal && !OrOnTrueVal)
+ return 0;
+
+ Value *V = CmpLHS;
+ Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+
+ unsigned C1Log = C1->logBase2();
+ unsigned C2Log = C2->logBase2();
+ if (C2Log > C1Log) {
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder->CreateShl(V, C2Log - C1Log);
+ } else if (C1Log > C2Log) {
+ V = Builder->CreateLShr(V, C1Log - C2Log);
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ } else
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ V = Builder->CreateXor(V, *C2);
+
+ return Builder->CreateOr(V, Y);
+}
+
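
The identity behind foldSelectICmpAndOr is easy to spot-check in plain C++. A minimal, self-checking sketch, assuming C1 = 4 and C2 = 8 so C3 = log2(C2) - log2(C1) = 1:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 4, C2 = 8;                     // powers of two
      const unsigned C3 = 1;                             // log2(8) - log2(4)
      for (uint32_t X = 0; X != 64; ++X)
        for (uint32_t Y = 0; Y != 64; ++Y) {
          uint32_t Sel = ((X & C1) == 0) ? Y : (Y | C2); // select form
          uint32_t Or  = ((X & C1) << C3) | Y;           // folded form
          assert(Sel == Or);
        }
      return 0;
    }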
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -520,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : 0;
}
@@ -675,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return BinaryOperator::CreateAnd(NotCond, FalseVal);
- } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -689,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select a, a, b -> a|b
if (CondVal == TrueVal)
return BinaryOperator::CreateOr(CondVal, FalseVal);
- else if (CondVal == FalseVal)
+ if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
// select a, ~a, b -> (~a)&b
// select a, b, ~a -> (~a)|b
if (match(TrueVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateAnd(TrueVal, FalseVal);
- else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ if (match(FalseVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
@@ -837,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NewFalseOp = NegVal;
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
- Value *NewSel =
+ Value *NewSel =
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
@@ -861,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *LHS, *RHS, *LHS2, *RHS2;
if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
@@ -907,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -917,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
- // Form a shufflevector instruction.
- SmallVector<Constant *, 8> Mask(VWidth);
- Type *Int32Ty = Type::getInt32Ty(CV->getContext());
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elem = cast<Constant>(CV->getOperand(i));
- if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
- Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
- else if (isa<UndefValue>(Elem))
- Mask[i] = UndefValue::get(Int32Ty);
- else
- return 0;
- }
- Constant *MaskVal = ConstantVector::get(Mask);
- Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
- return ReplaceInstUsesWith(SI, V);
- }
-
if (isa<ConstantAggregateZero>(CondVal)) {
return ReplaceInstUsesWith(SI, FalseVal);
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1a4b..4301ddb5aa 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return 0;
}
+// If we have a PHI node with a vector type that has only 2 uses: feeding
+// itself and being an operand of an extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+ // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+ if (!PN->hasNUses(2))
+ return NULL;
+
+ // If so, it's known at this point that one operand is the PHI and the
+ // other is an extractelement node. Find the PHI user that is not the
+ // extractelement node.
+ Value::use_iterator iu = PN->use_begin();
+ Instruction *PHIUser = dyn_cast<Instruction>(*iu);
+ if (PHIUser == cast<Instruction>(&EI))
+ PHIUser = cast<Instruction>(*(++iu));
+
+ // Verify that this PHI user has one use, which is the PHI itself,
+ // and that it is a binary operation which is cheap to scalarize;
+ // otherwise return NULL.
+ if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !CheapToScalarize(PHIUser, true))
+ return NULL;
+
+ // Create a scalar PHI node that will replace the vector PHI node
+ // just before the current PHI node.
+ PHINode * scalarPHI = cast<PHINode>(
+ InsertNewInstWith(PHINode::Create(EI.getType(),
+ PN->getNumIncomingValues(), ""), *PN));
+ // Scalarize each PHI operand.
+ for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+ Value *PHIInVal = PN->getIncomingValue(i);
+ BasicBlock *inBB = PN->getIncomingBlock(i);
+ Value *Elt = EI.getIndexOperand();
+ // If the operand is the PHI induction variable:
+ if (PHIInVal == PHIUser) {
+ // Scalarize the binary operation. Its first operand is the
+ // scalar PHI and the second operand is extracted from the other
+ // vector operand.
+ BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+ unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
+ Value *Op = Builder->CreateExtractElement(
+ B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ Value *newPHIUser = InsertNewInstWith(
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
+ *B0);
+ scalarPHI->addIncoming(newPHIUser, inBB);
+ } else {
+ // Scalarize PHI input:
+ Instruction *newEI =
+ ExtractElementInst::Create(PHIInVal, Elt, "");
+ // Insert the new instruction into the predecessor basic block.
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+ BasicBlock::iterator InsertPos;
+ if (pos && !isa<PHINode>(pos)) {
+ InsertPos = pos;
+ ++InsertPos;
+ } else {
+ InsertPos = inBB->getFirstInsertionPt();
+ }
+
+ InsertNewInstWith(newEI, *InsertPos);
+
+ scalarPHI->addIncoming(newEI, inBB);
+ }
+ }
+ return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
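
A rough source-level picture of what scalarizePHI buys (a hypothetical example, not the pass's API): a loop that carries a whole vector but only ever escapes one constant lane can carry just that lane.

    #include <array>

    // Before: the loop-carried value is a 4-wide vector whose only escape
    // is lane 2; the add is the single cheap-to-scalarize binop user.
    float before(std::array<float, 4> Acc, std::array<float, 4> Step, int N) {
      for (int i = 0; i < N; ++i)
        for (int l = 0; l < 4; ++l)
          Acc[l] += Step[l];
      return Acc[2];             // the extractelement at a constant index
    }

    // After: only lane 2 is carried through the loop (the scalar PHI).
    float after(std::array<float, 4> Acc, std::array<float, 4> Step, int N) {
      float A = Acc[2];
      for (int i = 0; i < N; ++i)
        A += Step[2];            // the scalarized binop
      return A;
    }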
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
+
+ // If there's a vector PHI feeding a scalar use through this extractelement
+ // instruction, try to scalarize the PHI.
+ if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+ return (scalarPHI);
+ }
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement)
// bitcasts can change the number of vector elements and they cost nothing
- if (CI->hasOneUse() && EI.hasOneUse() &&
- (CI->getOpcode() != Instruction::BitCast)) {
+ if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
+ Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
}
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Update Mask to reflect that `ScalarOp' has been inserted at
+ // position `InsertedIdx' within the vector returned by IEI.
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3e91..ec10751202 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
Module *M = II->getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
- ArrayRef<Value *>(), "", II->getParent());
+ None, "", II->getParent());
}
return EraseInstFromFunction(MI);
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6877475b1d..623c470506 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v1";
+static const char *kAsanInitName = "__asan_init_v3";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in a module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = false,
+ AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
StringRef BlacklistFile = StringRef(),
@@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass {
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F);
- void createInitializerPoisonCalls(Module &M,
- Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
virtual bool doInitialization(Module &M);
@@ -315,7 +313,7 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = false,
+ AddressSanitizerModule(bool CheckInitOrder = true,
StringRef BlacklistFile = StringRef(),
bool ZeroBaseShadow = false)
: ModulePass(ID),
@@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass {
void initializeCallbacks(Module &M);
bool ShouldInstrumentGlobal(GlobalVariable *G);
- void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
- Value *LastAddr);
+ void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
size_t RedzoneSize() const {
return RedzoneSizeForScale(Mapping.Scale);
}
@@ -531,9 +528,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst,
kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
}
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
@@ -750,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
void AddressSanitizerModule::createInitializerPoisonCalls(
- Module &M, Value *FirstAddr, Value *LastAddr) {
+ Module &M, GlobalValue *ModuleName) {
// We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
// If that function is not present, this TU contains no globals, or they have
@@ -762,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
// Add a call to poison all external globals before the given function starts.
- IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+ Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+ IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
@@ -836,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
@@ -885,11 +886,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
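
The runtime-side view implied by the comment above is roughly the struct below. A hedged sketch: field names follow the comment, the leading address field sits outside this hunk, and the actual compiler-rt declaration may differ.

    #include <cstdint>

    struct asan_global_sketch {
      uintptr_t beg;                  // address of the global (outside hunk)
      uintptr_t size;
      uintptr_t size_with_redzone;
      const char *name;
      const char *module_name;        // the field this patch adds
      uintptr_t has_dynamic_init;
    };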
@@ -897,9 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- // The addresses of the first and last dynamically initialized globals in
- // this TU. Used in initialization order checking.
- Value *FirstDynamic = 0, *LastDynamic = 0;
+ bool HasDynamicallyInitializedGlobals = false;
+
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+ // We shouldn't merge identical module names, as this string serves as a
+ // unique module ID at runtime.
+ ModuleName->setUnnamedAddr(false);
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
@@ -930,11 +936,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -958,15 +960,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
// Populate the first and last globals declared in this TU.
- if (CheckInitOrder && GlobalHasDynamicInitializer) {
- LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
- if (FirstDynamic == 0)
- FirstDynamic = LastDynamic;
- }
+ if (CheckInitOrder && GlobalHasDynamicInitializer)
+ HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
@@ -977,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
- if (CheckInitOrder && FirstDynamic && LastDynamic)
- createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+ if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
IRB.CreateCall2(AsanRegisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
@@ -1095,6 +1095,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
@@ -1312,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() {
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
- // This string will be parsed by the run-time (DescribeStackAddress).
+ // This string will be parsed by the run-time (DescribeAddressIfStack).
SmallString<2048> StackDescriptionStorage;
raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+ StackDescription << AllocaVec.size() << " ";
// Insert poison calls for lifetime intrinsics for alloca.
bool HavePoisonedAllocas = false;
@@ -1348,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() {
}
assert(Pos == LocalStackSize);
- // Write the Magic value and the frame description constant to the redzone.
+ // The left-most redzone has enough space for at least 4 pointers.
+ // Write the Magic value to redzone[0].
Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
BasePlus0);
- Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
- ConstantInt::get(IntptrTy,
- ASan.LongSize/8));
- BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+ // Write the frame description constant to redzone[1].
+ Value *BasePlus1 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), StackDescription.str());
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
IntptrTy);
IRB.CreateStore(Description, BasePlus1);
+ // Write the PC to redzone[2].
+ Value *BasePlus2 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
+ 2 * ASan.LongSize/8)),
+ IntptrPtrTy);
+ IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
// Poison the stack redzones at the entry.
Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
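
Taken together, the left-most redzone now holds three pointer-sized slots, with one still free. An illustrative C++ sketch, assuming a 64-bit target so each slot is one pointer wide:

    #include <cstdint>

    void writeLeftRedzone(uintptr_t *Base, uintptr_t Magic,
                          uintptr_t FrameDescription, uintptr_t FunctionPC) {
      Base[0] = Magic;              // kCurrentStackFrameMagic
      Base[1] = FrameDescription;   // the "<NumAllocas> ..." string
      Base[2] = FunctionPC;         // &F cast to an integer
      // Base[3] is free: "enough space for at least 4 pointers".
    }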
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 927982d2af..39de4b0401 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
bool BlackList::isInInit(const GlobalVariable &G) const {
return (isIn(*G.getParent()) ||
inSection("global-init", G.getName()) ||
- inSection("global-init-type", GetGVTypeString(G)));
+ inSection("global-init-type", GetGVTypeString(G)) ||
+ inSection("global-init-src", G.getParent()->getModuleIdentifier()));
}
bool BlackList::inSection(const StringRef Section,
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 095b852d93..2edd151869 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -29,8 +29,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,33 +41,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
- UseExtraChecksum(false), NoRedZone(false),
- NoFunctionNamesInData(false) {
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool Use402Format,
- bool UseExtraChecksum, bool NoRedZone,
- bool NoFunctionNamesInData)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(Use402Format), UseExtraChecksum(UseExtraChecksum),
- NoRedZone(NoRedZone), NoFunctionNamesInData(NoFunctionNamesInData) {
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -76,6 +102,8 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteWriteoutFunctionListFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -86,23 +114,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- bool Use402Format;
- bool UseExtraChecksum;
- bool NoRedZone;
- bool NoFunctionNamesInData;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -113,13 +140,14 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format,
- bool UseExtraChecksum,
- bool NoRedZone,
- bool NoFunctionNamesInData) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum,
- NoRedZone, NoFunctionNamesInData);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
+}
+
+static std::string getFunctionName(DISubprogram SP) {
+ if (!SP.getLinkageName().empty())
+ return SP.getLinkageName();
+ return SP.getName();
}
namespace {
@@ -257,8 +285,8 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os,
- bool Use402Format, bool UseExtraChecksum) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -270,17 +298,16 @@ namespace {
ReturnBlock = new GCOVBlock(i++, os);
writeBytes(FunctionTag, 4);
- uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
- uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
- writeGCOVString(SP.getName());
+ writeGCOVString(getFunctionName(SP));
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
}
@@ -355,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
SmallString<128> Filename = CU.getFilename();
sys::path::replace_extension(Filename, NewStem);
- return sys::path::filename(Filename.str());
+ StringRef FName = sys::path::filename(Filename);
+ SmallString<128> CurPath;
+ if (sys::fs::current_path(CurPath)) return FName;
+ sys::path::append(CurPath, FName.str());
+ return CurPath.str();
}
bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -380,10 +411,9 @@ void GCOVProfiler::emitGCNO() {
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out.write("oncg*404MVLL", 12);
- else
- out.write("oncg*204MVLL", 12);
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
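
The byte reversal is easy to sanity-check: with the default version string "402*", the reversed copy is "*204", which reproduces the old Use402Format magic "oncg*204MVLL" byte for byte. A minimal sketch:

    #include <cassert>
    #include <cstring>

    int main() {
      const char Version[5] = "402*";   // as set by -default-gcov-version
      char Reversed[5];
      Reversed[0] = Version[3];
      Reversed[1] = Version[2];
      Reversed[2] = Version[1];
      Reversed[3] = Version[0];
      Reversed[4] = '\0';
      assert(std::strcmp(Reversed, "*204") == 0);  // old "oncg*204MVLL"
      return 0;
    }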
@@ -392,7 +422,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -522,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+ // be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+ // Initialize the environment and register the local writeout and flush
+ // functions.
+ Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
if (InsertIndCounterIncrCode)
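
llvm_gcov_init itself lives in the profiling runtime. A hedged sketch of the contract implied by the CreateCall2 above (illustrative only; the real runtime and the llvm_delete_*_function_list teardown differ in detail):

    #include <vector>

    typedef void (*GcovFn)();
    static std::vector<GcovFn> WriteoutFns;   // invoked at process exit
    static std::vector<GcovFn> FlushFns;      // invoked from __gcov_flush

    extern "C" void llvm_gcov_init(GcovFn Writeout, GcovFn Flush) {
      WriteoutFns.push_back(Writeout);
      FlushFns.push_back(Flush);
    }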
@@ -581,8 +641,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
}
Constant *GCOVProfiler::getStartFileFunc() {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Type::getInt8PtrTy(*Ctx), false);
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
@@ -598,9 +661,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- Type *Args[2] = {
+ Type *Args[3] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -611,11 +675,20 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Args, false);
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -634,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -643,7 +716,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
@@ -659,20 +732,19 @@ void GCOVProfiler::insertCounterWriteout(
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
- I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- Builder.getInt32(ident),
- NoFunctionNamesInData ?
- Constant::getNullValue(Builder.getInt8PtrTy()) :
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
+ Builder.CreateCall2(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion));
+ for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
+ DISubprogram SP(CountersBySP[j].second);
+ Builder.CreateCall3(
+ EmitFunction, Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
+
+ GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
Builder.CreateCall2(EmitArcs,
@@ -682,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout(
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Builder.getInt32Ty(),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -713,7 +765,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
Fn->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
Fn->addFnAttr(Attribute::NoRedZone);
// Create basic blocks for function.
@@ -758,18 +810,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
FlushF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -794,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8ba102559b..9f353967f3 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fce6513a97..4e75904ded 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
cl::desc("poison uninitialized stack variables with the given patter"),
cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
@@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Clean shadow (all zeroes) means all bits of the value are defined
/// (initialized).
- Value *getCleanShadow(Value *V) {
+ Constant *getCleanShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
return 0;
@@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ConstantStruct::get(ST, Vals);
}
+ /// \brief Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return getPoisonedShadow(ShadowTy);
+ }
+
/// \brief Create a clean (zero) origin.
Value *getCleanOrigin() {
return Constant::getNullValue(MS.OriginTy);
@@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return Shadow;
}
if (UndefValue *U = dyn_cast<UndefValue>(V)) {
- Value *AllOnes = getPoisonedShadow(getShadowTy(V));
+ Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
(void)U;
return AllOnes;
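
In user-visible terms, -msan-poison-undef controls whether a program like this hypothetical repro is reported: with the flag on (the default) the branch on an uninitialized value is flagged; with it off the undef's shadow is clean and MSan stays silent.

    int main() {
      int X;         // undef: shadow is all-ones when poisoning is on
      if (X > 0)     // use-of-uninitialized-value is reported here
        return 1;
      return 0;
    }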
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index f93c5ab4c8..299060a42f 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit(
static cl::opt<bool> ClInstrumentAtomics(
"tsan-instrument-atomics", cl::init(true),
cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+ "tsan-instrument-memintrinsics", cl::init(true),
+ cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite,
"Number of reads ignored due to following writes");
STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
STATISTIC(NumOmittedReadsFromConstantGlobals,
"Number of reads from constant globals");
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
@@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass {
void initializeCallbacks(Module &M);
bool instrumentLoadOrStore(Instruction *I);
bool instrumentAtomic(Instruction *I);
+ bool instrumentMemIntrinsic(Instruction *I);
void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
SmallVectorImpl<Instruction*> &All);
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr);
DataLayout *TD;
+ Type *IntptrTy;
SmallString<64> BlacklistFile;
OwningPtr<BlackList> BL;
IntegerType *OrdTy;
@@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanAtomicThreadFence;
Function *TsanAtomicSignalFence;
Function *TsanVptrUpdate;
+ Function *TsanVptrLoad;
+ Function *MemmoveFn, *MemcpyFn, *MemsetFn;
};
} // namespace
@@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), NULL));
+ TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+
+ MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL));
+ MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL));
+ MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL));
}
bool ThreadSanitizer::doInitialization(Module &M) {
@@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
+ IntptrTy = IRB.getIntPtrTy(TD);
Value *TsanInit = M.getOrInsertFunction("__tsan_init",
IRB.getVoidTy(), NULL);
appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
@@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
+ SmallVector<Instruction*, 8> MemIntrinCalls;
bool Res = false;
bool HasCalls = false;
@@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
else if (isa<ReturnInst>(BI))
RetVec.push_back(BI);
else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ if (isa<MemIntrinsic>(BI))
+ MemIntrinCalls.push_back(BI);
HasCalls = true;
chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
}
@@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Res |= instrumentAtomic(AtomicAccesses[i]);
}
+ if (ClInstrumentMemIntrinsics)
+ for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
+ Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+ }
+
// Instrument function entry/exit points if there were instrumented accesses.
if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
NumInstrumentedVtableWrites++;
return true;
}
+ if (!IsWrite && isVtableAccess(I)) {
+ IRB.CreateCall(TsanVptrLoad,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableReads++;
+ return true;
+ }
Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
if (IsWrite) NumInstrumentedWrites++;
@@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
return IRB->getInt32(v);
}
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated);
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ IRB.CreateCall3(MemsetFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+ IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ }
+ return false;
+}
+
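// Editorial sketch (not part of this patch) of why plain calls suffice: the
// TSan runtime intercepts the libc entry points and reports the touched range
// before forwarding. Names and signatures below are approximate, and
// RealMemset is a hypothetical handle to the original implementation.
extern "C" void __tsan_write_range(void *Addr, unsigned long Size);
extern "C" void *RealMemset(void *P, int C, unsigned long N); // hypothetical
extern "C" void *memset(void *P, int C, unsigned long N) {
  __tsan_write_range(P, N);   // make the whole store visible to the detector
  return RealMemset(P, C, N); // then perform the actual memset
}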
// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
// standards. For background see the C++11 standard. A slightly older, publicly
// available draft of the standard (not entirely up-to-date, but close enough
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 5aada9c373..8f917aeb37 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
switch (Class) {
case IC_Autorelease:
case IC_AutoreleaseRV:
+ case IC_IntrinsicUser:
case IC_User:
// These operations never directly modify a reference count.
return false;
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index 53a31b0de1..373168e898 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -30,6 +30,7 @@ using namespace llvm::objcarc;
bool llvm::objcarc::EnableARCOpts;
static cl::opt<bool, true>
EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
cl::location(EnableARCOpts),
cl::init(true));
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index e062b66555..39670f339e 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) {
M.getNamedValue("objc_copyWeak") ||
M.getNamedValue("objc_retainedObject") ||
M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer");
+ M.getNamedValue("objc_unretainedPointer") ||
+ M.getNamedValue("clang.arc.use");
}
/// \enum InstructionClass
@@ -89,6 +90,7 @@ enum InstructionClass {
IC_CopyWeak, ///< objc_copyWeak (derived)
IC_DestroyWeak, ///< objc_destroyWeak (derived)
IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_IntrinsicUser, ///< clang.arc.use
IC_CallOrUser, ///< could call objc_release and/or "use" pointers
IC_Call, ///< could call objc_release
IC_User, ///< could "use" a pointer
@@ -97,6 +99,13 @@ enum InstructionClass {
raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
+/// \brief Test if the given class is a kind of user.
+inline static bool IsUser(InstructionClass Class) {
+ return Class == IC_User ||
+ Class == IC_CallOrUser ||
+ Class == IC_IntrinsicUser;
+}
+
/// \brief Test if the given class is objc_retain or equivalent.
static inline bool IsRetain(InstructionClass Class) {
return Class == IC_Retain ||
@@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) {
/// \brief Test if the given class represents instructions which return their
/// argument verbatim.
static inline bool IsForwarding(InstructionClass Class) {
- // objc_retainBlock technically doesn't always return its argument
- // verbatim, but it doesn't matter for our purposes here.
return Class == IC_Retain ||
Class == IC_RetainRV ||
Class == IC_Autorelease ||
Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock ||
Class == IC_NoopCast;
}
@@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) {
return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
}
-static inline bool isNullOrUndef(const Value *V) {
+static inline bool IsNullOrUndef(const Value *V) {
return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
}
-static inline bool isNoopInstruction(const Instruction *I) {
+static inline bool IsNoopInstruction(const Instruction *I) {
return isa<BitCastInst>(I) ||
(isa<GetElementPtrInst>(I) &&
cast<GetElementPtrInst>(I)->hasAllZeroIndices());
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 1c13d1cbea..c43f4f4a44 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -66,6 +66,8 @@ namespace {
Constant *RetainAutoreleaseCallee;
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRVCallee;
+ /// Declaration for objc_retainAutoreleasedReturnValue().
+ Constant *RetainRVCallee;
/// The inline asm string to insert between calls and RetainRV calls to make
/// the optimization work on targets which need it.
@@ -77,9 +79,12 @@ namespace {
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainRVCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
Constant *getRetainAutoreleaseRVCallee(Module *M);
+ bool OptimizeRetainCall(Function &F, Instruction *Retain);
+
bool ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
SmallPtrSet<Instruction *, 4>
@@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
return RetainAutoreleaseRVCallee;
}
+Constant *ObjCARCContract::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value. We do this late so we do not disrupt the dataflow analysis in
+/// ObjCARCOpt.
+bool
+ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call)
+ return false;
+ if (Call->getParent() != Retain->getParent())
+ return false;
+
+ // Check that the call is next to the retain.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return false;
+
+  // Turn it into an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Transforming objc_retain => "
+ "objc_retainAutoreleasedReturnValue since the operand is a "
+ "return value.\nOld: "<< *Retain << "\n");
+
+  // We do not have to worry about tail calls or nounwind, since
+  // retain/retainRV have the same properties.
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << "New: " << *Retain << "\n");
+ return true;
+}
+
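// Editorial sketch of the rewrite, in hypothetical pseudo-IR (@makeObject is
// a placeholder). Before, with the retain immediately after the producing
// call (modulo no-op casts):
//   %obj = call i8* @makeObject()
//   %0   = call i8* @objc_retain(i8* %obj)
// After:
//   %obj = call i8* @makeObject()
//   %0   = call i8* @objc_retainAutoreleasedReturnValue(i8* %obj)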
/// Merge an autorelease with a retain into a fused call.
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
@@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
StoreStrongCallee = 0;
RetainAutoreleaseCallee = 0;
RetainAutoreleaseRVCallee = 0;
+ RetainRVCallee = 0;
// Initialize RetainRVMarker.
RetainRVMarker = 0;
@@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// objc_retainBlock does not necessarily return its argument.
InstructionClass Class = GetBasicInstructionClass(Inst);
switch (Class) {
- case IC_Retain:
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
break;
@@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
continue;
break;
+ case IC_Retain:
+ // Attempt to convert retains to retainrvs if they are next to function
+ // calls.
+ if (!OptimizeRetainCall(F, Inst))
+ break;
+ // If we succeed in our optimization, fall through.
+ // FALLTHROUGH
case IC_RetainRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the retainAutoreleasedReturnValue optimization,
@@ -410,7 +473,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
break;
}
--BBI;
- } while (isNoopInstruction(BBI));
+ } while (IsNoopInstruction(BBI));
if (&*BBI == GetObjCArg(Inst)) {
DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
@@ -429,7 +492,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
case IC_InitWeak: {
// objc_initWeak(p, null) => *p = null
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(1))) {
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
Value *Null =
ConstantPointerNull::get(cast<PointerType>(CI->getType()));
Changed = true;
@@ -453,6 +516,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (isa<AllocaInst>(Inst))
TailOkForStoreStrongs = false;
continue;
+ case IC_IntrinsicUser:
+ // Remove calls to @clang.arc.use(...).
+ Inst->eraseFromParent();
+ continue;
default:
continue;
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9c14949877..43e2e20035 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -190,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
do {
const Value *V = Worklist.pop_back_val();
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+ DEBUG(dbgs() << "Visiting: " << *V << "\n");
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const User *UUser = *UI;
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+ DEBUG(dbgs() << "User: " << *UUser << "\n");
// Special - Use by a call (callee or argument) is not considered
// to be an escape.
@@ -206,11 +207,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
case IC_StoreStrong:
case IC_Autorelease:
case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
- "arguments. Pointer Escapes!\n");
+ DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
// These special functions make copies of their pointer arguments.
return true;
}
+ case IC_IntrinsicUser:
+ // Use by the use intrinsic is not an escape.
+ continue;
case IC_User:
case IC_None:
// Use by an instruction which copies the value is an escape if the
@@ -219,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
- "Ptr escapes if result escapes. Adding to list.\n");
+ DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
+ " Adding to list.\n");
Worklist.push_back(UUser);
} else {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
- "\n");
+ DEBUG(dbgs() << "Already visited node.\n");
}
continue;
}
@@ -241,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
continue;
}
// Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ DEBUG(dbgs() << "Assuming ptr escapes.\n");
return true;
}
} while (!Worklist.empty());
// No escapes found.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ DEBUG(dbgs() << "Ptr does not escape.\n");
return false;
}
@@ -301,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding "
"retain+autoreleaes eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumRetainsBeforeOpt,
+ "Number of retains before optimization.");
+STATISTIC(NumReleasesBeforeOpt,
+ "Number of releases before optimization.");
+#ifndef NDEBUG
+STATISTIC(NumRetainsAfterOpt,
+ "Number of retains after optimization.");
+STATISTIC(NumReleasesAfterOpt,
+ "Number of releases after optimization.");
+#endif
namespace {
/// \enum Sequence
@@ -371,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
namespace {
/// \brief Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverese sequence.
+ /// reverse sequence.
struct RRInfo {
/// After an objc_retain, the reference count of the referenced
/// object is known to be positive. Similarly, before an objc_release, the
@@ -387,10 +399,6 @@ namespace {
/// KnownSafe is true when either of these conditions is satisfied.
bool KnownSafe;
- /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
- /// calls).
- bool IsRetainBlock;
-
    /// True if the objc_release calls are all marked with the "tail" keyword.
bool IsTailCallRelease;
@@ -407,17 +415,18 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false),
- ReleaseMetadata(0) {}
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
void clear();
+
+ bool IsTrackingImpreciseReleases() {
+ return ReleaseMetadata != 0;
+ }
};
}
void RRInfo::clear() {
KnownSafe = false;
- IsRetainBlock = false;
IsTailCallRelease = false;
ReleaseMetadata = 0;
Calls.clear();
@@ -431,7 +440,7 @@ namespace {
/// True if the reference count is known to be incremented.
bool KnownPositiveRefCount;
- /// True of we've seen an opportunity for partial RR elimination, such as
+ /// True if we've seen an opportunity for partial RR elimination, such as
/// pushing calls into a CFG triangle or into one side of a CFG diamond.
bool Partial;
@@ -451,15 +460,16 @@ namespace {
KnownPositiveRefCount = true;
}
- void ClearRefCount() {
+ void ClearKnownPositiveRefCount() {
KnownPositiveRefCount = false;
}
- bool IsKnownIncremented() const {
+ bool HasKnownPositiveRefCount() const {
return KnownPositiveRefCount;
}
void SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
Seq = NewSeq;
}
@@ -472,7 +482,8 @@ namespace {
}
void ResetSequenceProgress(Sequence NewSeq) {
- Seq = NewSeq;
+ DEBUG(dbgs() << "Resetting sequence progress.\n");
+ SetSeq(NewSeq);
Partial = false;
RRI.clear();
}
@@ -486,10 +497,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
- // We can't merge a plain objc_retain with an objc_retainBlock.
- if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
- Seq = S_None;
-
// If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
Partial = false;
@@ -698,6 +705,287 @@ void BBState::MergeSucc(const BBState &Other) {
MI->second.Merge(PtrState(), /*TopDown=*/false);
}
+// Only enable ARC Annotations if we are building a debug version of
+// libObjCARCOpts.
+#ifndef NDEBUG
+#define ARC_ANNOTATIONS
+#endif
+
+// Define some macros along the lines of DEBUG and some helper functions to make
+// it cleaner to create annotations in the source code and to no-op when not
+// building in debug mode.
+#ifdef ARC_ANNOTATIONS
+
+#include "llvm/Support/CommandLine.h"
+
+/// Enable/disable ARC sequence annotations.
+static cl::opt<bool>
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
+ cl::desc("Enable emission of arc data flow analysis "
+ "annotations"));
+static cl::opt<bool>
+DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
+ cl::desc("Disable check for cfg hazards when "
+ "annotating"));
+static cl::opt<std::string>
+ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
+ cl::init(""),
+ cl::desc("filter out all data flow annotations "
+ "but those that apply to the given "
+ "target llvm identifier."));
+
+/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
+/// instruction so that we can track backwards when post-processing via the
+/// llvm arc annotation processor tool.
+static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
+ Value *Ptr) {
+ MDString *Hash = 0;
+
+  // If the pointer is the result of an instruction and does not have a source
+  // MDNode attached to it, attach a new MDNode to it. If the pointer is the
+  // result of an instruction and does have a source MDNode attached to it,
+  // return a reference to said node. Otherwise just return 0.
+ if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
+ MDNode *Node;
+ if (!(Node = Inst->getMetadata(NodeId))) {
+      // We do not have any node. Generate and attach the hash MDString to the
+ // instruction.
+
+      // We just use an MDString to ensure that this metadata gets written out
+      // of line at the module level and to provide a very simple format
+      // encoding the information herein. Both of these make it simpler for an
+      // external program to parse the annotations.
+ std::string Str;
+ raw_string_ostream os(Str);
+ os << "(" << Inst->getParent()->getParent()->getName() << ",%"
+ << Inst->getName() << ")";
+
+ Hash = MDString::get(Inst->getContext(), os.str());
+ Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
+ } else {
+ // We have a node. Grab its hash and return it.
+ assert(Node->getNumOperands() == 1 &&
+ "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
+ Hash = cast<MDString>(Node->getOperand(0));
+ }
+ } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
+ << ")";
+ Hash = MDString::get(Arg->getContext(), os.str());
+ }
+
+ return Hash;
+}
+
+static std::string SequenceToString(Sequence A) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << A;
+ return os.str();
+}
+
+/// Helper function to change a Sequence into a String object using our overload
+/// for raw_ostream so we only have printing code in one location.
+static MDString *SequenceToMDString(LLVMContext &Context,
+ Sequence A) {
+ return MDString::get(Context, SequenceToString(A));
+}
+
+/// A simple function to generate an MDNode that describes the change in state
+/// for Value *Ptr caused by Instruction *Inst.
+static void AppendMDNodeToInstForPtr(unsigned NodeId,
+ Instruction *Inst,
+ Value *Ptr,
+ MDString *PtrSourceMDNodeID,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ MDNode *Node = 0;
+ Value *tmp[3] = {PtrSourceMDNodeID,
+ SequenceToMDString(Inst->getContext(),
+ OldSeq),
+ SequenceToMDString(Inst->getContext(),
+ NewSeq)};
+ Node = MDNode::get(Inst->getContext(),
+ ArrayRef<Value*>(tmp, 3));
+
+ Inst->setMetadata(NodeId, Node);
+}
+
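// Editorial sketch of the resulting textual IR, with placeholder values for a
// pointer %x in function f moving from S_Use to S_CanRelease; the annotated
// instruction carries, e.g.:
//   ... , !llvm.arc.annotation.bottomup !7
//   !7 = metadata !{metadata !"(f,%x)", metadata !"S_Use",
//                   metadata !"S_CanRelease"}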
+/// Add llvm.ptr.annotations to the beginning of the basic block which show the
+/// state of a pointer at the entrance to the basic block.
+static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before
+ // continuing.
+  if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Add llvm.ptr.annotations to the end of the basic block which show the state
+/// of the pointer at the bottom of the basic block.
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before emitting
+ // an annotation.
+  if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Adds a source annotation to the pointer and a state change annotation to
+/// Inst referencing the source annotation and the old/new state of the pointer.
+static void GenerateARCAnnotation(unsigned InstMDId,
+ unsigned PtrMDId,
+ Instruction *Inst,
+ Value *Ptr,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ if (EnableARCAnnotations) {
+ // If we have a target identifier, make sure that we match it before
+ // emitting an annotation.
+    if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
+ // First generate the source annotation on our pointer. This will return an
+ // MDString* if Ptr actually comes from an instruction implying we can put
+ // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
+ // then we know that our pointer is from an Argument so we put a reference
+ // to the argument number.
+ //
+ // The point of this is to make it easy for the
+ // llvm-arc-annotation-processor tool to cross reference where the source
+ // pointer is in the LLVM IR since the LLVM IR parser does not submit such
+ // information via debug info for backends to use (since why would anyone
+  // need such a thing from LLVM IR besides in non-standard cases
+ // [i.e. this]).
+ MDString *SourcePtrMDNode =
+ AppendMDNodeToSourcePtr(PtrMDId, Ptr);
+ AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
+ NewSeq);
+ }
+}
+
+// The actual interface for accessing the above functionality is defined via
+// some simple macros which are defined below. We do this so that the user does
+// not need to pass in the required metadata id, which results in cleaner code.
+// It additionally provides an easy way to conditionally no-op all annotation
+// support in a non-debug build.
+
+/// Use this macro to annotate a sequence state change when processing
+/// instructions bottom up.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+/// Use this macro to annotate a sequence state change when processing
+/// instructions top down.
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+
+#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
+ do { \
+ if (EnableARCAnnotations) { \
+    for (BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+ E = (_states)._direction##_ptr_end(); I != E; ++I) { \
+ Value *Ptr = const_cast<Value*>(I->first); \
+ Sequence Seq = I->second.GetSeq(); \
+ GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ } \
+ } \
+ } while (0)
+
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
+ Entrance, bottom_up)
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+ Terminator, bottom_up)
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+ Entrance, top_down)
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+ Terminator, top_down)
+
+#else // !ARC_ANNOTATION
+// If annotations are off, no-op.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
+#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
+#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
+#endif // !ARC_ANNOTATION
+
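// Editorial usage note: in a Debug+Asserts build the annotation machinery is
// compiled in but defaults to off; a hypothetical invocation that emits the
// annotations would look like
//   opt -objc-arc -enable-objc-arc-annotations input.ll -S -o annotated.ll
// optionally filtered with -objc-arc-annotation-target-identifier=<name>.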
namespace {
/// \brief The main ARC optimization pass.
class ObjCARCOpt : public FunctionPass {
@@ -711,9 +999,6 @@ namespace {
/// them. These are initialized lazily to avoid cluttering up the Module
/// with unused declarations.
- /// Declaration for ObjC runtime function
- /// objc_retainAutoreleasedReturnValue.
- Constant *RetainRVCallee;
/// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
Constant *AutoreleaseRVCallee;
/// Declaration for ObjC runtime function objc_release.
@@ -738,7 +1023,15 @@ namespace {
/// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
unsigned NoObjCARCExceptionsMDKind;
- Constant *getRetainRVCallee(Module *M);
+#ifdef ARC_ANNOTATIONS
+ /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
+ unsigned ARCAnnotationBottomUpMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
+ unsigned ARCAnnotationTopDownMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
+ unsigned ARCAnnotationProvenanceSourceMDKind;
+#endif // ARC_ANNOTATIONS
+
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
@@ -747,10 +1040,11 @@ namespace {
bool IsRetainBlockOptimizable(const Instruction *Inst);
- void OptimizeRetainCall(Function &F, Instruction *Retain);
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
+ bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
+ InstructionClass &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
@@ -804,6 +1098,10 @@ namespace {
void OptimizeReturns(Function &F);
+#ifndef NDEBUG
+ void GatherStatistics(Function &F, bool AfterOptimization = false);
+#endif
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
@@ -851,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
return true;
}
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
- if (!RetainRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
- Attribute);
- }
- return RetainRVCallee;
-}
-
Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
if (!AutoreleaseRVCallee) {
LLVMContext &C = M->getContext();
@@ -946,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
-/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
-/// return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
- const Instruction *Call = CS.getInstruction();
- if (!Call) return;
- if (Call->getParent() != Retain->getParent()) return;
-
- // Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (isNoopInstruction(I)) ++I;
- if (&*I != Retain)
- return;
-
- // Turn it to an objc_retainAutoreleasedReturnValue..
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
- "objc_retain => objc_retainAutoreleasedReturnValue"
- " since the operand is a return value.\n"
- " Old: "
- << *Retain << "\n");
-
- cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-
- DEBUG(dbgs() << " New: "
- << *Retain << "\n");
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -990,14 +1240,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
if (Call->getParent() == RetainRV->getParent()) {
BasicBlock::const_iterator I = Call;
++I;
- while (isNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock *RetainRVParent = RetainRV->getParent();
if (II->getNormalDest() == RetainRVParent) {
BasicBlock::const_iterator I = RetainRVParent->begin();
- while (isNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
}
@@ -1008,15 +1258,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// pointer. In this case, we can delete the pair.
BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
if (I != Begin) {
- do --I; while (I != Begin && isNoopInstruction(I));
+ do --I; while (I != Begin && IsNoopInstruction(I));
if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
GetObjCArg(I) == Arg) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
- << " Erasing " << *RetainRV
- << "\n");
+ DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
+ << "Erasing " << *RetainRV << "\n");
EraseInstruction(I);
EraseInstruction(RetainRV);
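// Editorial sketch of the deleted pair, in hypothetical pseudo-IR: an
// autoreleaseRV immediately followed (modulo no-op instructions) by a
// retainRV on the same pointer cancels out, so both calls are erased:
//   %0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
//   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)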
@@ -1028,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
- "objc_retainAutoreleasedReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
"objc_retain since the operand is not a return value.\n"
- " Old: "
- << *RetainRV << "\n");
+ "Old = " << *RetainRV << "\n");
cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
- DEBUG(dbgs() << " New: "
- << *RetainRV << "\n");
+ DEBUG(dbgs() << "New = " << *RetainRV << "\n");
return false;
}
@@ -1066,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
- "objc_autoreleaseReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => "
"objc_autorelease since its operand is not used as a return "
"value.\n"
- " Old: "
- << *AutoreleaseRV << "\n");
+ "Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
AutoreleaseRVCI->
@@ -1079,14 +1323,48 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = IC_Autorelease;
- DEBUG(dbgs() << " New: "
- << *AutoreleaseRV << "\n");
+ DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
+
+}
+
+// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
+// calls.
+//
+// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
+// does not escape (following the rules of block escaping), strength reduce the
+// objc_retainBlock to an objc_retain.
+//
+// TODO: If an objc_retainBlock call is dominated by a previous
+// objc_retainBlock call, strength reduce the objc_retainBlock to an
+// objc_retain.
+bool
+ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
+ InstructionClass &Class) {
+ assert(GetBasicInstructionClass(Inst) == Class);
+ assert(IC_RetainBlock == Class);
+
+  // If we cannot optimize Inst, return false.
+ if (!IsRetainBlockOptimizable(Inst))
+ return false;
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
+ DEBUG(dbgs() << "Old: " << *Inst << "\n");
+ CallInst *RetainBlock = cast<CallInst>(Inst);
+ RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
+ // Remove copy_on_escape metadata.
+ RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
+ Class = IC_Retain;
+ DEBUG(dbgs() << "New: " << *Inst << "\n");
+ return true;
}
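// Editorial sketch of the strength reduction, in hypothetical pseudo-IR,
// keyed off clang's copy_on_escape marker on a non-escaping block:
//   %b = call i8* @objc_retainBlock(i8* %blk), !clang.arc.copy_on_escape !0
// becomes
//   %b = call i8* @objc_retain(i8* %blk)
// with the metadata dropped as part of the rewrite.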
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
// Reset all the flags in preparation for recomputing them.
UsedInThisFunction = 0;
@@ -1096,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
InstructionClass Class = GetBasicInstructionClass(Inst);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
- << Class << "; " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
switch (Class) {
default: break;
@@ -1113,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_NoopCast:
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
- " " << *Inst << "\n");
+ DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
EraseInstruction(Inst);
continue;
@@ -1125,18 +1401,15 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_InitWeak:
case IC_DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0))) {
+ if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
continue;
@@ -1146,8 +1419,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_CopyWeak:
case IC_MoveWeak: {
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0)) ||
- isNullOrUndef(CI->getArgOperand(1))) {
+ if (IsNullOrUndef(CI->getArgOperand(0)) ||
+ IsNullOrUndef(CI->getArgOperand(1))) {
Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
@@ -1155,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
@@ -1167,8 +1437,14 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
+ case IC_RetainBlock:
+ // If we strength reduce an objc_retainBlock to an objc_retain, continue
+ // onto the objc_retain peephole optimizations. Otherwise break.
+ if (!OptimizeRetainBlockCall(F, Inst, Class))
+ break;
+ // FALLTHROUGH
case IC_Retain:
- OptimizeRetainCall(F, Inst);
+ ++NumRetainsBeforeOpt;
break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
@@ -1177,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
+ case IC_Release:
+ ++NumReleasesBeforeOpt;
+ break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1193,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *NewCall =
CallInst::Create(getReleaseCallee(F.getParent()),
Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>()));
+ NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
- "objc_autorelease(x) with objc_release(x) since x is "
- "otherwise unused.\n"
- " Old: " << *Call <<
- "\n New: " <<
- *NewCall << "\n");
+ DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+ "since x is otherwise unused.\nOld: " << *Call << "\nNew: "
+ << *NewCall << "\n");
EraseInstruction(Call);
Inst = NewCall;
@@ -1213,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// a tail keyword.
if (IsAlwaysTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
- " to function since it can never be passed stack args: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Adding tail keyword to function since it can never be "
+ "passed stack args: " << *Inst << "\n");
cast<CallInst>(Inst)->setTailCall();
}
@@ -1223,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// semantics of ARC truly do not do so.
if (IsNeverTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
- "keyword from function: " << *Inst <<
+ DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst <<
"\n");
cast<CallInst>(Inst)->setTailCall(false);
}
@@ -1232,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Set nounwind as needed.
if (IsNoThrow(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
- " class. Setting nounwind on: " << *Inst << "\n");
+ DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+ << "\n");
cast<CallInst>(Inst)->setDoesNotThrow();
}
@@ -1245,11 +1518,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
const Value *Arg = GetObjCArg(Inst);
// ARC calls with null are no-ops. Delete them.
- if (isNullOrUndef(Arg)) {
+ if (IsNullOrUndef(Arg)) {
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
- " null are no-ops. Erasing: " << *Inst << "\n");
+ DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
+ << "\n");
EraseInstruction(Inst);
continue;
}
@@ -1280,7 +1553,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (isNullOrUndef(Incoming))
+ if (IsNullOrUndef(Incoming))
HasNull = true;
else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
.getNumSuccessors() != 1) {
@@ -1334,7 +1607,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (!isNullOrUndef(Incoming)) {
+ if (!IsNullOrUndef(Incoming)) {
CallInst *Clone = cast<CallInst>(CInst->clone());
Value *Op = PN->getIncomingValue(i);
Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
@@ -1343,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Clone->setArgOperand(0, Op);
Clone->insertBefore(InsertPos);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ DEBUG(dbgs() << "Cloning "
<< *CInst << "\n"
- " And inserting "
- "clone at " << *InsertPos << "\n");
+ "And inserting clone at " << *InsertPos << "\n");
Worklist.push_back(std::make_pair(Clone, Incoming));
}
}
@@ -1358,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
} while (!Worklist.empty());
}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// If we have a top down pointer in the S_Use state, make sure that there are
+/// no CFG hazards by checking the states of various bottom up pointers.
+static void CheckForUseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame,
+ bool &ShouldContinue) {
+ switch (SuccSSeq) {
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ ShouldContinue = true;
+ break;
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
+}
+
+/// If we have a top down pointer in the S_CanRelease state, make sure that
+/// there are no CFG hazards by checking the states of various bottom up
+/// pointers.
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame) {
+ switch (SuccSSeq) {
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
}
/// Check for critical edges, loop boundaries, irreducible control flow, or
@@ -1371,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
  // If any top-down local-use or possible-decrement has a successor which is
  // earlier in the sequence, forget it.
for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I)
- switch (I->second.GetSeq()) {
- default: break;
- case S_Use: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
+ E = MyStates.top_down_ptr_end(); I != E; ++I) {
+ PtrState &S = I->second;
+ const Sequence Seq = I->second.GetSeq();
+
+ // We only care about S_Retain, S_CanRelease, and S_Use.
+ if (Seq == S_None)
+ continue;
+
+    // Make sure that if extra top down states are added in the future, this
+    // code is updated to handle them.
+ assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
+ "Unknown top down sequence state.");
+
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ const DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const Sequence SuccSSeq = SuccS.GetSeq();
+
+      // If, bottom up, the pointer is in an S_None state, clear the sequence
+      // progress, since the bottom up sequence finished, suggesting a mismatch
+      // between retains and releases. This is true for all three cases that we
+      // are handling here: S_Retain, S_Use, and S_CanRelease.
+ if (SuccSSeq == S_None) {
S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
+ continue;
+ }
+
+      // If we have S_Use or S_CanRelease, perform the CFG hazard checks.
+ const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+
+ // *NOTE* We do not use Seq from above here since we are allowing for
+ // S.GetSeq() to change while we are visiting basic blocks.
+      switch (S.GetSeq()) {
+ case S_Use: {
+ bool ShouldContinue = false;
+ CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+ SomeSuccHasSame, AllSuccsHaveSame,
+ ShouldContinue);
+ if (ShouldContinue)
continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ break;
+ }
+ case S_CanRelease: {
+ CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
+ S, SomeSuccHasSame,
+ AllSuccsHaveSame);
+ break;
+ }
+ case S_Retain:
+ case S_None:
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ break;
}
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
}
+
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ }
}
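// Editorial sketch of the hazard, as pseudocode: a retain/release pair must
// not be sunk across a branch where only some successors continue the
// sequence (e.g. one side of a CFG diamond uses the pointer, the other does
// not):
//   objc_retain(x);   // top-down: S_Retain
//   if (flag)
//     use(x);         // this successor's bottom-up state matches
//   else
//     ...;            // this successor's does not
//   objc_release(x);
// Some, but not all, successors match, so the sequence progress is cleared
// rather than moving the calls into one arm.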
bool
@@ -1482,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
InstructionClass Class = GetInstructionClass(Inst);
const Value *Arg = 0;
+ DEBUG(dbgs() << "Class: " << Class << "\n");
+
switch (Class) {
case IC_Release: {
Arg = GetObjCArg(Inst);
@@ -1496,27 +1804,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
- "releases (i.e. a release pair)\n");
+ DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
NestingDetected = true;
}
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
+ S.ResetSequenceProgress(NewSeq);
S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
-
S.SetKnownPositiveRefCount();
break;
}
case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
case IC_Retain:
case IC_RetainRV: {
Arg = GetObjCArg(Inst);
@@ -1524,20 +1831,22 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.SetKnownPositiveRefCount();
- switch (S.GetSeq()) {
+ Sequence OldSeq = S.GetSeq();
+ switch (OldSeq) {
case S_Stop:
case S_Release:
case S_MovableRelease:
case S_Use:
- S.RRI.ReverseInsertPts.clear();
+ // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+ // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_CanRelease:
// Don't do retain+release tracking for IC_RetainRV, because it's
// better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ if (Class != IC_RetainRV)
Retains[Inst] = S.RRI;
- }
S.ClearSequenceProgress();
break;
case S_None:
@@ -1545,7 +1854,9 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
- return NestingDetected;
+ ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
+ // A retain moving bottom up can be a use.
+ break;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -1571,10 +1882,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
+ S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Use:
S.SetSeq(S_CanRelease);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
continue;
case S_CanRelease:
case S_Release:
@@ -1592,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
assert(S.RRI.ReverseInsertPts.empty());
// If this is an invoke instruction, we're scanning it as part of
// one of its successor blocks, since we can't insert code after it
@@ -1601,10 +1917,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
else
S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
assert(S.RRI.ReverseInsertPts.empty());
// As above; handle invoke specially.
if (isa<InvokeInst>(Inst))
@@ -1614,8 +1933,12 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
}
break;
case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ }
break;
case S_CanRelease:
case S_Use:
@@ -1633,6 +1956,9 @@ bool
ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains) {
+
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
+
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -1654,6 +1980,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
}
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
Instruction *Inst = llvm::prior(I);
@@ -1662,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -1677,6 +2007,10 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+
return NestingDetected;
}
@@ -1690,11 +2024,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (Class) {
case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
case IC_Retain:
case IC_RetainRV: {
Arg = GetObjCArg(Inst);
@@ -1714,9 +2047,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
if (S.GetSeq() == S_Retain)
NestingDetected = true;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
S.ResetSequenceProgress(S_Retain);
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
S.RRI.Calls.insert(Inst);
}
@@ -1730,17 +2063,23 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearRefCount();
+ S.ClearKnownPositiveRefCount();
+
+ Sequence OldSeq = S.GetSeq();
- switch (S.GetSeq()) {
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+
+ switch (OldSeq) {
case S_Retain:
case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
+ if (OldSeq == S_Retain || ReleaseMetadata != 0)
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
Releases[Inst] = S.RRI;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
break;
case S_None:
@@ -1776,10 +2115,13 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
+ S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
@@ -1801,8 +2143,12 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible direct uses.
switch (Seq) {
case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
+ }
break;
case S_Retain:
case S_Use:
@@ -1822,6 +2168,7 @@ bool
ObjCARCOpt::VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -1843,15 +2190,26 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
}
}
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+
+#ifdef ARC_ANNOTATIONS
+ if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
+#endif
CheckForCFGHazards(BB, BBStates, MyStates);
return NestingDetected;
}
@@ -1983,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Type *ArgTy = Arg->getType();
Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+ DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
+
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = ReleasesToMove.ReverseInsertPts.begin(),
@@ -1991,20 +2351,12 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
CallInst *Call =
- CallInst::Create(RetainsToMove.IsRetainBlock ?
- getRetainBlockCallee(M) : getRetainCallee(M),
- MyArg, "", InsertPt);
+ CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
Call->setDoesNotThrow();
- if (RetainsToMove.IsRetainBlock)
- Call->setMetadata(CopyOnEscapeMDKind,
- MDNode::get(M->getContext(), ArrayRef<Value *>()));
- else
- Call->setTailCall();
+ Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
@@ -2021,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
// Delete the original retain and release calls.
@@ -2034,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRetain = *AI;
Retains.blot(OrigRetain);
DeadInsts.push_back(OrigRetain);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
- "\n");
+ DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
AI = ReleasesToMove.Calls.begin(),
@@ -2043,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRelease = *AI;
Releases.erase(OrigRelease);
DeadInsts.push_back(OrigRelease);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
- << "\n");
+ DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
}
+
}
bool
@@ -2075,7 +2424,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
unsigned OldCount = 0;
unsigned NewCount = 0;
bool FirstRelease = true;
- bool FirstRetain = true;
for (;;) {
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
@@ -2156,16 +2504,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
OldDelta += PathCount;
OldCount += PathCount;
- // Merge the IsRetainBlock values.
- if (FirstRetain) {
- RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
- FirstRetain = false;
- } else if (ReleasesToMove.IsRetainBlock !=
- NewReleaseRetainRRI.IsRetainBlock)
- // It's not possible to merge the sequences if one uses
- // objc_retain and the other uses objc_retainBlock.
- return false;
-
// Collect the optimal insertion points.
if (!KnownSafe)
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2210,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested.
+ if (EnableARCAnnotations)
+ return false;
+#endif // ARC_ANNOTATIONS
+
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2228,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
Module *M) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
+
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2243,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Instruction *Retain = cast<Instruction>(V);
- DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
- << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
Value *Arg = GetObjCArg(Retain);
@@ -2295,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
/// Weak pointer optimizations.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
+
// First, do memdep-style RLE and S2L optimizations. We can't use memdep
// itself because it uses AliasAnalysis and we need to do provenance
// queries instead.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
InstructionClass Class = GetBasicInstructionClass(Inst);
if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
@@ -2392,6 +2738,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
goto clobbered;
case IC_AutoreleasepoolPush:
case IC_None:
+ case IC_IntrinsicUser:
case IC_User:
// Weak pointers are only modified through the weak entry points
// (and arbitrary calls, which could call the weak entry points).
@@ -2449,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
done:;
}
}
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
}
/// Identify program paths which execute sequences of retains and releases which
@@ -2476,6 +2820,88 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
NestingDetected;
}
+/// Check for an earlier dependent call such that nothing between it and the
+/// Retain can affect the reference count of their shared pointer argument.
+/// Note that Retain need not be in BB.
+static bool
+HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+ DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return false;
+
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ return false;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ return false;
+
+ return true;
+}
+
+/// Find a dependent retain that precedes the given autorelease such that
+/// nothing between the two instructions can affect the ref count of Arg.
+static CallInst *
+FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
+ Instruction *Autorelease,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg) {
+ return 0;
+ }
+
+ return Retain;
+}
+
+/// Look for an ``autorelease'' instruction dependent on Arg such that there are
+/// no instructions dependent on Arg that need a positive ref count between
+/// the autorelease and the ret.
+static CallInst *
+FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
+ ReturnInst *Ret,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &V,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DepInsts, V, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ if (!Autorelease)
+ return 0;
+ InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ return 0;
+ if (GetObjCArg(Autorelease) != Arg)
+ return 0;
+
+ return Autorelease;
+}
+
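Each of the three helpers above has the same shape: issue a FindDependencies query, insist on exactly one result, then type-check and validate that single candidate, bailing out otherwise. A toy standalone sketch of that exactly-one-then-validate pattern (plain C++; getSingleValidatedDep and the lambda are hypothetical names, not LLVM APIs):

    #include <cassert>
    #include <set>
    #include <string>

    // Returns the single element of Deps if it passes Validate, else null.
    // This mirrors the structure of the three Find*/Has* helpers above.
    template <typename T, typename Pred>
    const T *getSingleValidatedDep(const std::set<const T *> &Deps,
                                   Pred Validate) {
      if (Deps.size() != 1)
        return nullptr;               // ambiguous: more than one dependency
      const T *Candidate = *Deps.begin();
      return Validate(Candidate) ? Candidate : nullptr;
    }

    int main() {
      std::string Retain = "objc_retain";
      std::set<const std::string *> Deps = {&Retain};
      const std::string *Hit = getSingleValidatedDep(Deps,
          [](const std::string *S) { return *S == "objc_retain"; });
      assert(Hit != nullptr);
      return 0;
    }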
/// Look for this pattern:
/// \code
/// %call = call i8* @something(...)
@@ -2484,122 +2910,91 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
/// ret i8* %3
/// \endcode
/// And delete the retain and autorelease.
-///
-/// Otherwise if it's just this:
-/// \code
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// convert the autorelease to autoreleaseRV.
void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!F.getReturnType()->isPointerTy())
return;
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
+
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
BasicBlock *BB = FI;
ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
- if (!Ret) continue;
+ if (!Ret)
+ continue;
const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
- FindDependencies(NeedsPositiveRetainCount, Arg,
- BB, Ret, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- if (!Autorelease)
- goto next_block;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
- if (!IsAutorelease(AutoreleaseClass))
- goto next_block;
- if (GetObjCArg(Autorelease) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference
- // count between the autorelease and the retain.
- FindDependencies(CanChangeRetainCount, Arg,
- BB, Autorelease, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Convert the autorelease to an autoreleaseRV, since it's
- // returning the value.
- if (AutoreleaseClass == IC_Autorelease) {
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
- "=> autoreleaseRV since it's returning a value.\n"
- " In: " << *Autorelease
- << "\n");
- Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
- DEBUG(dbgs() << " Out: " << *Autorelease
- << "\n");
- Autorelease->setTailCall(); // Always tail call autoreleaseRV.
- AutoreleaseClass = IC_AutoreleaseRV;
- }
-
- // Check that there is nothing that can affect the reference
- // count between the retain and the call.
- // Note that Retain need not be in BB.
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
- DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
- {
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ // Look for an ``autorelease'' instruction that is a predecessor of Ret and
+ // dependent on Arg such that there are no instructions dependent on Arg
+ // that need a positive ref count in between the autorelease and Ret.
+ CallInst *Autorelease =
+ FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
+ DependingInstructions, Visited,
+ PA);
+ DependingInstructions.clear();
+ Visited.clear();
- // Check that the pointer is the return value of the call.
- if (!Call || Arg != Call)
- goto next_block;
+ if (!Autorelease)
+ continue;
- // Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
- goto next_block;
+ CallInst *Retain =
+ FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+ DependingInstructions, Visited, PA);
+ DependingInstructions.clear();
+ Visited.clear();
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
- << "\n Erasing: "
- << *Autorelease << "\n");
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
+ if (!Retain)
+ continue;
- next_block:
+ // Check that there is nothing that can affect the reference count
+ // between the retain and the call. Note that Retain need not be in BB.
+ bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
+ DependingInstructions,
+ Visited, PA);
DependingInstructions.clear();
Visited.clear();
+
+ if (!HasSafePathToCall)
+ continue;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: "
+ << *Autorelease << "\n");
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
}
+}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+#ifndef NDEBUG
+void
+ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
+ llvm::Statistic &NumRetains =
+ AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt;
+ llvm::Statistic &NumReleases =
+ AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ default:
+ break;
+ case IC_Retain:
+ ++NumRetains;
+ break;
+ case IC_Release:
+ ++NumReleases;
+ break;
+ }
+ }
}
+#endif
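GatherStatistics above takes one pass over the function and bumps the matching counter per instruction class. A minimal standalone analogue (plain C++; the InstClass enum and the input vector stand in for GetBasicInstructionClass and the instruction stream):

    #include <cstdio>
    #include <vector>

    enum InstClass { IC_Retain, IC_Release, IC_Other };

    int main() {
      std::vector<InstClass> Insts = {IC_Retain, IC_Other, IC_Release,
                                      IC_Retain};
      unsigned NumRetains = 0, NumReleases = 0;
      for (size_t i = 0; i != Insts.size(); ++i) {
        switch (Insts[i]) {
        default:
          break;
        case IC_Retain:
          ++NumRetains;   // tallied per class, as in GatherStatistics
          break;
        case IC_Release:
          ++NumReleases;
          break;
        }
      }
      std::printf("retains=%u releases=%u\n", NumRetains, NumReleases);
      return 0;
    }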
bool ObjCARCOpt::doInitialization(Module &M) {
if (!EnableARCOpts)
@@ -2617,13 +3012,20 @@ bool ObjCARCOpt::doInitialization(Module &M) {
M.getContext().getMDKindID("clang.arc.copy_on_escape");
NoObjCARCExceptionsMDKind =
M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+#ifdef ARC_ANNOTATIONS
+ ARCAnnotationBottomUpMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
+ ARCAnnotationTopDownMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.topdown");
+ ARCAnnotationProvenanceSourceMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
+#endif // ARC_ANNOTATIONS
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers which can have arbitrary side effects.
// These are initialized lazily.
- RetainRVCallee = 0;
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
@@ -2643,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
Changed = false;
- DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+ DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
+ "\n");
PA.setAA(&getAnalysis<AliasAnalysis>());
@@ -2651,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
- // Preliminary optimizations. This also computs UsedInThisFunction.
+ // Preliminary optimizations. This also computes UsedInThisFunction.
OptimizeIndividualCalls(F);
// Optimizations for weak pointers.
@@ -2678,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
(1 << IC_AutoreleaseRV)))
OptimizeReturns(F);
+ // Gather statistics after optimization.
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, true);
+ }
+#endif
+
DEBUG(dbgs() << "\n");
return Changed;
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
index a841c64a9f..03e12d4fd7 100644
--- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
return OS << "IC_Call";
case IC_User:
return OS << "IC_User";
+ case IC_IntrinsicUser:
+ return OS << "IC_IntrinsicUser";
case IC_None:
return OS << "IC_None";
}
@@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- // No arguments.
+ // No (mandatory) arguments.
if (AI == AE)
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Case("clang.arc.use", IC_IntrinsicUser)
.Default(IC_CallOrUser);
// One argument.
@@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_moveWeak", IC_MoveWeak)
.Case("objc_copyWeak", IC_CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
+ .Case("llvm.arc.annotation.topdown.bbend", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
.Default(IC_CallOrUser);
}
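The hunk above routes the four llvm.arc.annotation.* entry points to IC_None so the optimizer never treats them as uses. A standalone sketch of that name-based classification (plain C++; classifyByName is a hypothetical stand-in for GetFunctionClass's StringSwitch, and a single prefix check replaces the four exact .Case entries):

    #include <cassert>
    #include <string>

    enum InstructionClass { IC_None, IC_MoveWeak, IC_CopyWeak, IC_CallOrUser };

    // Classify a callee by name; annotation intrinsics classify as IC_None so
    // they do not perturb the pointer states they are meant to report.
    static InstructionClass classifyByName(const std::string &Name) {
      if (Name == "objc_moveWeak") return IC_MoveWeak;
      if (Name == "objc_copyWeak") return IC_CopyWeak;
      if (Name.compare(0, 20, "llvm.arc.annotation.") == 0) return IC_None;
      return IC_CallOrUser;
    }

    int main() {
      assert(classifyByName("llvm.arc.annotation.topdown.bbstart") == IC_None);
      assert(classifyByName("objc_moveWeak") == IC_MoveWeak);
      assert(classifyByName("printf") == IC_CallOrUser);
      return 0;
    }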
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d71dd5dec6..f0d29c88a8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -154,7 +155,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!DefIsLiveOut)
return false;
- // Make sure non of the uses are PHI nodes.
+ // Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447f1c..f350b9bbde 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
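AvailableValueInBlock above packs its discriminator into the pointer's low bits with PointerIntPair. A portable standalone equivalent that keeps the tag in a separate field but preserves the factory-function and checked-accessor shape (plain C++; TaggedValue is an illustrative simplification):

    #include <cassert>

    struct TaggedValue {
      enum ValType { SimpleVal, LoadVal, MemIntrin };
      void *Ptr;     // PointerIntPair would fold Tag into Ptr's low bits
      ValType Tag;
      unsigned Offset;

      static TaggedValue get(void *V, ValType T, unsigned Off = 0) {
        TaggedValue Res;
        Res.Ptr = V;
        Res.Tag = T;
        Res.Offset = Off;
        return Res;
      }

      bool isSimpleValue() const { return Tag == SimpleVal; }

      void *getSimpleValue() const {
        assert(isSimpleValue() && "Wrong accessor"); // checked, as above
        return Ptr;
      }
    };

    int main() {
      int X = 42;
      TaggedValue V = TaggedValue::get(&X, TaggedValue::SimpleVal);
      assert(V.isSimpleValue() && *(int *)V.getSimpleValue() == 42);
      return 0;
    }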
class GVN : public FunctionPass {
bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+ // Helper functions for redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1714,7 +1649,73 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load.
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate full redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
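The restructured processNonLocalLoad above stages the work: analyze availability per predecessor, eliminate the load outright when every predecessor supplies a value, and otherwise fall back to load PRE. A toy standalone sketch of that staging (plain C++; eliminateLoad and the string outcomes are illustrative, not LLVM APIs):

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    struct Result { bool Changed; std::string How; };

    // Mirrors the staged flow above: availability analysis, then full
    // redundancy elimination, then PRE as the fallback.
    static Result eliminateLoad(const std::map<std::string, bool> &AvailInPred,
                                bool EnableLoadPRE) {
      std::vector<std::string> Unavailable;
      bool AnyAvailable = false;
      for (std::map<std::string, bool>::const_iterator I = AvailInPred.begin(),
           E = AvailInPred.end(); I != E; ++I) {
        if (I->second)
          AnyAvailable = true;
        else
          Unavailable.push_back(I->first);
      }
      if (!AnyAvailable)
        return Result{false, "no value available"};
      if (Unavailable.empty())
        return Result{true, "fully redundant: build PHI"};
      if (!EnableLoadPRE)
        return Result{false, "partial, PRE disabled"};
      return Result{true, "partial: insert load in unavailable preds"};
    }

    int main() {
      std::map<std::string, bool> Preds;
      Preds["bb1"] = true;
      Preds["bb2"] = true;
      assert(eliminateLoad(Preds, false).How == "fully redundant: build PHI");
      Preds["bb3"] = false;
      assert(eliminateLoad(Preds, true).Changed);
      return 0;
    }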
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1757,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1920,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2261,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d646..4796eb2953 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -64,10 +65,16 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +85,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must-keep.
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction in this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used".
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariables that must not be merged away.
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -87,6 +111,7 @@ namespace {
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
const char *getPassName() const {
return "Merge internal globals";
@@ -169,6 +194,42 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke's unwind edge to find the landing pad instruction.
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
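setMustKeepGlobalVariables above builds the must-keep set once (llvm.used entries plus landing-pad clause globals), and doInitialization then filters merge candidates against it. A standalone sketch of that collect-then-filter shape (plain C++; the global names and MustKeep contents are illustrative):

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Stand-ins for module globals and the "must keep" collection pass.
      std::vector<std::string> Globals = {"a", "b", "typeinfo_c", "d"};
      std::set<std::string> MustKeep = {"typeinfo_c"}; // e.g. landing-pad use

      std::vector<std::string> MergeCandidates;
      for (size_t i = 0; i != Globals.size(); ++i) {
        if (MustKeep.count(Globals[i]))
          continue; // ignore all "required" globals, as in doInitialization
        MergeCandidates.push_back(Globals[i]);
      }
      for (size_t i = 0; i != MergeCandidates.size(); ++i)
        std::cout << MergeCandidates[i] << "\n"; // prints a, b, d
      return 0;
    }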
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +237,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +262,10 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +287,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
@@ -234,6 +300,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
return new GlobalMerge(tli);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 97fff7e782..8e76c78f5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
+ // Computing the value outside of the loop brings no benefit if:
+ //  - it is definitely used inside the loop in a way which cannot be
+ //    optimized away, and
+ //  - no use outside of the loop can take advantage of hoisting the
+ //    computation out of the loop.
+ if (ExitValue->getSCEVType()>=scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+ IB!=IE && NumUses<=6 ; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB=UseInstr->use_begin(),
+ PE=UseInstr->use_end();
+ PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
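The heuristic above scans at most six uses and skips rewriting only when some use inside the loop is "hard" (a call or return) and no use outside the loop would benefit from hoisting. A standalone sketch of the counting, with the PHI look-through omitted (plain C++; the Use record and skipRewrite are illustrative):

    #include <cassert>
    #include <vector>

    struct Use {
      bool InsideLoop;
      bool IsCallOrRet; // "hard" uses cannot be optimized away
    };

    // True when hoisting the exit value out of the loop is not worthwhile:
    // few uses, at least one hard use inside, none outside that would benefit.
    static bool skipRewrite(const std::vector<Use> &Uses) {
      unsigned NumHardInternal = 0, NumSoftExternal = 0, NumUses = 0;
      for (size_t i = 0; i != Uses.size() && NumUses <= 6; ++i) {
        ++NumUses;
        if (Uses[i].InsideLoop) {
          if (Uses[i].IsCallOrRet)
            ++NumHardInternal;
        } else if (!Uses[i].IsCallOrRet) {
          ++NumSoftExternal;
        }
      }
      return NumUses <= 6 && NumHardInternal && !NumSoftExternal;
    }

    int main() {
      std::vector<Use> Uses = {{true, true}, {true, false}};
      assert(skipRewrite(Uses));  // hard internal use, no external beneficiary
      Uses.push_back(Use{false, false});
      assert(!skipRewrite(Uses)); // an external use could benefit now
      return 0;
    }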
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9c67e327e2..0b62050b17 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -34,13 +34,9 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop* L, LPPassManager& LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
};
}
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// isLoopDead - Determines if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
- if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock* preheader = L->getLoopPreheader();
+ BasicBlock *preheader = L->getLoopPreheader();
if (!preheader)
return false;
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst* TI = preheader->getTerminator();
+ TerminatorInst *TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock *exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
int j = P->getBasicBlockIndex(exitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae953e5..14c5655f08 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4e4cb86464..73e44d7edf 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
@@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 0da3746950..a3c241dc0f 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -110,6 +110,51 @@ namespace {
}
};
};
+
+ /// Utility class representing a non-constant Xor-operand. We classify
+ /// non-constant Xor-Operands into two categories:
+ /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
+ /// C2)
+ /// C2.1) The operand is in the form of "X | C", where C is a non-zero
+ /// constant.
+  ///   C2.2) Any operand E that doesn't fall into C1 or C2.1; we view such
+  ///         an operand as "E | 0".
+ class XorOpnd {
+ public:
+ XorOpnd(Value *V);
+ const XorOpnd &operator=(const XorOpnd &That);
+
+ bool isInvalid() const { return SymbolicPart == 0; }
+ bool isOrExpr() const { return isOr; }
+ Value *getValue() const { return OrigVal; }
+ Value *getSymbolicPart() const { return SymbolicPart; }
+ unsigned getSymbolicRank() const { return SymbolicRank; }
+ const APInt &getConstPart() const { return ConstPart; }
+
+ void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+ // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
+ // The purpose is twofold:
+ // 1) Cluster together the operands sharing the same symbolic-value.
+ // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
+  //    could potentially shorten the critical path and expose more
+  //    loop-invariants.
+  // Note that values' ranks are basically defined in RPO order (FIXME).
+  // So, if Rank(X) < Rank(Y) < Rank(Z), X is defined earlier than Y,
+  // which is defined earlier than Z. Permuting "X | 1", "Y & 2", "Z"
+  // in the order X-Y-Z is better than any other order.
+ struct PtrSortFunctor {
+ bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
+ }
+ };
+ private:
+ Value *OrigVal;
+ Value *SymbolicPart;
+ APInt ConstPart;
+ unsigned SymbolicRank;
+ bool isOr;
+ };
}
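A concrete reading of the C1/C2 classification above, with illustrative 8-bit operands:

//   "X & 0x0F" -> C1:   SymbolicPart = X, ConstPart = 0x0F, isOr = false
//   "Y | 0x80" -> C2.1: SymbolicPart = Y, ConstPart = 0x80, isOr = true
//   "Z"        -> C2.2: treated as "Z | 0": ConstPart = 0,  isOr = true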
namespace {
@@ -137,6 +182,11 @@ namespace {
Value *OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
+ Value *&Res);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res);
bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
SmallVectorImpl<Factor> &Factors);
Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
@@ -148,6 +198,42 @@ namespace {
};
}
+XorOpnd::XorOpnd(Value *V) {
+ assert(!isa<ConstantInt>(V) && "No ConstantInt");
+ OrigVal = V;
+ Instruction *I = dyn_cast<Instruction>(V);
+ SymbolicRank = 0;
+
+ if (I && (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And)) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (isa<ConstantInt>(V0))
+ std::swap(V0, V1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
+ ConstPart = C->getValue();
+ SymbolicPart = V0;
+ isOr = (I->getOpcode() == Instruction::Or);
+ return;
+ }
+ }
+
+  // View the operand as "V | 0".
+ SymbolicPart = V;
+ ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ isOr = true;
+}
+
+const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
+ OrigVal = That.OrigVal;
+ SymbolicPart = That.SymbolicPart;
+ ConstPart = That.ConstPart;
+ SymbolicRank = That.SymbolicRank;
+ isOr = That.isOr;
+ return *this;
+}
+
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
"Reassociate expressions", false, false)
@@ -1040,6 +1126,250 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return 0;
}
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
+/// instruction with the given two operands, and returns the resulting
+/// instruction. There are two special cases: 1) if the constant operand is 0,
+/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
+/// be returned.
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+ const APInt &ConstOpnd) {
+ if (ConstOpnd != 0) {
+ if (!ConstOpnd.isAllOnesValue()) {
+ LLVMContext &Ctx = Opnd->getType()->getContext();
+ Instruction *I;
+ I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
+ "and.ra", InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
+ }
+ return Opnd;
+ }
+ return 0;
+}
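The two special cases spelled out, with illustrative i8 constants:

// createAndInstr(I, X, 0x0F) -> emits "and i8 X, 15" and returns it
// createAndInstr(I, X, 0xFF) -> constant is ~0: returns X unchanged
// createAndInstr(I, X, 0x00) -> constant is 0: returns null (the whole
//                               term folded into the constant part)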
+
+// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
+// into "R ^ C", where C would be 0, and R is a symbolic value.
+//
+// If successful, true is returned, and "R" and "C" are returned
+// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
+// and both "Res" and "ConstOpnd" remain unchanged.
+//
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ APInt &ConstOpnd, Value *&Res) {
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // = ((x | c1) ^ c1) ^ (c1 ^ c2)
+ // = (x & ~c1) ^ (c1 ^ c2)
+ // It is useful only when c1 == c2.
+ if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
+
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
+ }
+ return false;
+}
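A worked instance of Xor-Rule 1 with illustrative 8-bit constants, where c1 == c2 == 0x0C:

//   (x | 0x0C) ^ 0x0C = (x & ~0x0C) ^ (0x0C ^ 0x0C)
//                     = (x & 0xF3) ^ 0x00
// so Res becomes "x & 0xF3" and ConstOpnd drops to zero.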
+
+
+// Helper function of OptimizeXor(). It tries to simplify
+// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
+// symbolic value.
+//
+// If successful, true is returned, and "R" and "C" are returned
+// via "Res" and "ConstOpnd", respectively (if the entire expression is
+// evaluated to a constant, Res is set to NULL); otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res) {
+ Value *X = Opnd1->getSymbolicPart();
+ if (X != Opnd2->getSymbolicPart())
+ return false;
+
+  // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
+ int DeadInstNum = 1;
+ if (Opnd1->getValue()->hasOneUse())
+ DeadInstNum++;
+ if (Opnd2->getValue()->hasOneUse())
+ DeadInstNum++;
+
+ // Xor-Rule 2:
+ // (x | c1) ^ (x & c2)
+ // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
+ // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
+ // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
+ //
+ if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
+ if (Opnd2->isOrExpr())
+ std::swap(Opnd1, Opnd2);
+
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+ APInt C3((~C1) ^ C2);
+
+ // Do not increase code size!
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C1;
+
+ } else if (Opnd1->isOrExpr()) {
+ // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
+ //
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+ APInt C3 = C1 ^ C2;
+
+ // Do not increase code size
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C3;
+ } else {
+ // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
+ //
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+ APInt C3 = C1 ^ C2;
+ Res = createAndInstr(I, X, C3);
+ }
+
+ // Put the original operands in the Redo list; hope they will be deleted
+ // as dead code.
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
+ RedoInsts.insert(T);
+
+ return true;
+}
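Illustrative 8-bit instances of the three rules handled above:

//   Rule 2: (x | 0x03) ^ (x & 0x0F) = (x & (~0x03 ^ 0x0F)) ^ 0x03
//                                   = (x & 0xF3) ^ 0x03
//   Rule 3: (x | 0x03) ^ (x | 0x05) = (x & 0x06) ^ 0x06
//   Rule 4: (x & 0x03) ^ (x & 0x05) = x & 0x06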
+
+/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
+/// to a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+Value *Reassociate::OptimizeXor(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
+ return V;
+
+ if (Ops.size() == 1)
+ return 0;
+
+ SmallVector<XorOpnd, 8> Opnds;
+ SmallVector<XorOpnd*, 8> OpndPtrs;
+ Type *Ty = Ops[0].Op->getType();
+ APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+
+ // Step 1: Convert ValueEntry to XorOpnd
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *V = Ops[i].Op;
+ if (!isa<ConstantInt>(V)) {
+ XorOpnd O(V);
+ O.setSymbolicRank(getRank(O.getSymbolicPart()));
+ Opnds.push_back(O);
+ } else
+ ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
+
+  // NOTE: From this point on, do *NOT* add or delete elements to or from
+  // "Opnds". Doing so would invalidate pointers into "Opnds", and hence
+  // invalidate "OpndPtrs" as well. For the same reason, do not fuse this
+  // loop with the previous one --- pointers into "Opnds" may be invalidated
+  // when new elements are added to the vector.
+ for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+ OpndPtrs.push_back(&Opnds[i]);
+
+ // Step 2: Sort the Xor-Operands in a way such that the operands containing
+ // the same symbolic value cluster together. For instance, the input operand
+ // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
+ // ("x | 123", "x & 789", "y & 456").
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+
+ // Step 3: Combine adjacent operands
+ XorOpnd *PrevOpnd = 0;
+ bool Changed = false;
+ for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd *CurrOpnd = OpndPtrs[i];
+ // The combined value
+ Value *CV;
+
+ // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
+ if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ Changed = true;
+ if (CV)
+ *CurrOpnd = XorOpnd(CV);
+ else {
+ CurrOpnd->Invalidate();
+ continue;
+ }
+ }
+
+ if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
+ PrevOpnd = CurrOpnd;
+ continue;
+ }
+
+    // Step 3.2: When previous and current operands share the same symbolic
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
+ //
+ if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+ // Remove previous operand
+ PrevOpnd->Invalidate();
+ if (CV) {
+ *CurrOpnd = XorOpnd(CV);
+ PrevOpnd = CurrOpnd;
+ } else {
+ CurrOpnd->Invalidate();
+ PrevOpnd = 0;
+ }
+ Changed = true;
+ }
+ }
+
+ // Step 4: Reassemble the Ops
+ if (Changed) {
+ Ops.clear();
+ for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd &O = Opnds[i];
+ if (O.isInvalid())
+ continue;
+ ValueEntry VE(getRank(O.getValue()), O.getValue());
+ Ops.push_back(VE);
+ }
+ if (ConstOpnd != 0) {
+ Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ ValueEntry VE(getRank(C), C);
+ Ops.push_back(VE);
+ }
+ int Sz = Ops.size();
+ if (Sz == 1)
+ return Ops.back().Op;
+ else if (Sz == 0) {
+ assert(ConstOpnd == 0);
+ return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ }
+ }
+
+ return 0;
+}
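Putting the four steps together on a hypothetical chain "a ^ (x | 0x0C) ^ 0x0C ^ (x & 0xF0)":

// Step 1 folds the constant 0x0C into ConstOpnd.
// Step 2 sorts so that both x-based operands are adjacent.
// Step 3 applies Rule 1 to "(x | 0x0C) ^ 0x0C"    -> "x & 0xF3",
//        then Rule 4 to "(x & 0xF3) ^ (x & 0xF0)" -> "x & 0x03".
// Step 4 rebuilds Ops as { a, "x & 0x03" }.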
+
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
@@ -1431,11 +1761,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
default: break;
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
break;
+ case Instruction::Xor:
+ if (Value *Result = OptimizeXor(I, Ops))
+ return Result;
+ break;
+
case Instruction::Add:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e90fe907d5..d073e789dc 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,15 @@
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses per alloca");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -69,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
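How the prefixed builder is meant to be driven, as a sketch (names are illustrative, not from the patch):

// IRBuilderTy IRB(C, ConstantFolder());
// IRB.SetInsertPoint(UserInst);
// IRB.SetNamePrefix("myalloca.8.");   // alloca name plus begin offset
// Value *V = IRB.CreateLoad(Ptr, "load");
// // With asserts on, V is named "myalloca.8.load"; the <false>
// // specialization above drops names entirely in release builds.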
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
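The ordering defined by operator< above, on illustrative ranges:

//   [0,16) < [0,8) < [0,4) < [4,8) < [8,16)
// Begin offsets increase; for equal begins the *wider* range sorts first,
// so a spanning range leads its cluster.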
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
- /// \brief A particular use of a partition of the alloca.
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and include a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
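A note on the PointerIntPair used above (a sketch of its behavior):

// PointerIntPair<Use *, 1, bool> stores the flag in the low alignment bit
// of the pointer, so PartitionUse grows by no extra field:
//   PartitionUse PU(0, 8, &U, /*IsSplit=*/true);
//   PU.getUse();  // &U with the flag bit masked off
//   PU.isSplit(); // true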
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +515,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +533,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+  // and cover the entire alloca. This prevents us from splitting too
+  // eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
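Concretely, for a hypothetical 8-byte alloca (AllocSize == 8) the new predicate classifies:

//   load i64  @offset 0          -> splittable (integer, covers alloca)
//   load i32  @offset 0          -> not splittable (covers only 4 bytes)
//   volatile load i64 @offset 0  -> not splittable (volatile)
//   load double @offset 0        -> not splittable (not an integer type)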
@@ -512,7 +555,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +566,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+  // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +873,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +889,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+  // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -868,7 +929,7 @@ private:
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue();
- MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
Offsets.DestBegin == Offsets.SourceBegin)
return markAsDead(II); // Skip identity transfers without side-effects.
@@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
splitAndMergePartitions();
}
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+ MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
// Now build up the user lists for each of these disjoint partitions by
// re-walking the recursive users of the alloca.
Uses.resize(Partitions.size());
@@ -1084,26 +1149,34 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
PtrI = UB.visitPtr(AI);
assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+ MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1140,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1244,12 +1318,12 @@ public:
// may be zapped by an optimization pass in future.
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(SExt->getOperand(0));
if (!Arg)
- Arg = SI->getOperand(0);
+ Arg = SI->getValueOperand();
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getOperand(0);
+ Arg = LI->getPointerOperand();
} else {
continue;
}
@@ -1375,11 +1449,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1473,7 +1547,7 @@ private:
assert(!Loads.empty());
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
+ IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
@@ -1496,7 +1570,7 @@ private:
TerminatorInst *TI = Pred->getTerminator();
Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
+ IRBuilderTy PredBuilder(TI);
LoadInst *Load
= PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1523,8 +1597,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1571,16 +1645,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilderTy IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1591,7 +1665,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1623,8 +1697,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1641,9 +1715,8 @@ private:
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
if (Indices.empty())
return BasePtr;
@@ -1652,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1664,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1696,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
}
/// \brief Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1729,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, Prefix);
+ Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1742,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1761,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1774,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1796,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1814,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
- Value *Ptr, APInt Offset, Type *PointerTy,
- const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1850,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
- Indices, Prefix)) {
+ Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -1885,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- Prefix + ".raw_cast");
+ "raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- Prefix + ".raw_idx");
+ "raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
return Ptr;
}
@@ -1911,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1933,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *Ty) {
assert(canConvertValue(DL, V->getType(), Ty) &&
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
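With the two hunks above, integer-to-wider-integer conversions are now accepted and lowered to zext; for example (illustrative types):

// canConvertValue(DL, i8Ty, i32Ty) -> true (new width >= old width)
// convertValue(DL, IRB, V /*i8*/, i32Ty) emits "zext i8 %V to i32".
// An equal-width request is the same uniqued IntegerType, so it is
// returned unchanged by the V->getType() == Ty fast path.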
@@ -1977,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1997,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2063,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2074,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2089,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2105,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2125,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
@@ -2148,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2179,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
-static Value *extractVector(IRBuilder<> &IRB, Value *V,
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
unsigned BeginIndex, unsigned EndIndex,
const Twine &Name) {
VectorType *VecTy = cast<VectorType>(V->getType());
@@ -2207,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
@@ -2243,17 +2322,15 @@ static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".expand");
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
- Name + "insert");
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+ V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+ DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
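The new blend for a 4-element vector with BeginIndex = 1, EndIndex = 3 (illustrative):

//   Mask = <i1 false, i1 true, i1 true, i1 false>
//   V    = select <4 x i1> Mask, V.expand, Old
// Lanes 1..2 come from the expanded vector; lanes 0 and 3 keep Old.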
@@ -2297,11 +2374,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // The name prefix to use when rewriting instructions for this alloca.
- std::string NamePrefix;
+ // Utility IR builder, whose name prefix is setup for each visited use, and
+ // the insertion point is set to point to the user.
+ IRBuilderTy IRB;
public:
AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2314,7 +2393,8 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2336,14 +2416,21 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
- NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2365,14 +2452,10 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Twine getName(const Twine &Suffix) {
- return NamePrefix + Suffix;
- }
-
- Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2422,27 +2505,27 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
+ Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || EndOffset < NewAllocaEndOffset)
V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
+ "extract");
return V;
}
@@ -2450,58 +2533,39 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(IRB, LI);
+ V = rewriteIntegerLoad(LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
canConvertValue(TD, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2511,7 +2575,7 @@ private:
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
- getName(".insert"));
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
@@ -2525,7 +2589,7 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ bool rewriteVectorizedStoreInst(Value *V,
StoreInst &SI, Value *OldOp) {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
@@ -2540,8 +2604,8 @@ private:
// Mix in the existing elements.
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2551,17 +2615,17 @@ private:
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
- getName(".insert"));
+ "insert");
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2575,7 +2639,6 @@ private:
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&SI);
Value *V = SI.getValueOperand();
@@ -2588,26 +2651,25 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
- getName(".extract"));
+ "extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(IRB, V, SI);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset &&
canConvertValue(TD, V->getType(), NewAllocaTy)) {
V = convertValue(TD, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -2635,7 +2697,7 @@ private:
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
- Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+ Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
@@ -2643,26 +2705,25 @@ private:
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
ConstantExpr::getUDiv(
Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
- getName(".isplat"));
+ "isplat");
return V;
}
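// The multiply above splats a single byte across the wider integer: the
// constant operand folds to 0x0101...01, one set byte per output byte. A
// standalone sketch of the arithmetic for a 4-byte result (the function
// name is illustrative only):
#include <cstdint>
static uint32_t splatByteSketch(uint8_t B) {
  // 0xFFFFFFFF / 0xFF == 0x01010101, so e.g. 0xAB * 0x01010101 == 0xABABABAB.
  return uint32_t(B) * (0xFFFFFFFFu / 0xFFu);
}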
/// \brief Compute a vector splat for a given element value.
- Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
- V = IRB.CreateVectorSplat(NumElements, V, NamePrefix);
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawDest() == OldPtr);
// If the memset has a variable size, it cannot be split, just adjust the
@@ -2719,31 +2780,31 @@ private:
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- Value *Splat = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ElementTy)/8);
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
Splat = convertValue(TD, IRB, Splat, ElementTy);
if (NumElements > 1)
- Splat = getVectorSplat(IRB, Splat, NumElements);
+ Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
// set integer.
assert(!II.isVolatile());
uint64_t Size = EndOffset - BeginOffset;
- V = getIntegerSplat(IRB, II.getValue(), Size);
+ V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaEndOffset)) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
@@ -2754,10 +2815,9 @@ private:
assert(BeginOffset == NewAllocaBeginOffset);
assert(EndOffset == NewAllocaEndOffset);
- V = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ScalarTy)/8);
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
- V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
V = convertValue(TD, IRB, V, AllocaTy);
}
@@ -2774,7 +2834,6 @@ private:
// them into two categories: split intrinsics and unsplit intrinsics.
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
@@ -2858,8 +2917,7 @@ private:
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
@@ -2902,8 +2960,7 @@ private:
OtherPtrTy = SubIntTy->getPointerTo();
}
- Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
@@ -2911,31 +2968,31 @@ private:
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
Src = convertValue(TD, IRB, Src, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
- getName(".copyload"));
+ "copyload");
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
@@ -2950,7 +3007,6 @@ private:
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
@@ -2978,7 +3034,9 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
@@ -2991,17 +3049,16 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
-
- // Find the operand we need to rewrite here.
- bool IsTrueVal = SI.getTrueValue() == OldPtr;
- if (IsTrueVal)
- assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
- else
- assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+ assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+ "Pointer isn't an operand!");
Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
- SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ // Replace the operands which were using the old pointer.
+ if (SI.getOperand(1) == OldPtr)
+ SI.setOperand(1, NewPtr);
+ if (SI.getOperand(2) == OldPtr)
+ SI.setOperand(2, NewPtr);
+
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
@@ -3066,7 +3123,7 @@ private:
class OpSplitter {
protected:
/// The builder used to form new instructions.
- IRBuilder<> IRB;
+ IRBuilderTy IRB;
  /// The indices to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
@@ -3278,12 +3335,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
return 0;
- if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
return 0;
+ }
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
@@ -3381,7 +3439,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a374ea..bfde334c36 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal) {
- // Non constant GEPs are only a problem on arrays, structs, and pointers
- // Vectors can be dynamically indexed.
- // FIXME: Add support for dynamic indexing on arrays. This should be
- // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
- // isn't.
- if (!(*GEPIt)->isVectorTy())
- return MarkUnsafe(Info, GEPI);
- NonConstant = true;
- NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
- }
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 916b37d4a8..3514e6c2aa 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
@@ -35,7 +34,6 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
//===----------------------------------------------------------------------===//
// Optimizer Base Class
@@ -91,8 +89,6 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
-
- bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
SimplifyLibCalls() : FunctionPass(ID) {
@@ -104,14 +100,6 @@ namespace {
void InitOptimizations();
bool runOnFunction(Function &F);
- void setDoesNotAccessMemory(Function &F);
- void setOnlyReadsMemory(Function &F);
- void setDoesNotThrow(Function &F);
- void setDoesNotCapture(Function &F, unsigned n);
- void setDoesNotAlias(Function &F, unsigned n);
- bool doInitialization(Module &M);
-
- void inferPrototypeAttributes(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
}
@@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
return Changed;
}
-// Utility methods for doInitialization.
-
-void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-
-
-void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- FunctionType *FTy = F.getFunctionType();
-
- StringRef Name = F.getName();
- switch (Name[0]) {
- case 's':
- if (Name == "strlen") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strchr" ||
- Name == "strrchr") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "strcpy" ||
- Name == "stpcpy" ||
- Name == "strcat" ||
- Name == "strtol" ||
- Name == "strtod" ||
- Name == "strtof" ||
- Name == "strtoul" ||
- Name == "strtoll" ||
- Name == "strtold" ||
- Name == "strncat" ||
- Name == "strncpy" ||
- Name == "stpncpy" ||
- Name == "strtoull") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strxfrm") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strcmp" ||
- Name == "strspn" ||
- Name == "strncmp" ||
- Name == "strcspn" ||
- Name == "strcoll" ||
- Name == "strcasecmp" ||
- Name == "strncasecmp") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strstr" ||
- Name == "strpbrk") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strtok" ||
- Name == "strtok_r") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "scanf" ||
- Name == "setbuf" ||
- Name == "setvbuf") {
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strdup" ||
- Name == "strndup") {
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "stat" ||
- Name == "sscanf" ||
- Name == "sprintf" ||
- Name == "statvfs") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "snprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- } else if (Name == "setitimer") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- } else if (Name == "system") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- case 'm':
- if (Name == "malloc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "memcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "memchr" ||
- Name == "memrchr") {
- if (FTy->getNumParams() != 3)
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "modf" ||
- Name == "modff" ||
- Name == "modfl" ||
- Name == "memcpy" ||
- Name == "memccpy" ||
- Name == "memmove") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "memalign") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotAlias(F, 0);
- } else if (Name == "mkdir" ||
- Name == "mktime") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'r':
- if (Name == "realloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "read") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- } else if (Name == "rmdir" ||
- Name == "rewind" ||
- Name == "remove" ||
- Name == "realpath") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "rename" ||
- Name == "readlink") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'w':
- if (Name == "write") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- }
- break;
- case 'b':
- if (Name == "bcopy") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bzero") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'c':
- if (Name == "calloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "chmod" ||
- Name == "chown" ||
- Name == "ctermid" ||
- Name == "clearerr" ||
- Name == "closedir") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'a':
- if (Name == "atoi" ||
- Name == "atol" ||
- Name == "atof" ||
- Name == "atoll") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "access") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'f':
- if (Name == "fopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "fdopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- } else if (Name == "feof" ||
- Name == "free" ||
- Name == "fseek" ||
- Name == "ftell" ||
- Name == "fgetc" ||
- Name == "fseeko" ||
- Name == "ftello" ||
- Name == "fileno" ||
- Name == "fflush" ||
- Name == "fclose" ||
- Name == "fsetpos" ||
- Name == "flockfile" ||
- Name == "funlockfile" ||
- Name == "ftrylockfile") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "ferror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- } else if (Name == "fputc" ||
- Name == "fstat" ||
- Name == "frexp" ||
- Name == "frexpf" ||
- Name == "frexpl" ||
- Name == "fstatvfs") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "fgets") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- } else if (Name == "fread" ||
- Name == "fwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- } else if (Name == "fputs" ||
- Name == "fscanf" ||
- Name == "fprintf" ||
- Name == "fgetpos") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'g':
- if (Name == "getc" ||
- Name == "getlogin_r" ||
- Name == "getc_unlocked") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "getenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "gets" ||
- Name == "getchar") {
- setDoesNotThrow(F);
- } else if (Name == "getitimer") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "getpwnam") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'u':
- if (Name == "ungetc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "uname" ||
- Name == "unlink" ||
- Name == "unsetenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "utime" ||
- Name == "utimes") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'p':
- if (Name == "putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "puts" ||
- Name == "printf" ||
- Name == "perror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "pread" ||
- Name == "pwrite") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; these are valid pthread cancellation points.
- setDoesNotCapture(F, 2);
- } else if (Name == "putchar") {
- setDoesNotThrow(F);
- } else if (Name == "popen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "pclose") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'v':
- if (Name == "vscanf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vsscanf" ||
- Name == "vfscanf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "valloc") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "vprintf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vfprintf" ||
- Name == "vsprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "vsnprintf") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- }
- break;
- case 'o':
- if (Name == "open") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- } else if (Name == "opendir") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- }
- break;
- case 't':
- if (Name == "tmpfile") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "times") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'h':
- if (Name == "htonl" ||
- Name == "htons") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'n':
- if (Name == "ntohl" ||
- Name == "ntohs") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'l':
- if (Name == "lstat") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "lchown") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'q':
- if (Name == "qsort") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- }
- break;
- case '_':
- if (Name == "__strdup" ||
- Name == "__strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "__strtok_r") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "_IO_getc") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "_IO_putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- }
- break;
- case 1:
- if (Name == "\1__isoc99_scanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1stat64" ||
- Name == "\1lstat64" ||
- Name == "\1statvfs64" ||
- Name == "\1__isoc99_sscanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fopen64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fseeko64" ||
- Name == "\1ftello64") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1tmpfile64") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "\1fstat64" ||
- Name == "\1fstatvfs64") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1open64") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- }
-}
-
-/// doInitialization - Add attributes to well-known functions.
-///
-bool SimplifyLibCalls::doInitialization(Module &M) {
- Modified = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.isDeclaration() && F.hasName())
- inferPrototypeAttributes(F);
- }
- return Modified;
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 00cda8e034..1f517d038d 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
- Value *ZeroV = MainBuilder.getInt32(0);
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
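// The fix above derives the zero constant from the dividend's own type. A
// sketch of the ill-typed compare it prevents, assuming a 64-bit division
// being bypassed (%bitmask stands in for the computed mask):
//   %and = and i64 %or, %bitmask
//   %cmp = icmp eq i64 %and, i32 0   ; rejected: icmp operands must match
// With ConstantInt::getSigned(Dividend->getType(), 0) both operands are i64.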
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
- int bitwidth = T->getBitWidth();
+ unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a309bce544..be8d39e128 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,26 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
- // Clone any attributes.
- if (NewFunc->arg_size() == OldFunc->arg_size())
- NewFunc->copyAttributesFrom(OldFunc);
- else {
- //Some arguments were deleted with the VMap. Copy arguments one by one
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
- Anew->addAttr(OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldFunc->getAttributes()));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldFunc->getAttributes()));
+ AttributeSet OldAttrs = OldFunc->getAttributes();
+ // Clone any argument attributes that are present in the VMap.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end();
+ I != E; ++I)
+ if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs =
+ OldAttrs.getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
- }
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldAttrs.getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldAttrs.getFnAttributes()));
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a221..dabb67b921 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,8 @@ namespace {
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
- void forwardResume(ResumeInst *RI);
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite. If the code doesn't have calls or unwinds, we know there is
- // nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls) {
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
- return;
- }
-
+ // rewrite.
InvokeInliningInfo Invoke(II);
-
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
continue;
}
+ // Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI);
+ Invoke.forwardResume(RI, InlinedLPads);
}
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
+ // invoke instruction. Eliminate these entries (which might even delete the
// PHI node) now.
InvokeDest->removePredecessor(II->getParent());
}
@@ -748,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If the call site was an invoke instruction, add a branch to the normal
// destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- BranchInst::Create(II->getNormalDest(), TheCall);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
// If the return instruction returned a value, replace uses of the call with
// uses of the returned value.
@@ -777,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = NULL;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
CalledFunc->getName()+".exit");
} else { // It's a call
@@ -840,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
ReturnInst *RI = Returns[i];
- BranchInst::Create(AfterCallBB, RI);
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
RI->eraseFromParent();
}
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
} else if (!Returns.empty()) {
// Otherwise, if there is exactly one return value, just replace anything
// using the return value of the call with the computed value.
@@ -864,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a54ee08b67..12e5b3e9d2 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
/// Dbg Intrinsic utilities
///
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+ // Since we can't guarantee that the original dbg.declare instrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ llvm::BasicBlock::InstListType::iterator PrevI(I);
+ if (PrevI != I->getParent()->getInstList().begin()) {
+ --PrevI;
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+ if (DVI->getValue() == I->getOperand(0) &&
+ DVI->getOffset() == 0 &&
+ DVI->getVariable() == DIVar)
+ return true;
+ }
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
@@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, SI))
+ return true;
+
Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
return true;
}
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
@@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, LI))
+ return true;
+
Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
@@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ // We only remove the dbg.declare intrinsic if all uses are
+ // converted to dbg.value intrinsics.
bool RemoveDDI = true;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
@@ -985,22 +1010,17 @@ bool llvm::removeUnreachableBlocks(Function &F) {
if (Reachable.count(I))
continue;
- // Remove the block as predecessor of all its reachable successors.
- // Unreachable successors don't matter as they'll soon be removed, too.
for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(I);
+ I->dropAllReferences();
+ }
- // Zap all instructions in this basic block.
- while (!I->empty()) {
- Instruction &Inst = I->back();
- if (!Inst.use_empty())
- Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
- I->getInstList().pop_back();
- }
+ for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
- --I;
- llvm::next(I)->eraseFromParent();
- }
return true;
}
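// The rewrite above deletes unreachable blocks in two phases because dead
// blocks can still reference one another (branches, PHI operands). A
// standalone sketch of the shape, assuming LLVM headers are available and
// Dead holds the unreachable blocks (the names here are illustrative):
#include "llvm/IR/BasicBlock.h"
#include <vector>
static void eraseDeadBlocksSketch(std::vector<llvm::BasicBlock*> &Dead) {
  for (unsigned i = 0, e = Dead.size(); i != e; ++i)
    Dead[i]->dropAllReferences();   // phase 1: sever cross-references
  for (unsigned i = 0, e = Dead.size(); i != e; ++i)
    Dead[i]->eraseFromParent();     // phase 2: deletion is now safe
}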
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a63d31d5af..052ad85551 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+                cl::desc("Hoist conditional stores if an unconditional store precedes them"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+///      ...  // No other stores or function calls in between (a call could
+///      ...  // write to memory).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+///      %add.add5 = select i1 %cmp, i32 %add, i32 %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return 0;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return 0;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 10;
+ for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+ RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+ Instruction *CurI = &*RI;
+
+    // Could be calling an instruction that affects memory, like free().
+ if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return 0;
+
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+    // Found a previous store; make sure it stores to the same location.
+ if (SI && SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ else if (SI)
+ return 0; // Unknown store.
+ }
+
+ return 0;
+}
+
/// \brief Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
return false;
- if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
// Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
+ // used in BB. The transformation will prevent the operand from
// being sunk into the use block.
for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
@@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (!HaveRewritablePHIs)
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
llvm::prior(ThenBB->end()));
@@ -2789,9 +2875,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
+ bool InvokeRequiresTableEntry = false;
+ bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+ if (II->hasFnAttr(Attribute::UWTable)) {
+ // Don't remove an `invoke' instruction if the ABI requires an entry into
+ // the table.
+ InvokeRequiresTableEntry = true;
+ continue;
+ }
+
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2811,11 +2908,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
+ Changed = true;
}
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
- return true;
+ if (!InvokeRequiresTableEntry)
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
+ return Changed;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3059,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
- Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
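// Worked example of the wrap-around guarded above: a switch on i8 whose
// contiguous cases cover all 256 values has a case count that truncates to
// 0 in 8 bits, so the `ult` compare against it would be false for every
// input even though every input hits a case. A standalone sketch of the
// condition (the function name is illustrative only):
#include <cstdint>
static bool caseCountWrapsToZero(unsigned BitWidth, uint64_t NumCases) {
  if (BitWidth >= 64)
    return false;                    // cannot wrap within this sketch
  return NumCases != 0 && (NumCases & ((1ull << BitWidth) - 1)) == 0;
}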
@@ -3944,11 +4049,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
- return LI->getPointerAddressSpace() == 0;
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
- return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
}
return false;
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b25febaa14..cadec21c50 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,11 +15,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -798,8 +800,7 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attribute::get(Callee->getContext(),
- Attribute::NoCapture));
+ CI->addAttribute(1, Attribute::NoCapture);
}
return 0;
@@ -1517,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
+  // Do not do any of the following transformations if the fprintf return
+  // value is used; in general the fprintf return value is not compatible
+  // with that of fwrite(), fputc(), or fputs().
+ if (!CI->use_empty())
+ return 0;
+
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1526,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
- Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ return EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1543,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
- TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
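
The new use_empty() bail-out is needed because the replacement calls do not return what fprintf returns. A small host-side illustration (plain C++, independent of the pass):

#include <cstdio>

int main() {
  FILE *F = tmpfile();
  int A = std::fprintf(F, "foo");         // 3: characters written
  size_t B = std::fwrite("foo", 3, 1, F); // 1: *items* written
  int C = std::fputc('x', F);             // 'x' itself (120), not a count
  // A, B and C all disagree, so rewriting fprintf into fwrite/fputc/fputs
  // is only sound when the fprintf result is unused.
  std::printf("%d %zu %d\n", A, B, C);
  std::fclose(F);
  return 0;
}
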
@@ -1673,67 +1677,17 @@ class LibCallSimplifierImpl {
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
bool UnsafeFPShrink;
- StringMap<LibCallOptimization*, BumpPtrAllocator> Optimizations;
-
- // Fortified library call optimizations.
- MemCpyChkOpt MemCpyChk;
- MemMoveChkOpt MemMoveChk;
- MemSetChkOpt MemSetChk;
- StrCpyChkOpt StrCpyChk;
- StpCpyChkOpt StpCpyChk;
- StrNCpyChkOpt StrNCpyChk;
-
- // String library call optimizations.
- StrCatOpt StrCat;
- StrNCatOpt StrNCat;
- StrChrOpt StrChr;
- StrRChrOpt StrRChr;
- StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp;
- StrCpyOpt StrCpy;
- StpCpyOpt StpCpy;
- StrNCpyOpt StrNCpy;
- StrLenOpt StrLen;
- StrPBrkOpt StrPBrk;
- StrToOpt StrTo;
- StrSpnOpt StrSpn;
- StrCSpnOpt StrCSpn;
- StrStrOpt StrStr;
-
- // Memory library call optimizations.
- MemCmpOpt MemCmp;
- MemCpyOpt MemCpy;
- MemMoveOpt MemMove;
- MemSetOpt MemSet;
- // Math library call optimizations.
- UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
- // Integer library call optimizations.
- FFSOpt FFS;
- AbsOpt Abs;
- IsDigitOpt IsDigit;
- IsAsciiOpt IsAscii;
- ToAsciiOpt ToAscii;
-
- // Formatting and IO library call optimizations.
- PrintFOpt PrintF;
- SPrintFOpt SPrintF;
- FPrintFOpt FPrintF;
- FWriteOpt FWrite;
- FPutsOpt FPuts;
- PutsOpt Puts;
-
- void initOptimizations();
- void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
- void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+ // Math library call optimizations.
+ CosOpt Cos;
+ PowOpt Pow;
+ Exp2Opt Exp2;
public:
LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS,
bool UnsafeFPShrink = false)
- : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true),
- Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
+ : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
this->TD = TD;
this->TLI = TLI;
this->LCS = LCS;
@@ -1741,123 +1695,234 @@ public:
}
Value *optimizeCall(CallInst *CI);
+ LibCallOptimization *lookupOptimization(CallInst *CI);
+ bool hasFloatVersion(StringRef FuncName);
};
-void LibCallSimplifierImpl::initOptimizations() {
- // Fortified library call optimizations.
- Optimizations["__memcpy_chk"] = &MemCpyChk;
- Optimizations["__memmove_chk"] = &MemMoveChk;
- Optimizations["__memset_chk"] = &MemSetChk;
- Optimizations["__strcpy_chk"] = &StrCpyChk;
- Optimizations["__stpcpy_chk"] = &StpCpyChk;
- Optimizations["__strncpy_chk"] = &StrNCpyChk;
- Optimizations["__stpncpy_chk"] = &StrNCpyChk;
-
- // String library call optimizations.
- addOpt(LibFunc::strcat, &StrCat);
- addOpt(LibFunc::strncat, &StrNCat);
- addOpt(LibFunc::strchr, &StrChr);
- addOpt(LibFunc::strrchr, &StrRChr);
- addOpt(LibFunc::strcmp, &StrCmp);
- addOpt(LibFunc::strncmp, &StrNCmp);
- addOpt(LibFunc::strcpy, &StrCpy);
- addOpt(LibFunc::stpcpy, &StpCpy);
- addOpt(LibFunc::strncpy, &StrNCpy);
- addOpt(LibFunc::strlen, &StrLen);
- addOpt(LibFunc::strpbrk, &StrPBrk);
- addOpt(LibFunc::strtol, &StrTo);
- addOpt(LibFunc::strtod, &StrTo);
- addOpt(LibFunc::strtof, &StrTo);
- addOpt(LibFunc::strtoul, &StrTo);
- addOpt(LibFunc::strtoll, &StrTo);
- addOpt(LibFunc::strtold, &StrTo);
- addOpt(LibFunc::strtoull, &StrTo);
- addOpt(LibFunc::strspn, &StrSpn);
- addOpt(LibFunc::strcspn, &StrCSpn);
- addOpt(LibFunc::strstr, &StrStr);
-
- // Memory library call optimizations.
- addOpt(LibFunc::memcmp, &MemCmp);
- addOpt(LibFunc::memcpy, &MemCpy);
- addOpt(LibFunc::memmove, &MemMove);
- addOpt(LibFunc::memset, &MemSet);
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+ LibFunc::Func Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
+}
- // Math library call optimizations.
- addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
- addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
- addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
- addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
- addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
- addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
- addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
-
- if(UnsafeFPShrink) {
- addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+// Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ // First, check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
}
- addOpt(LibFunc::cosf, &Cos);
- addOpt(LibFunc::cos, &Cos);
- addOpt(LibFunc::cosl, &Cos);
- addOpt(LibFunc::powf, &Pow);
- addOpt(LibFunc::pow, &Pow);
- addOpt(LibFunc::powl, &Pow);
- Optimizations["llvm.pow.f32"] = &Pow;
- Optimizations["llvm.pow.f64"] = &Pow;
- Optimizations["llvm.pow.f80"] = &Pow;
- Optimizations["llvm.pow.f128"] = &Pow;
- Optimizations["llvm.pow.ppcf128"] = &Pow;
- addOpt(LibFunc::exp2l, &Exp2);
- addOpt(LibFunc::exp2, &Exp2);
- addOpt(LibFunc::exp2f, &Exp2);
- Optimizations["llvm.exp2.ppcf128"] = &Exp2;
- Optimizations["llvm.exp2.f128"] = &Exp2;
- Optimizations["llvm.exp2.f80"] = &Exp2;
- Optimizations["llvm.exp2.f64"] = &Exp2;
- Optimizations["llvm.exp2.f32"] = &Exp2;
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
- // Integer library call optimizations.
- addOpt(LibFunc::ffs, &FFS);
- addOpt(LibFunc::ffsl, &FFS);
- addOpt(LibFunc::ffsll, &FFS);
- addOpt(LibFunc::abs, &Abs);
- addOpt(LibFunc::labs, &Abs);
- addOpt(LibFunc::llabs, &Abs);
- addOpt(LibFunc::isdigit, &IsDigit);
- addOpt(LibFunc::isascii, &IsAscii);
- addOpt(LibFunc::toascii, &ToAscii);
-
- // Formatting and IO library call optimizations.
- addOpt(LibFunc::printf, &PrintF);
- addOpt(LibFunc::sprintf, &SPrintF);
- addOpt(LibFunc::fprintf, &FPrintF);
- addOpt(LibFunc::fwrite, &FWrite);
- addOpt(LibFunc::fputs, &FPuts);
- addOpt(LibFunc::puts, &Puts);
}
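
The hasFloatVersion() probe that gates the UnaryDoubleFP cases above simply asks the target library for the same name with an 'f' suffix. Modeled with plain strings (the available-call set below is a hypothetical stand-in for TargetLibraryInfo):

#include <set>
#include <string>

// Stand-in for the set of libcalls TargetLibraryInfo reports as available.
static const std::set<std::string> Available = {"floor", "floorf", "sin"};

bool hasFloatVersionModel(const std::string &FuncName) {
  return Available.count(FuncName + "f") != 0; // "floor" -> "floorf"
}

// hasFloatVersionModel("floor") is true, so a double floor() of a promoted
// float can be shrunk to floorf(); hasFloatVersionModel("sin") is false
// here because "sinf" is absent, so no optimization is returned.
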
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
- if (Optimizations.empty())
- initOptimizations();
+ LibCallOptimization *LCO = lookupOptimization(CI);
// @LOCALMOD-BEGIN
Function *Caller = CI->getParent()->getParent();
@@ -1866,8 +1931,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
if (TLI->getLibFunc(Caller->getName(), F))
return 0;
// @LOCALMOD-END
- Function *Callee = CI->getCalledFunction();
- LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
if (LCO) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
@@ -1875,16 +1938,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
return 0;
}
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
- if (TLI->has(F))
- Optimizations[TLI->getName(F)] = Opt;
-}
-
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2,
- LibCallOptimization* Opt) {
- if (TLI->has(F1) && TLI->has(F2))
- Optimizations[TLI->getName(F1)] = Opt;
-}
LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
const TargetLibraryInfo *TLI,
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 5812d4607d..c3df215c29 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index b5941bdf24..544c5eed7e 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 76365417aa..17900dabbe 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1771,7 +1771,7 @@ namespace {
size_t MaxDepth = DAG.lookup(IJ);
DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << DAG.size() << "\n");
// At this point the DAG has been constructed, but may contain
@@ -2086,7 +2086,7 @@ namespace {
DEBUG(if (DebugPairSelection)
dbgs() << "BBV: found pruned DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << PrunedDAG.size() <<
" (effective size: " << EffSize << ")\n");
if (((TTI && !UseChainDepthWithTI) ||
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index e64034ab26..7ae082f55e 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMVectorize
BBVectorize.cpp
Vectorize.cpp
LoopVectorize.cpp
+ SLPVectorizer.cpp
+ VecUtils.cpp
)
add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a696a2ffba..11ee99ddf1 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to assess the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
@@ -78,7 +78,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -87,6 +89,7 @@
#include <map>
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
@@ -112,9 +115,15 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// When performing a runtime memory check, do not check more than this
-/// number of pointers. Notice that the check is quadratic!
-static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// When performing memory disambiguation checks at runtime, do not make more
+/// than this number of comparisons.
+static const unsigned RuntimeMemoryCheckThreshold = 8;
+
+/// We use metadata with this name to indicate that a scalar loop was
+/// vectorized and that we don't need to re-vectorize it if we run into it
+/// again.
+static const char*
+AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
namespace {
@@ -208,7 +217,7 @@ private:
/// This function adds (StartIdx + 0, StartIdx + 1, ...) to each vector
/// element, i.e. the sequence starts at StartIdx. If Negate is set, the
/// offsets decrease instead: (StartIdx - 0, StartIdx - 1, StartIdx - 2, ...).
- Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
+ Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -327,7 +336,7 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0) {}
+ Induction(0), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -337,8 +346,10 @@ public:
RK_IntegerOr, ///< Bitwise or logical OR of numbers.
RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult ///< Product of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
/// This enum represents the kinds of inductions that we support.
@@ -350,21 +361,52 @@ public:
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
+ // This enum represents the kind of minmax reduction.
+ enum MinMaxReductionKind {
+ MRK_Invalid,
+ MRK_UIntMin,
+ MRK_UIntMax,
+ MRK_SIntMin,
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
+ };
+
/// This POD struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
- Kind(RK_NoReduction) {}
+ Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
+ MinMaxReductionKind MK)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
// The starting value of the reduction.
// It does not have to be zero!
- Value *StartValue;
+ TrackingVH<Value> StartValue;
// The instruction whose value is used outside the loop.
Instruction *LoopExitInstr;
// The kind of the reduction.
ReductionKind Kind;
+ // If this is a min/max reduction, the kind of min/max operation.
+ MinMaxReductionKind MinMaxKind;
+ };
+
+ /// This POD struct holds information about a potential reduction operation.
+ struct ReductionInstDesc {
+ ReductionInstDesc(bool IsRedux, Instruction *I) :
+ IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+
+ ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
+ IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+
+ // Is this instruction a reduction candidate.
+ bool IsReduction;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current reduction instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern, the comparison predicate.
+ MinMaxReductionKind MinMaxKind;
};
// This POD struct holds information about the memory runtime legality
@@ -381,16 +423,18 @@ public:
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
- SmallVector<Value*, 2> Pointers;
+ SmallVector<TrackingVH<Value>, 2> Pointers;
/// Holds the pointer value at the beginning of the loop.
SmallVector<const SCEV*, 2> Starts;
/// Holds the pointer value at the end of the loop.
SmallVector<const SCEV*, 2> Ends;
+ /// Holds flags indicating whether each pointer is used for writing to memory.
+ SmallVector<bool, 2> IsWritePtr;
};
/// A POD for saving information about induction variables.
@@ -398,7 +442,7 @@ public:
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
/// Start value.
- Value *StartValue;
+ TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
};
@@ -413,7 +457,7 @@ public:
/// Alias(Multi)Map stores the values (GEPs or underlying objects and their
/// respective Store/Load instruction(s) to calculate aliasing.
- typedef DenseMap<Value*, Instruction* > AliasMap;
+ typedef MapVector<Value*, Instruction* > AliasMap;
typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
/// Returns true if it is legal to vectorize this loop.
@@ -455,6 +499,10 @@ public:
/// Returns the information that we collected about runtime memory check.
RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+
+ /// This function returns the identity element (or neutral element) for
+ /// the operation K.
+ static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -481,9 +529,17 @@ private:
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
- /// Returns true if the instruction I can be a reduction variable of type
- /// 'Kind'.
- bool isReductionInstr(Instruction *I, ReductionKind Kind);
+ /// Returns a struct describing if the instruction 'I' can be a reduction
+ /// variable of type 'Kind'. If the reduction is a min/max pattern of
+ /// select(icmp()) this function advances the instruction pointer 'I' from the
+ /// compare instruction to the select instruction and stores this pointer in
+ /// 'PatternLastInst' member of the returned struct.
+ ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
+ ReductionInstDesc &Desc);
+ /// Returns a ReductionInstDesc describing whether the instruction is a
+ /// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y).
+ static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
@@ -534,6 +590,8 @@ private:
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -656,6 +714,11 @@ struct LoopVectorize : public LoopPass {
AA = getAnalysisIfAvailable<AliasAnalysis>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ if (DL == NULL) {
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ return false;
+ }
+
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
@@ -731,7 +794,8 @@ struct LoopVectorize : public LoopPass {
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
- Loop *Lp, Value *Ptr) {
+ Loop *Lp, Value *Ptr,
+ bool WritePtr) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
@@ -740,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -765,7 +830,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
@@ -778,8 +843,8 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) {
- int Idx = Negate ? (-i): i;
- Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+ int64_t Idx = Negate ? (-i) : i;
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
// Add the consecutive indices to the vector value.
@@ -899,13 +964,19 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+
+ if (ScalarAllocatedSize != VectorElementSize)
+ return scalarizeInstruction(Instr);
// If the pointer is loop invariant or if it is non-consecutive,
// scalarize the load.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
bool UniformLoad = LI && Legal->isUniform(Ptr);
- if (Stride == 0 || UniformLoad)
+ if (!ConsecutiveStride || UniformLoad)
return scalarizeInstruction(Instr);
Constant *Zero = Builder.getInt32(0);
@@ -968,7 +1039,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
}
}
@@ -984,7 +1055,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
cast<LoadInst>(LI)->setAlignment(Alignment);
Entry[Part] = Reverse ? reverseVector(LI) : LI;
@@ -1034,10 +1105,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- // For each scalar that we create:
- for (unsigned Width = 0; Width < VF; ++Width) {
- // For each vector unroll 'part':
- for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each scalar that we create:
+ for (unsigned Width = 0; Width < VF; ++Width) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
@@ -1104,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
+ // No need to check if two readonly pointers intersect.
+ if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
+ continue;
+
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
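
Only pairs involving at least one write need an overlap test, which shrinks the effective check count from quadratic in all pointers to quadratic in the writes. The pair enumeration above, reduced to a counting skeleton (standalone sketch):

#include <cstdio>
#include <vector>

int main() {
  // true = write pointer, false = read-only pointer (hypothetical group).
  std::vector<bool> IsWritePtr = {true, true, false, false, false};
  unsigned Checks = 0;
  for (unsigned i = 0, N = IsWritePtr.size(); i < N; ++i)
    for (unsigned j = i + 1; j < N; ++j) {
      if (!IsWritePtr[i] && !IsWritePtr[j])
        continue; // two read-only pointers can never conflict
      ++Checks;
    }
  // 2 writes + 3 reads: 10 total pairs, 3 read-read pairs skipped -> 7.
  std::printf("%u\n", Checks);
  return 0;
}
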
@@ -1159,6 +1234,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
+ // Mark the old scalar loop with metadata that tells us not to vectorize this
+ // loop again if we run into it.
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
+ OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
+
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
@@ -1425,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/// This function returns the identity element (or neutral element) for
/// the operation K.
-static Constant*
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+Constant*
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
- case LoopVectorizationLegality:: RK_IntegerXor:
- case LoopVectorizationLegality:: RK_IntegerAdd:
- case LoopVectorizationLegality:: RK_IntegerOr:
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
// Adding, Xoring, Oring zero to a number does not change it.
return ConstantInt::get(Tp, 0);
- case LoopVectorizationLegality:: RK_IntegerMult:
+ case RK_IntegerMult:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
- case LoopVectorizationLegality:: RK_IntegerAnd:
+ case RK_IntegerAnd:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
- case LoopVectorizationLegality:: RK_FloatMult:
+ case RK_FloatMult:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
- case LoopVectorizationLegality:: RK_FloatAdd:
+ case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
default:
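
The constants above are exactly the values that leave each reduction unchanged, which is what lets the vectorizer pad the start vector safely. A quick check of the integer cases in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  assert((X + 0u) == X && (X | 0u) == X && (X ^ 0u) == X); // add/or/xor: 0
  assert((X * 1u) == X);                                   // mul: 1
  assert((X & 0xFFFFFFFFu) == X);                          // and: all ones
  return 0;
}
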
@@ -1555,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
}
/// This function translates the reduction kind to an LLVM binary operator.
-static Instruction::BinaryOps
+static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
switch (Kind) {
case LoopVectorizationLegality::RK_IntegerAdd:
@@ -1572,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
return Instruction::FMul;
case LoopVectorizationLegality::RK_FloatAdd:
return Instruction::FAdd;
+ case LoopVectorizationLegality::RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case LoopVectorizationLegality::RK_FloatMinMax:
+ return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
}
+Value *createMinMaxOp(IRBuilder<> &Builder,
+ LoopVectorizationLegality::MinMaxReductionKind RK,
+ Value *Left,
+ Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max reduction kind");
+ case LoopVectorizationLegality::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case LoopVectorizationLegality::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ Value *Cmp;
+ if (RK == LoopVectorizationLegality::MRK_FloatMin ||
+     RK == LoopVectorizationLegality::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
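
createMinMaxOp() mirrors the scalar idiom frontends emit for min/max. In source form the compare/select pair looks like this (illustrative only):

#include <cstdint>

// Scalar shape of MRK_SIntMin: an icmp slt feeding a select. The vectorizer
// emits the same two instructions on vector operands.
int32_t smin(int32_t Left, int32_t Right) {
  bool Cmp = Left < Right;   // ICMP_SLT -> "rdx.minmax.cmp"
  return Cmp ? Left : Right; // select   -> "rdx.minmax.select"
}
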
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1632,7 +1754,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -1640,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Find the reduction identity variable. Zero for addition, or and xor,
// one for multiplication, -1 for and.
- Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
- Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ Value *Identity;
+ Value *VectorStart;
+ if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+ RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ // MinMax reductions have the start value as their identity.
+ VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+ "minmax.ident");
+ } else {
+ Constant *Iden =
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
// Fix the vector-loop phi.
// We created the induction variable so we know that the
@@ -1688,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
+ unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
- "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
+ RdxParts[part], ReducedPartRdx,
+ "bin.rdx");
+ else
+ ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
+ ReducedPartRdx, RdxParts[part]);
}
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
@@ -1716,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
// The result is in the first element of the vector.
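
The halving loop referenced above is a tree reduction: each step shuffles the upper half of the vector down and combines it with the lower half, so VF lanes collapse in log2(VF) steps. A scalar model with VF = 4 and integer addition:

#include <cstdio>

int main() {
  const unsigned VF = 4;
  int Vec[VF] = {1, 2, 3, 4};
  for (unsigned Half = VF / 2; Half >= 1; Half /= 2)
    for (unsigned i = 0; i < Half; ++i)
      Vec[i] += Vec[i + Half]; // "rdx.shuf" + "bin.rdx" in one step
  std::printf("%d\n", Vec[0]); // 10: the result lives in lane 0
  return 0;
}
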
@@ -1850,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
-
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
- P->getParent());
- for (unsigned part = 0; part < UF; ++part) {
- VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
- VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
- "predphi");
+ unsigned NumIncoming = P->getNumIncomingValues();
+ assert(NumIncoming > 1 && "Invalid PHI");
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We don't need to 'select' the first PHI operand because it is
+ // the default value if all of the other masks don't match.
+ if (In == 0)
+ Entry[part] = In0[part];
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
+ }
}
continue;
}
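
At source level, the nested selects built above correspond to a chain of conditional moves over the PHI's incoming values, with the first operand as the default. A scalar model for three incoming values (mask names hypothetical):

#include <cstdint>

int32_t predPhi(bool Mask1, bool Mask2, int32_t In0, int32_t In1,
                int32_t In2) {
  int32_t Entry = In0;         // In == 0: the default, no select needed
  Entry = Mask1 ? In1 : Entry; // "predphi" for In == 1
  Entry = Mask2 ? In2 : Entry; // "predphi" for In == 2
  return Entry;
}
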
@@ -1917,7 +2073,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
continue;
}
@@ -2077,6 +2234,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
}
case Instruction::Call: {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ break;
+
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
@@ -2137,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator()))
return false;
- // We must have at most two predecessors because we need to convert
- // all PHIs to selects.
- unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
- if (Preds > 2)
- return false;
-
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
return false;
@@ -2153,7 +2308,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
bool LoopVectorizationLegality::canVectorize() {
- assert(TheLoop->getLoopPreheader() && "No preheader!!");
+ // We must have a loop in canonical form. Loops with indirectbr in them cannot
+ // be canonicalized.
+ if (!TheLoop->getLoopPreheader())
+ return false;
// We can only vectorize innermost loops.
if (TheLoop->getSubLoopsVector().size())
@@ -2220,10 +2378,44 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+/// \brief Returns true if the instruction has users outside the loop and is
+/// not an identified reduction variable.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+ SmallPtrSet<Value *, 4> &Reductions) {
+ // Reduction instructions are allowed to have exit users. All other
+ // instructions must not have external users.
+ if (!Reductions.count(Inst))
+ // Check that all of the users of the instruction are inside the loop.
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return true;
+ }
+ }
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
+ // If we marked the scalar loop as "already vectorized" then no need
+ // to vectorize it again.
+ if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
+ DEBUG(dbgs() << "LV: This loop was vectorized before\n");
+ return false;
+ }
+
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr = F.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex,
+ "no-nans-fp-math").getValueAsString() == "true";
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2233,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
// Check that this PHI type is allowed.
if (!Phi->getType()->isIntegerTy() &&
!Phi->getType()->isFloatingPointTy() &&
@@ -2250,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// If this PHINode is not in the header block, then we know that we
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
- if (*bb != Header)
- continue;
+ if (*bb != Header) {
+ // Check that this instruction has no outside users or is an
+ // identified reduction value with an outside user.
+ if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ continue;
+ return false;
+ }
+
+ // We only allow if-converted PHIs with exactly two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
@@ -2293,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_IntegerMinMax)) {
+ DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
if (AddReductionVar(Phi, RK_FloatMult)) {
DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
continue;
@@ -2301,14 +2502,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_FloatMinMax)) {
+ DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
- // We still don't handle functions.
+ // We still don't handle functions. However, we can ignore dbg intrinsic
+ // calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !getIntrinsicIDForCall(CI, TLI)) {
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
@@ -2329,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (!AllowedExit.count(it))
- //Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator I = it->use_begin(), E = it->use_end();
- I != E; ++I) {
- Instruction *U = cast<Instruction>(*I);
- // This user may be a reduction exit value.
- if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
- return false;
- }
- }
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
} // next instr.
}
@@ -2419,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
bool LoopVectorizationLegality::canVectorizeMemory() {
- if (TheLoop->isAnnotatedParallel()) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
@@ -2434,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2448,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayReadFromMemory()) {
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
- if (!Ld->isSimple()) {
+ if (!Ld->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -2460,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) return false;
- if (!St->isSimple()) {
+ if (!St->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -2507,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
ReadWrites.insert(std::make_pair(Ptr, ST));
}
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
@@ -2529,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
+ unsigned NumReadPtrs = 0;
+ unsigned NumWritePtrs = 0;
+
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
@@ -2536,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, true);
+ NumWritePtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2546,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, false);
+ NumReadPtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2556,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or find an
// unsizeable pointer.
- if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
+ DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
}
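
The budget formula deserves a worked example. Note that it intentionally over-counts: each write is paired against every other pointer, so write-write pairs are counted twice, making this a conservative upper bound on the checks addRuntimeCheck() actually emits (see the read-read filter above):

#include <cassert>

int main() {
  unsigned NumWritePtrs = 2, NumReadPtrs = 3;
  unsigned NumComparisons = NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1);
  // 2 * (3 + 2 - 1) = 8, just within the new threshold of 8; the emitted
  // checks are actually 2*3 + 1 = 7 distinct pairs.
  assert(NumComparisons == 8);
  return 0;
}
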
@@ -2619,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
return false;
}
@@ -2663,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
return false;
}
}
@@ -2710,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// used as reduction variables (such as ADD). We may have a single
// out-of-block user. The cycle must end with the original PHI.
Instruction *Iter = Phi;
- while (true) {
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we count
+ // the number of instructions we have seen from the recognized pattern. This
+ // way we do not stop when we see that the phi has two uses (one by the
+ // select and one by the icmp), and we can make sure we see exactly those
+ // two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ ReductionInstDesc ReduxDesc(false, 0);
+
+ // Avoid cycles in the chain.
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ while (VisitedInsts.insert(Iter)) {
// If the instruction has no users then this is a broken
// chain and can't be a reduction variable.
if (Iter->use_empty())
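
The icmp/select bookkeeping above targets reductions of the following shape (a representative C++ loop, not taken from the test suite):

#include <cstdint>

// A signed-max reduction: the loop-carried value has exactly two in-loop
// users, the compare and the select, which AddReductionVar must accept as
// one pattern instead of rejecting the PHI for having two uses.
int32_t maxReduce(const int32_t *A, int32_t N) {
  int32_t Max = INT32_MIN;           // RdxStart
  for (int32_t i = 0; i < N; ++i)
    Max = (Max > A[i]) ? Max : A[i]; // icmp sgt + select -> MRK_SIntMax
  return Max;                        // the single out-of-loop user
}
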
@@ -2749,26 +2967,40 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
U->getParent() != TheLoop->getHeader() &&
TheLoop->contains(U) &&
- Iter->getNumUses() > 1)
+ Iter->hasNUsesOrMore(2))
continue;
- // We can't have multiple inside users.
- if (FoundInBlockUser)
+ // We can't have multiple inside users except for a combination of
+ // icmp/select both using the phi.
+ if (FoundInBlockUser && !NumCmpSelectPatternInst)
return false;
FoundInBlockUser = true;
// Any reduction instr must be of one of the allowed kinds.
- if (!isReductionInstr(U, Kind))
+ ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
return false;
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+
// Reductions of instructions such as Div, and Sub is only
// possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+ if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
+ !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
return false;
- Iter = U;
+ Iter = ReduxDesc.PatternLastInst;
}
+ // This means we have seen either only one of the two instructions of the
+ // pattern, or more than just a single select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
// We found a reduction var if we have reached the original
// phi node and we only have a single instruction with out-of-loop
// users.
@@ -2777,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
AllowedExit.insert(ExitInstruction);
// Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
Reductions[Phi] = RD;
// We've ended the cycle. This is a reduction variable if we have an
// outside user and it has a binary op.
return FoundBinOp && ExitInstruction;
}
}
+
+ return false;
}
-bool
+/// Returns a ReductionInstDesc describing whether the instruction is a
+/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y).
+LoopVectorizationLegality::ReductionInstDesc
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = 0;
+ SelectInst *Select = 0;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(Select, Prev.MinMaxKind);
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return ReductionInstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return ReductionInstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return ReductionInstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+
+ return ReductionInstDesc(false, I);
+}
+
+LoopVectorizationLegality::ReductionInstDesc
LoopVectorizationLegality::isReductionInstr(Instruction *I,
- ReductionKind Kind) {
+ ReductionKind Kind,
+ ReductionInstDesc &Prev) {
bool FP = I->getType()->isFloatingPointTy();
bool FastMath = (FP && I->isCommutative() && I->isAssociative());
-
switch (I->getOpcode()) {
default:
- return false;
+ return ReductionInstDesc(false, I);
case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
- return false;
- // possibly.
- return true;
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+ Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
case Instruction::Add:
- return Kind == RK_IntegerAdd;
- case Instruction::SDiv:
- case Instruction::UDiv:
+ return ReductionInstDesc(Kind == RK_IntegerAdd, I);
case Instruction::Mul:
- return Kind == RK_IntegerMult;
+ return ReductionInstDesc(Kind == RK_IntegerMult, I);
case Instruction::And:
- return Kind == RK_IntegerAnd;
+ return ReductionInstDesc(Kind == RK_IntegerAnd, I);
case Instruction::Or:
- return Kind == RK_IntegerOr;
+ return ReductionInstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
- return Kind == RK_IntegerXor;
+ return ReductionInstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return Kind == RK_FloatMult && FastMath;
+ return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
- return Kind == RK_FloatAdd && FastMath;
- }
+ return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
}
LoopVectorizationLegality::InductionKind
@@ -3238,6 +3530,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
+
unsigned C = getInstructionCost(it, VF);
Cost += C;
DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
@@ -3297,14 +3593,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
- return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy);
+ case Instruction::Xor: {
+ // Certain instructions can be cheaper to vectorize if they have a constant
+ // second vector operand. One example of this are shifts on x86.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+
+ if (isa<ConstantInt>(I->getOperand(1)))
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+ }
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
+ if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
@@ -3335,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
// Scalarized loads/stores.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
- if (0 == Stride) {
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000000..cc30cc9278
--- /dev/null
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,348 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
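+// For example, given four consecutive stores of i32 sums (an illustrative
+// input, not part of this patch):
+//   a[0] = x0 + y0; a[1] = x1 + y1; a[2] = x2 + y2; a[3] = x3 + y3;
+// the pass detects the consecutive stores, vectorizes the feeding adds
+// through their use-def chains, and emits a single <4 x i32> store when the
+// cost model deems it profitable.
+//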
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+ cl::desc("Only vectorize trees if the gain is above this "
+ "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public FunctionPass {
+ typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit SLPVectorizer() : FunctionPass(ID) {
+ initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+
+ virtual bool runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+
+ StoreRefs.clear();
+ bool Changed = false;
+
+ // Must have DataLayout. We can't require it because some tests run w/o
+ // triple.
+ if (!DL)
+ return false;
+
+ for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+ BasicBlock *BB = it;
+ bool BBChanged = false;
+
+      // Use the bottom-up SLP vectorizer to construct chains that start with
+      // the store instructions.
+ BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
+
+ // Vectorize trees that end at reductions.
+ BBChanged |= vectorizeReductions(BB, R);
+
+ // Vectorize trees that end at stores.
+ if (unsigned count = collectStores(BB, R)) {
+ (void)count;
+ DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
+ BBChanged |= vectorizeStoreChains(R);
+ }
+
+ // Try to hoist some of the scalarization code to the preheader.
+ if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+ Changed |= BBChanged;
+ }
+
+ if (Changed) {
+ DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+ DEBUG(verifyFunction(F));
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<LoopInfo>();
+ }
+
+private:
+
+  /// \brief Collect memory references and group them according to their base
+  /// object. Grouping the stores by base object reduces the cost of the
+  /// quadratic search on the stores. TODO: We can further reduce this cost
+ /// if we flush the chain creation every time we run into a memory barrier.
+ unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
+
+ /// \brief Try to vectorize a list of operands.
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+
+  /// \brief Try to vectorize a chain that may start at the operands of \p V.
+ bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
+
+ /// \brief Vectorize the stores that were collected in StoreRefs.
+ bool vectorizeStoreChains(BoUpSLP &R);
+
+ /// \brief Try to hoist gather sequences outside of the loop in cases where
+ /// all of the sources are loop invariant.
+ void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Scan the basic block and look for reductions that may start a
+ /// vectorization chain.
+ bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+ StoreListMap StoreRefs;
+};
+
+unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+ unsigned count = 0;
+ StoreRefs.clear();
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ Type *Ty = SI->getValueOperand()->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+ return 0;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
+ count++;
+ }
+ return count;
+}
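+
+// For example (illustrative), stores to A[0], A[1] and B[0] are collected into
+// two lists keyed by their GEP base pointers: StoreRefs[A] = {A[0], A[1]} and
+// StoreRefs[B] = {B[0]}.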
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+ if (!A || !B) return false;
+ Value *VL[] = { A, B };
+ return tryToVectorizeList(VL, R);
+}
+
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+ DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+
+ // Check that all of the parts are scalar.
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ Type *Ty = VL[i]->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+      return false;
+ }
+
+ int Cost = R.getTreeCost(VL);
+ int ExtrCost = R.getScalarizationCost(VL);
+ DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+ " Cost of extract:" << ExtrCost << ".\n");
+ if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+ DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+ R.vectorizeArith(VL);
+ return true;
+}
+
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+ if (!V) return false;
+ // Try to vectorize V.
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ return true;
+
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ // Try to skip B.
+ if (B && B->hasOneUse()) {
+ BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (tryToVectorizePair(A, B0, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A, B1, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ }
+
+ // Try to skip A.
+ if (A && A->hasOneUse()) {
+ BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (tryToVectorizePair(A0, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A1, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ }
+  return false;
+}
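+
+// For example (illustrative), for V = (x0 * y0) + (x1 * y1), if the two
+// multiplies cannot be vectorized as a pair, tryToVectorize attempts to pair
+// one multiply with an operand of the other; the skipped multiply is moved to
+// just before V so it sits after the newly created vector instructions.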
+
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+ bool Changed = false;
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ if (isa<DbgInfoIntrinsic>(it)) continue;
+
+ // Try to vectorize reductions that use PHINodes.
+ if (PHINode *P = dyn_cast<PHINode>(it)) {
+ // Check that the PHI is a reduction PHI.
+ if (P->getNumIncomingValues() != 2) return Changed;
+ Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+ (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+ 0));
+ // Check if this is a Binary Operator.
+ BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+ if (!BI)
+ continue;
+
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P) Inst = BI->getOperand(1);
+ Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ continue;
+ }
+
+ // Try to vectorize trees that start at compare instructions.
+ if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+ if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+ Changed |= true;
+ continue;
+ }
+ for (int i = 0; i < 2; ++i)
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+ Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+ continue;
+ }
+ }
+
+ return Changed;
+}
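+
+// For example, vectorizeReductions follows a reduction PHI of the form
+// (illustrative IR):
+//   %sum = phi i32 [ 0, %entry ], [ %next, %loop ]
+//   %next = add i32 %sum, %val
+// through the add to %val, where a vectorizable tree may begin.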
+
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
+
+ DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+ it->second.size() << ".\n");
+
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ }
+ return Changed;
+}
+
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+ BoUpSLP &R) {
+ // Check if this block is inside a loop.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ // Check if it has a preheader.
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ if (!PreHeader)
+ return;
+
+ // Mark the insertion point for the block.
+ Instruction *Location = PreHeader->getTerminator();
+
+ BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+ for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+ it != e; ++it) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+    // The gather sequence may have been folded into a constant, in which case
+    // it is no longer an InsertElement instruction; skip it.
+ if (!Insert)
+ continue;
+
+ // If the vector or the element that we insert into it are
+ // instructions that are defined in this basic block then we can't
+ // hoist this instruction.
+ Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+ Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (CurrVec && L->contains(CurrVec)) continue;
+ if (NewElem && L->contains(NewElem)) continue;
+
+ // We can hoist this instruction. Move it to the pre-header.
+ Insert->moveBefore(Location);
+ }
+}
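+
+// For example (illustrative), an insertelement in the loop body whose inputs
+// are defined outside the loop,
+//   %g = insertelement <4 x i32> undef, i32 %inv, i32 0
+// is moved by hoistGatherSequence to just before the preheader's terminator.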
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createSLPVectorizerPass() {
+ return new SLPVectorizer();
+ }
+}
+
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 0000000000..9b9436683b
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,730 @@
+//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "SLP"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 6;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
+ BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
+ numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+ int Loc = 0;
+ InstrIdx.clear();
+ InstrVec.clear();
+ // Number the instructions in the block.
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ InstrIdx[it] = Loc++;
+ InstrVec.push_back(it);
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+ }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+ return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
+ return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+ // Check that A and B are of the same type.
+ if (PtrA->getType() != PtrB->getType()) return false;
+
+ // Calculate the distance.
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+ const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+ // Non constant distance.
+ if (!ConstOffSCEV) return false;
+
+ int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  // The instructions are consecutive if the size of the first load/store is
+  // the same as the offset.
+ int64_t Sz = DL->getTypeStoreSize(Ty);
+ return ((-Offset) == Sz);
+}
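+
+// For example (illustrative), for two i32 stores A = &p[0] and B = &p[1],
+// SCEV computes Offset = PtrA - PtrB = -4 while the store size Sz is 4, so
+// -Offset == Sz and isConsecutiveAccess(A, B) returns true.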
+
+bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
+ Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+ unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ unsigned VF = MinVecRegSize / Sz;
+
+ if (!isPowerOf2_32(Sz) || VF < 2) return false;
+
+ bool Changed = false;
+ // Look for profitable vectorizable trees at all offsets, starting at zero.
+ for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+ if (i + VF > e) return Changed;
+ DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+ ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+ int Cost = getTreeCost(Operands);
+ DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ if (Cost < CostThreshold) {
+ DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ vectorizeTree(Operands, VF);
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
+ ValueSet Heads, Tails;
+ SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we vectorized so that we don't visit the same store twice.
+ ValueSet VectorizedStores;
+ bool Changed = false;
+
+ // Do a quadratic search on all of the given stores and find
+  // all of the pairs of stores that follow each other.
+ for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+ for (unsigned j = 0; j < e; ++j) {
+ if (i == j) continue;
+ if (isConsecutiveAccess(Stores[i], Stores[j])) {
+ Tails.insert(Stores[j]);
+ Heads.insert(Stores[i]);
+ ConsecutiveChain[Stores[i]] = Stores[j];
+ }
+ }
+
+ // For stores that start but don't end a link in the chain:
+ for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ if (Tails.count(*it)) continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to vectorize it.
+ ValueList Operands;
+ Value *I = *it;
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (VectorizedStores.count(I)) break;
+ Operands.push_back(I);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+ // Mark the vectorized stores so that we don't vectorize them again.
+ if (Vectorized)
+ VectorizedStores.insert(Operands.begin(), Operands.end());
+ Changed |= Vectorized;
+ }
+
+ return Changed;
+}
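+
+// For example (illustrative), for stores to p[0], p[1] and p[2], the scan in
+// vectorizeStores records p[0]->p[1] and p[1]->p[2] in ConsecutiveChain;
+// p[0] is a head but not a tail, so the chain [p[0], p[1], p[2]] is collected
+// and handed to vectorizeStoreChain.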
+
+int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
+ // Find the type of the operands in VL.
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ // Find the cost of inserting/extracting values from the vector.
+ return getScalarizationCost(VecTy);
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+ int Cost = 0;
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+ BasicBlock::iterator I = Src, E = Dst;
+  // Scan all of the instructions from Src to Dst and check whether any of
+  // them may alias with the source.
+ for (++I; I != E; ++I) {
+ // Ignore store instructions that are marked as 'ignore'.
+ if (MemBarrierIgnoreList.count(I)) continue;
+ if (Src->mayWriteToMemory()) /* Write */ {
+ if (!I->mayReadOrWriteMemory()) continue;
+ } else /* Read */ {
+ if (!I->mayWriteToMemory()) continue;
+ }
+ AliasAnalysis::Location A = getLocation(&*I);
+ AliasAnalysis::Location B = getLocation(Src);
+
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+ return I;
+ }
+ return 0;
+}
+
+void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+ Value *Vec = vectorizeTree(Operands, Operands.size());
+ BasicBlock::iterator Loc = cast<Instruction>(Vec);
+ IRBuilder<> Builder(++Loc);
+ // After vectorizing the operands we need to generate extractelement
+ // instructions and replace all of the uses of the scalar values with
+ // the values that we extracted from the vectorized tree.
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
+ Operands[i]->replaceAllUsesWith(S);
+ }
+}
+
+int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
+ // Get rid of the list of stores that were removed, and from the
+ // lists of instructions with multiple users.
+ MemBarrierIgnoreList.clear();
+ LaneMap.clear();
+ MultiUserVals.clear();
+ MustScalarize.clear();
+
+ // Scan the tree and find which value is used by which lane, and which values
+ // must be scalarized.
+ getTreeUses_rec(VL, 0);
+
+ // Check that instructions with multiple users can be vectorized. Mark unsafe
+ // instructions.
+ for (ValueSet::iterator it = MultiUserVals.begin(),
+ e = MultiUserVals.end(); it != e; ++it) {
+ // Check that all of the users of this instr are within the tree
+ // and that they are all from the same lane.
+ int Lane = -1;
+ for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
+ I != E; ++I) {
+ if (LaneMap.find(*I) == LaneMap.end()) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
+ " to MustScalarize because of an out of tree usage.\n");
+ break;
+ }
+ if (Lane == -1) Lane = LaneMap[*I];
+ if (Lane != LaneMap[*I]) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"Adding " << **it <<
+ " to MustScalarize because multiple lane use it: "
+ << Lane << " and " << LaneMap[*I] << ".\n");
+ break;
+ }
+ }
+ }
+
+ // Now calculate the cost of vectorizing the tree.
+ return getTreeCost_rec(VL, 0);
+}
+
+void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ if (Depth == RecursionMaxDepth) return;
+
+ // Don't handle vectors.
+ if (VL[0]->getType()->isVectorTy()) return;
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ if (SI->getValueOperand()->getType()->isVectorTy()) return;
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If one of the instructions is out of this BB, we need to scalarize all.
+ if (I && I->getParent() != BB) return;
+ }
+
+ // If all of the operands are identical or constant we have a simple solution.
+ if (AllConst || AllSameScalar) return;
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return;
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return;
+ }
+
+ // Mark instructions with multiple users.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Remember to check if all of the users of this instr are vectorized
+ // within our tree.
+ if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
+ }
+
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ // Check that the instruction is only used within
+ // one lane.
+ if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
+    // Mark this instruction as 'seen' and remember the lane.
+ LaneMap[VL[i]] = i;
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ getTreeUses_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::Store: {
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ getTreeUses_rec(Operands, Depth+1);
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ Type *ScalarTy = VL[0]->getType();
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+
+  // Don't mess with vectors.
+ if (ScalarTy->isVectorTy()) return max_cost;
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ bool MustScalarizeFlag = false;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // Check if this value was marked for scalarization by getTreeUses_rec.
+    MustScalarizeFlag |= MustScalarize.count(VL[i]);
+ // This instruction is outside the basic block.
+ if (I && I->getParent() != BB)
+ return getScalarizationCost(VecTy);
+ }
+
+ // Is this a simple vector constant.
+ if (AllConst) return 0;
+
+ // If all of the operands are identical we can broadcast them.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (AllSameScalar) {
+ // If we are in a loop, and this is not an instruction (e.g. constant or
+ // argument) or the instruction is defined outside the loop then assume
+ // that the cost is zero.
+ if (L && (!VL0 || !L->contains(VL0)))
+ return 0;
+
+ // We need to broadcast the scalar.
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+ }
+
+  // If any of the values was marked as requiring scalarization then we need
+  // to gather the whole group.
+ if (MustScalarizeFlag)
+ return getScalarizationCost(VecTy);
+
+ if (!VL0) return getScalarizationCost(VecTy);
+ assert(VL0->getParent() == BB && "Wrong BB");
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+ }
+
+ // Check if it is safe to sink the loads or the stores.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ int MaxIdx = InstrIdx[VL0];
+ for (unsigned i = 1, e = VL.size(); i < e; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+ Instruction *Last = InstrVec[MaxIdx];
+ for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
+ if (VL[i] == Last) continue;
+ Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+ if (Barrier) {
+ DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
+ *Last << "\n because of " << *Barrier << "\n");
+ return max_cost;
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ int Cost = 0;
+ ValueList Operands;
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ // Check that the casted type is the same for all users.
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+ return getScalarizationCost(VecTy);
+ }
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+ VL0->getType(), SrcTy);
+
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ int Cost = 0;
+ // Calculate the cost of all of the operands.
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+ }
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+
+ int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Load: {
+    // If we have to scalarize the loads, return the cost of gathering the
+    // values into a vector.
+ for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i], VL[i+1]))
+ return getScalarizationCost(VecTy);
+
+ // Cost of wide load - cost of scalar loads.
+ int ScalarLdCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
+ return VecLdCost - ScalarLdCost;
+ }
+ case Instruction::Store: {
+ // We know that we can merge the stores. Calculate the cost.
+ int ScalarStCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
+ int StoreCost = VecStCost - ScalarStCost;
+
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ MemBarrierIgnoreList.insert(VL[j]);
+ }
+
+ int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
+ return TotalCost;
+ }
+ default:
+ // Unable to vectorize unknown instructions.
+ return getScalarizationCost(VecTy);
+ }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+ int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
+ for (unsigned i = 0; i < VF; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+ return InstrVec[MaxIdx + 1];
+}
+
+Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
+ IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
+ Value *Vec = UndefValue::get(Ty);
+ for (unsigned i=0; i < Ty->getNumElements(); ++i) {
+ // Generate the 'InsertElement' instruction.
+ Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ // Remember that this instruction is used as part of a 'gather' sequence.
+ // The caller of the bottom-up slp vectorizer can try to hoist the sequence
+ // if the users are outside of the basic block.
+ GatherInstructions.push_back(Vec);
+ }
+
+ return Vec;
+}
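+
+// For example, Scalarize applied to two scalars %a and %b emits the gather
+// sequence (illustrative IR):
+//   %v0 = insertelement <2 x i32> undef, i32 %a, i32 0
+//   %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
+// and records both insertelement instructions in GatherInstructions.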
+
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
+ Value *V = vectorizeTree_rec(VL, VF);
+ // We moved some instructions around. We have to number them again
+ // before we can do any analysis.
+ numberInstructions();
+ MustScalarize.clear();
+ return V;
+}
+
+Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VF);
+
+ // Check if all of the operands are constants or identical.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // The instruction must be in the same BB, and it must be vectorizable.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+ return Scalarize(VL, VecTy);
+ }
+
+ // Check that this is a simple vector constant.
+ if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return Scalarize(VL, VecTy);
+
+ if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ ValueList INVL;
+ for (int i = 0; i < VF; ++i)
+ INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Value *InVec = vectorizeTree_rec(INVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ CastInst *CI = dyn_cast<CastInst>(VL0);
+ Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0; i < VF; ++i) {
+      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+    Value *LHS = vectorizeTree_rec(LHSVL, VF);
+    Value *RHS = vectorizeTree_rec(RHSVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
+    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(VL0);
+ unsigned Alignment = LI->getAlignment();
+
+ // Check if all of the loads are consecutive.
+ for (unsigned i = 1, e = VF; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i-1], VL[i]))
+ return Scalarize(VL, VecTy);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = Builder.CreateLoad(VecPtr);
+ LI->setAlignment(Alignment);
+ VectorizedValues[VL0] = LI;
+ return LI;
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(VL0);
+ unsigned Alignment = SI->getAlignment();
+
+ ValueList ValueOp;
+ for (int i = 0; i < VF; ++i)
+ ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+
+ Value *VecValue = vectorizeTree_rec(ValueOp, VF);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo());
+ Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+
+ for (int i = 0; i < VF; ++i)
+ cast<Instruction>(VL[i])->eraseFromParent();
+ return 0;
+ }
+ default:
+ Value *S = Scalarize(VL, VecTy);
+ VectorizedValues[VL0] = S;
+ return S;
+ }
+}
+
+} // end of namespace
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 0000000000..5456c6c779
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,164 @@
+//===- VecUtils.h - Vectorization Utilities -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+class Loop;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP {
+ typedef SmallVector<Value*, 8> ValueList;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ typedef SmallVector<StoreInst*, 8> StoreList;
+ static const int max_cost = 1<<20;
+
+  /// \brief C'tor.
+ BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
+
+ /// \brief Take the pointer operand from the Load/Store instruction.
+ /// \returns NULL if this is not a valid Load/Store instruction.
+ static Value *getPointerOperand(Value *I);
+
+ /// \brief Take the address space operand from the Load/Store instruction.
+ /// \returns -1 if this is not a valid Load/Store instruction.
+ static unsigned getAddressSpaceOperand(Value *I);
+
+ /// \returns true if the memory operations A and B are consecutive.
+ bool isConsecutiveAccess(Value *A, Value *B);
+
+ /// \brief Vectorize the tree that starts with the elements in \p VL.
+ /// \returns the vectorized value.
+ Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
+
+ /// \returns the vectorization cost of the subtree that starts at \p VL.
+ /// A negative number means that this is profitable.
+ int getTreeCost(ArrayRef<Value *> VL);
+
+ /// \returns the scalarization cost for this list of values. Assuming that
+ /// this subtree gets vectorized, we may need to extract the values from the
+ /// roots. This method calculates the cost of extracting the values.
+ int getScalarizationCost(ArrayRef<Value *> VL);
+
+ /// \brief Attempts to order and vectorize a sequence of stores. This
+ /// function does a quadratic scan of the given stores.
+ /// \returns true if the basic block was modified.
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
+
+ /// \brief Vectorize a group of scalars into a vector tree.
+ void vectorizeArith(ArrayRef<Value *> Operands);
+
+ /// \returns the list of new instructions that were added in order to collect
+ /// scalars into vectors. This list can be used to further optimize the gather
+ /// sequences.
+  ValueList &getGatherSeqInstructions() { return GatherInstructions; }
+
+private:
+ /// \brief This method contains the recursive part of getTreeCost.
+ int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+ /// \brief This recursive method looks for vectorization hazards such as
+ /// values that are used by multiple users and checks that values are used
+ /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+ void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+ /// \brief This method contains the recursive part of vectorizeTree.
+ Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
+
+ /// \brief Number all of the instructions in the block.
+ void numberInstructions();
+
+ /// \brief Vectorize a sorted sequence of stores.
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
+
+ /// \returns the scalarization cost for this type. Scalarization in this
+ /// context means the creation of vectors from a group of scalars.
+ int getScalarizationCost(Type *Ty);
+
+  /// \returns the AA location that is being accessed by the instruction.
+ AliasAnalysis::Location getLocation(Instruction *I);
+
+ /// \brief Checks if it is possible to sink an instruction from
+ /// \p Src to \p Dst.
+ /// \returns the pointer to the barrier instruction if we can't sink.
+ Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+ /// \returns the instruction that appears last in the BB from \p VL.
+ /// Only consider the first \p VF elements.
+ Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+
+ /// \returns a vector from a collection of scalars in \p VL.
+ Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
+
+private:
+ /// Maps instructions to numbers and back.
+ SmallDenseMap<Value*, int> InstrIdx;
+ /// Maps integers to Instructions.
+ std::vector<Instruction*> InstrVec;
+
+ // -- containers that are used during getTreeCost -- //
+
+ /// Contains values that must be scalarized because they are used
+ /// by multiple lanes, or by users outside the tree.
+ /// NOTICE: The vectorization methods also use this set.
+ ValueSet MustScalarize;
+
+  /// Contains values that have more than one use; getTreeCost checks that all
+  /// of their users are inside the tree and in the same lane. This set must be
+  /// reset between runs.
+ ValueSet MultiUserVals;
+  /// Maps values in the tree to the vector lanes that use them. This map must
+ /// be reset between runs of getCost.
+ std::map<Value*, int> LaneMap;
+ /// A list of instructions to ignore while sinking
+ /// memory instructions. This map must be reset between runs of getCost.
+ SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+
+ // -- Containers that are used during vectorizeTree -- //
+
+  /// Maps the first scalar of a vectorized group to its vector value. This
+  /// map must be reset between runs.
+ DenseMap<Value*, Value*> VectorizedValues;
+
+ // -- Containers that are used after vectorization by the caller -- //
+
+ /// A list of instructions that are used when gathering scalars into vectors.
+ /// In many cases these instructions can be hoisted outside of the BB.
+ /// Iterating over this list is faster than calling LICM.
+ ValueList GatherInstructions;
+
+ // Analysis and block reference.
+ BasicBlock *BB;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ Loop *L;
+};
+
+} // end of namespace
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2f87..a927fe1451 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
- //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,6 +28,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
+ initializeSLPVectorizerPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopVectorizePass());
}
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSLPVectorizerPass());
+}