author     Eli Bendersky <eliben@chromium.org>    2013-07-15 16:09:15 -0700
committer  Eli Bendersky <eliben@chromium.org>    2013-07-15 16:09:15 -0700
commit     c6cf05cb5108f356dde97c01ee4188b0671d4542 (patch)
tree       436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/Transforms/InstCombine
parent     c75199c649c739aade160289d93f257edc798cde (diff)
parent     7dfcb84fc16b3bf6b2379713b53090757f0a45f9 (diff)
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
    docs/LangRef.rst
    include/llvm/CodeGen/CallingConvLower.h
    include/llvm/IRReader/IRReader.h
    include/llvm/Target/TargetMachine.h
    lib/CodeGen/CallingConvLower.cpp
    lib/IRReader/IRReader.cpp
    lib/IRReader/LLVMBuild.txt
    lib/IRReader/Makefile
    lib/LLVMBuild.txt
    lib/Makefile
    lib/Support/MemoryBuffer.cpp
    lib/Support/Unix/PathV2.inc
    lib/Target/ARM/ARMBaseInstrInfo.cpp
    lib/Target/ARM/ARMISelLowering.cpp
    lib/Target/ARM/ARMInstrInfo.td
    lib/Target/ARM/ARMSubtarget.cpp
    lib/Target/ARM/ARMTargetMachine.cpp
    lib/Target/Mips/CMakeLists.txt
    lib/Target/Mips/MipsDelaySlotFiller.cpp
    lib/Target/Mips/MipsISelLowering.cpp
    lib/Target/Mips/MipsInstrInfo.td
    lib/Target/Mips/MipsSubtarget.cpp
    lib/Target/Mips/MipsSubtarget.h
    lib/Target/X86/X86FastISel.cpp
    lib/Target/X86/X86ISelDAGToDAG.cpp
    lib/Target/X86/X86ISelLowering.cpp
    lib/Target/X86/X86InstrControl.td
    lib/Target/X86/X86InstrFormats.td
    lib/Transforms/IPO/ExtractGV.cpp
    lib/Transforms/InstCombine/InstCombineCompares.cpp
    lib/Transforms/Utils/SimplifyLibCalls.cpp
    test/CodeGen/X86/fast-isel-divrem.ll
    test/MC/ARM/data-in-code.ll
    tools/Makefile
    tools/llvm-extract/llvm-extract.cpp
    tools/llvm-link/CMakeLists.txt
    tools/opt/CMakeLists.txt
    tools/opt/LLVMBuild.txt
    tools/opt/Makefile
    tools/opt/opt.cpp
Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r--  lib/Transforms/InstCombine/CMakeLists.txt                 |   2
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h                  |   1
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp          | 279
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp        |  28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp           |   3
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp           |  16
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp        | 176
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp |  82
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp       | 145
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp             | 192
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp          | 103
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp       |  85
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp       |   2
13 files changed, 790 insertions(+), 324 deletions(-)
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index 72cfe2c985..a25696ec03 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine
InstCombineMulDivRem.cpp
InstCombinePHI.cpp
InstCombineSelect.cpp
- InstCombineShifts.cpp
+ InstCombineShifts.cpp
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5e33..2a36074750 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ private:
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
+ Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6f00..166f8dfdb4 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
/// Class representing coefficient of floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of I write this comment, the cost of the default
- /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
/// perform write-merging).
- ///
+ ///
class FAddendCoef {
public:
// The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
//
FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
~FAddendCoef();
-
+
void set(short C) {
assert(!insaneIntVal(C) && "Insane coefficient");
IsFp = false; IntVal = C;
}
-
+
void set(const APFloat& C);
-
+
void negate();
-
+
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
Value *getValue(Type *) const;
-
+
// If possible, don't define operator+/operator- etc because these
// operators inevitably call FAddendCoef's constructor which is not cheap.
void operator=(const FAddendCoef &A);
void operator+=(const FAddendCoef &A);
void operator-=(const FAddendCoef &A);
void operator*=(const FAddendCoef &S);
-
+
bool isOne() const { return isInt() && IntVal == 1; }
bool isTwo() const { return isInt() && IntVal == 2; }
bool isMinusOne() const { return isInt() && IntVal == -1; }
bool isMinusTwo() const { return isInt() && IntVal == -2; }
-
+
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
@@ -74,18 +74,28 @@ namespace {
return *getFpValPtr();
}
- APFloat &getFpVal(void)
- { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-
+ APFloat &getFpVal(void) {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
bool isInt() const { return !IsFp; }
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from an *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
-
+
// True iff FpValBuf contains an instance of APFloat.
bool BufHasFpVal;
-
+
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from neighboring at most
// two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -94,7 +104,7 @@ namespace {
AlignedCharArrayUnion<APFloat> FpValBuf;
};
-
+
/// FAddend is used to represent floating-point addend. An addend is
/// represented as <C, V>, where the V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
@@ -102,10 +112,10 @@ namespace {
class FAddend {
public:
FAddend() { Val = 0; }
-
+
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
-
+
bool isConstant() const { return Val == 0; }
bool isZero() const { return Coeff.isZero(); }
@@ -114,17 +124,17 @@ namespace {
{ Coeff.set(Coefficient); Val = V; }
void set(const ConstantFP* Coefficient, Value *V)
{ Coeff.set(Coefficient->getValueAPF()); Val = V; }
-
+
void negate() { Coeff.negate(); }
-
+
/// Drill down the U-D chain one step to find the definition of V, and
/// try to break the definition into one or two addends.
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-
+
/// Similar to FAddend::drillDownOneStep() except that the value being
/// splitted is the addend itself.
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-
+
void operator+=(const FAddend &T) {
assert((Val == T.Val) && "Symbolic-values disagree");
Coeff += T.Coeff;
@@ -132,12 +142,12 @@ namespace {
private:
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-
+
// This addend has the value of "Coeff * Val".
Value *Val;
FAddendCoef Coeff;
};
-
+
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with its neighboring at most two instructions.
///
@@ -145,27 +155,30 @@ namespace {
public:
FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
Value *simplify(Instruction *FAdd);
-
+
private:
typedef SmallVector<const FAddend*, 4> AddendVect;
-
+
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-
+
/// Return the number of instructions needed to emit the N-ary addition.
unsigned calcInstrNumber(const AddendVect& Vect);
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
-
+
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
-
+
private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
@@ -177,7 +190,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+}
//===----------------------------------------------------------------------===//
//
@@ -200,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) {
} else
*P = C;
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
}
-void FAddendCoef::operator=(const FAddendCoef& That) {
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
if (That.isInt())
set(That.IntVal);
else
@@ -219,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
getFpVal().add(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
return;
}
-
+
APFloat &T = getFpVal();
- T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
void FAddendCoef::operator-=(const FAddendCoef &That) {
@@ -240,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
getFpVal().subtract(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
return;
}
APFloat &T = getFpVal();
- T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
}
void FAddendCoef::operator*=(const FAddendCoef &That) {
@@ -268,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
return;
}
- const fltSemantics &Semantic =
+ const fltSemantics &Semantic =
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
- set(APFloat(Semantic, IntVal));
+ convertToFpType(Semantic);
APFloat &F0 = getFpVal();
if (That.isInt())
- F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
else
F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
@@ -302,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
// A - B <1, A>, <1,B>
// 0 - B <-1, B>
// C * A, <C, A>
-// A + C <1, A> <C, NULL>
+// A + C <1, A> <C, NULL>
// 0 +/- 0 <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
-//
+//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
Instruction *I = 0;
@@ -377,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
return 0;
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
- if (!BreakNum || Coeff.isOne())
+ if (!BreakNum || Coeff.isOne())
return BreakNum;
Addend0.Scale(Coeff);
@@ -388,6 +426,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to perform following optimization on the input instruction I. Return the
+// simplified expression if was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
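In IR terms, the new performFactorization step rewrites a fast-math add/sub of two multiplies (or divides) that share an operand into a single multiply (or divide). A rough, illustrative fragment (all names made up):

    ; before: unsafe-algebra fadd of two fmuls sharing %x
    %t0 = fmul fast float %x, %y
    %t1 = fmul fast float %x, %z
    %r  = fadd fast float %t0, %t1
    ; after: factor %x out of the sum
    %yz = fadd fast float %y, %z
    %r2 = fmul fast float %x, %yz

The ConstantFP check above bails out when the folded sum of constants is not a normal floating-point value, so the rewrite never manufactures a bad constant.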
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -398,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
- // Save the instruction before calling other member-functions.
+ // Save the instruction before calling other member-functions.
Instr = I;
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -409,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
unsigned Opnd0_ExpNum = 0;
unsigned Opnd1_ExpNum = 0;
- if (!Opnd0.isConstant())
+ if (!Opnd0.isConstant())
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -431,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -471,7 +581,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+ // step 6: Try factorization as the last resort,
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -479,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
- // For saving intermediate results;
+ // For saving intermediate results;
unsigned NextTmpIdx = 0;
FAddend TmpResult[3];
@@ -495,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
AddendVect SimpVect;
// The outer loop works on one symbolic-value at a time. Suppose the input
- // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
// The symbolic-values will be processed in this order: x, y, z.
//
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -522,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = 0;
SimpVect.push_back(T);
}
}
@@ -535,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
R += *SimpVect[Idx];
// Pop all addends being folded and push the resulting folded addend.
- SimpVect.resize(StartIdx);
+ SimpVect.resize(StartIdx);
if (Val != 0) {
if (!R.isZero()) {
SimpVect.push_back(&R);
@@ -548,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -570,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
assert(!Opnds.empty() && "Expect at least one addend");
// Step 1: Check if the # of instructions needed exceeds the quota.
- //
+ //
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
return 0;
@@ -591,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
// Iterate the addends, creating fadd/fsub using adjacent two addends.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
I != E; I++) {
- bool NeedNeg;
+ bool NeedNeg;
Value *V = createAddendVal(**I, NeedNeg);
if (!LastVal) {
LastVal = V;
@@ -617,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
}
#ifndef NDEBUG
- assert(CreateInstrNum == InstrNeeded &&
+ assert(CreateInstrNum == InstrNeeded &&
"Inconsistent in instruction numbers");
#endif
@@ -627,7 +738,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +751,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -665,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
// Adjust the number of instructions needed to emit the N-ary add.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -853,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
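Illustratively, for i8 (sign bit 0x80) with made-up constants:

    %x1 = xor i8 %x, -128     ; X + 0x80, canonicalized earlier to X ^ 0x80
    %r  = add i8 %x1, 16
    ; signbit ^ 16 = 0x90, so this becomes a single add:
    %r2 = add i8 %x, -112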
@@ -1111,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
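A sketch of the new select fold; writing both zeros as -0.0 satisfies the condition without needing the no-signed-zeros flag, since -0.0 is the floating-point additive identity:

    %s1 = select i1 %c, float -0.0, float %b
    %s2 = select i1 %c, float %a, float -0.0
    %r  = fadd float %s1, %s2
    ; => %r = select i1 %c, float %a, float %b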
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4332467371..ec75dd2e04 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -22,8 +22,8 @@ using namespace PatternMatch;
/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return 0;
}
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return 0;
+
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
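The new type check guards the fold below: without it, operands of different floating-point types could end up in a single fcmp. A made-up fragment that must now be left alone:

    %p = fcmp ord float %x, 0.0
    %q = fcmp ord double %y, 0.0
    %r = and i1 %p, %q        ; no fold: float vs. double operands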
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return Builder->CreateICmpULT(Add, AddCST);
- }
-
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
// (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
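Moving the range fold below the one-bit-difference fold changes which form wins when both apply, i.e. for consecutive constants that differ in a single bit (made-up values):

    %c1 = icmp eq i32 %a, 12
    %c2 = icmp eq i32 %a, 13
    %r  = or i1 %c1, %c2
    ; the mask form now wins over "(%a - 12) u< 2":
    %m  = and i32 %a, -2      ; clear the one differing bit
    %r2 = icmp eq i32 %m, 12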
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd278..78b4a2c6c9 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+ const AttributeSet &NewPAL =
+ AttributeSet::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a960ab2499..2ee1278d23 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
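A sketch of what the new guard rejects (@use8 and @use64 are hypothetical): the cast element's store size (1 byte) is smaller than the alloca element's (8 bytes) and the alloca has a second use, so the promotion is skipped.

    %buf = alloca i64                  ; 8-byte element
    %p   = bitcast i64* %buf to i8*    ; would become a 1-byte element
    call void @use8(i8* %p)
    call void @use64(i64* %buf)        ; second use of the allocation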
@@ -1604,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1625,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
@@ -1647,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
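This also explains the early return added above: without a DataLayout the byte order is unknown, so the whole transformation is skipped. On big-endian targets the lanes of the vector view are numbered from the most significant end, so the element index is mirrored; roughly:

    %i = bitcast <2 x float> %v to i64
    %h = lshr i64 %i, 32
    %t = trunc i64 %h to i32
    %f = bitcast i32 %t to float
    ; little-endian: %f = extractelement <2 x float> %v, i32 1
    ; big-endian:    %f = extractelement <2 x float> %v, i32 0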
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6eca399a40..518a8323b6 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
}
}
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(pred);
+
+ if (RHS->isOne()) {
+ if (pred == ICmpInst::ICMP_SLT) {
+ pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (pred == ICmpInst::ICMP_SGT) {
+ pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
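Besides plain compares against zero, isSignTest canonicalizes compares against 1 and -1 into sign tests against zero; the shl and mul hunks below then drop the sign-preserving operation entirely, e.g.:

    %s = shl nsw i32 %x, 3     ; nsw: the value is exactly 8 * %x
    %c = icmp sgt i32 %s, -1   ; a sign test: %s >=s 0
    ; => %c = icmp sge i32 %x, 0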
// isHighOnes - Return true if the constant is of the form 1+0+.
// This is the same as lowones(~X).
static bool isHighOnes(const ConstantInt *CI) {
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
- // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
- if (ArrayElementCount <= 32 ||
- (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
- Type *Ty;
- if (ArrayElementCount <= 32)
+ {
+ Type *Ty = 0;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else if (TD)
+ Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- else
- Ty = Type::getInt64Ty(Init->getContext());
- Value *V = Builder->CreateIntCast(Idx, Ty, false);
- V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
- V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
- return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+ if (Ty != 0) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
}
return 0;
@@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
break;
}
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
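For the Mul case just added, a negative multiplier swaps the predicate; a made-up instance:

    %m = mul nsw i32 %x, -3    ; nsw: the value is exactly -3 * %x
    %c = icmp sgt i32 %m, 0
    ; => %c = icmp slt i32 %x, 0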
@@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
@@ -1333,18 +1399,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
// This enables to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- // @LOCALMOD-BEGIN
- // We don't want to introduce non-power-of-two integer sizes for PNaCl's
- // stable wire format, so modify this transformation for NaCl.
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt &&
- isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8) {
- // @LOCALMOD-END
+ if (LHSI->hasOneUse() &&
+ // @LOCALMOD-BEGIN
+ // We don't want to introduce non-power-of-two integer sizes for PNaCl's
+ // stable wire format, so modify this transformation for NaCl.
+ isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8 &&
+ // @LOCALMOD-END
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
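Note the transformation now also requires the shl to have a single use. Under the PNaCl restriction the truncated width must remain a power of two of at least 8 bits; e.g. with M=32, N=16 (so M-N=16 is allowed):

    %s = shl i32 %x, 16
    %c = icmp eq i32 %s, 65536
    ; 65536 has 16 trailing zeros >= the shift amount, so:
    %t  = trunc i32 %x to i16
    %c2 = icmp eq i16 %t, 1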
@@ -1536,6 +1603,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(pred, X, NegX);
}
}
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
default: break;
}
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
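The new Mul case propagates equality-with-zero through a sign-preserving multiply by a non-zero constant:

    %m = mul nsw i32 %x, 7
    %c = icmp ne i32 %m, 0
    ; => %c = icmp ne i32 %x, 0   (7 * %x is zero exactly when %x is)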
@@ -2416,6 +2496,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, Y, Z);
}
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
+ }
+
+
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
A = 0; B = 0; C = 0; D = 0;
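For the constant-magnitude fold above: with nsw adds and constants of the same sign, the smaller constant migrates into the other side (made-up values):

    %l = add nsw i32 %x, 5
    %r = add nsw i32 %y, 2
    %c = icmp slt i32 %l, %r
    ; |5| >= |2|, so C3 = 5 - 2 = 3:
    %l2 = add nsw i32 %x, 3
    %c2 = icmp slt i32 %l2, %y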
@@ -2549,6 +2678,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
{ Value *A, *B;
+ // Transform (A & ~B) == 0 --> (A & B) != 0
+ // and (A & ~B) != 0 --> (A & B) == 0
+ // if A is a power of 2.
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+ return new ICmpInst(I.getInversePredicate(),
+ Builder->CreateAnd(A, B),
+ Op1);
+
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
if (match(Op0, m_Not(m_Value(A)))) {
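When %a is known to be a power of two, the and-not test of its bit flips into a direct and-test with the inverse predicate:

    %nb = xor i32 %b, -1       ; ~%b
    %t  = and i32 %a, %nb
    %c  = icmp eq i32 %t, 0    ; "the bit of %a is set in %b"
    ; => %t2 = and i32 %a, %b
    ;    %c2 = icmp ne i32 %t2, 0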
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe32a8..e2d7966cb3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
- IsOffset || !GEP->hasAllZeroIndices()))
+ if (!isOnlyCopiedFromConstantGlobal(
+ GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
return false;
continue;
}
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
+ SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come form identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// its only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// If the two stores had the same TBAA tag, preserve it.
if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
OtherStore->getMetadata(LLVMContext::MD_tbaa))))
NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
// Nuke the old stores.
EraseInstFromFunction(SI);
EraseInstFromFunction(*OtherStore);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8e4267f898..ecc9fc3e45 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// if this is safe. For example, the use could be in dynamically unreached
// code.
if (!V->hasOneUse()) return 0;
-
+
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
-
+
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
I->setOperand(0, V2);
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
I->setIsExact();
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
I->setHasNoUnsignedWrap();
MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// TODO: Lots more we could do here:
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
-
+
return MadeChange ? V : 0;
}
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
LHSExt = LHSExt.zext(W * 2);
RHSExt = RHSExt.zext(W * 2);
}
-
+
APInt MulExt = LHSExt * RHSExt;
-
+
if (!sign)
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-
+
APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
+
// ((X << C1)*C2) == (X * (C2 << C1))
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
-
+
const APInt &Val = CI->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
return Shl;
}
-
+
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
}
-
+
// Simplify mul instructions with a constant RHS.
- if (isa<Constant>(Op1)) {
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *Op1C = Op1;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
+ (BO->getOpcode() != Instruction::UDiv &&
BO->getOpcode() != Instruction::SDiv)) {
Op1C = Op0;
BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_Shl(m_One(), m_Value(Y))))
return BinaryOperator::CreateShl(Op0, Y);
}
-
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-
+
Value *BoolCast = 0, *OtherOp = 0;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
return;
if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
return;
-
+
ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
if (CFP && CFP->isExactlyValue(0.5)) {
Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
CFP = dyn_cast<ConstantFP>(I->getOperand(1));
if (CFP && CFP->isExactlyValue(0.5))
Y = I->getOperand(0);
-}
+}
/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
/// true iff the given value is FMul or FDiv with one and only one operand
/// being a normal constant (i.e. not Zero/NaN/Infinity).
static bool isFMulOrFDivWithConstant(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || (I->getOpcode() != Instruction::FMul &&
+ if (!I || (I->getOpcode() != Instruction::FMul &&
I->getOpcode() != Instruction::FDiv))
return false;
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the
+/// This function is to simplify "FMulOrDiv * C" and returns the
/// resulting expression. Note that this function could return NULL in
/// case the constants cannot be folded into a normal floating-point.
-///
+///
Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
Instruction *InsertBefore) {
assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
if (isNormalFp(F)) {
R = BinaryOperator::CreateFMul(Opnd0, F);
} else {
- // (X / C1) * C => X / (C1/C)
+ // (X / C1) * C => X / (C1/C)
Constant *F = ConstantExpr::getFDiv(C1, C);
if (isNormalFp(cast<ConstantFP>(F)))
R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2)
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
Instruction *FAddSub = dyn_cast<Instruction>(Op0);
if (FAddSub &&
(FAddSub->getOpcode() == Instruction::FAdd ||
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C0) {
std::swap(C0, C1);
std::swap(Opnd0, Opnd1);
- Swap = true;
+ Swap = true;
}
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
- Value *M0 = ConstantExpr::getFMul(C1, C);
- Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ?
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
+ // The purpose is two-fold:
// 1) to form a power expression (of X).
// 2) potentially shorten the critical path: After transformation, the
// latency of the instruction Y is amortized by the expression of X*X,
@@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // B * (uitofp i1 C) -> select C, B, 0
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *B, *C;
+ if (!match(RHS, m_UIToFp(m_Value(C))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ B = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(B->getType());
+ return SelectInst::Create(C, B, Zero);
+ }
+ }
+
+ // A * (1 - uitofp i1 C) -> select C, 0, A
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *A, *C;
+ if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+ C->getType()->isIntegerTy(1)) {
+ A = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(A->getType());
+ return SelectInst::Create(C, Zero, A);
+ }
+ }
+
if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);
else
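Both new select folds require the nnan, ninf and nsz flags on the fmul; sketches of the two patterns:

    %f = uitofp i1 %c to float
    %r = fmul nnan ninf nsz float %b, %f
    ; => %r = select i1 %c, float %b, float -0.0

    %g  = fsub float 1.0, %f
    %r2 = fmul nnan ninf nsz float %a, %g
    ; => %r2 = select i1 %c, float -0.0, float %a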
@@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-
+
// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
int NonNullOperand = -1;
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
if (ST->isNullValue())
NonNullOperand = 1;
-
+
if (NonNullOperand == -1)
return false;
-
+
Value *SelectCond = SI->getOperand(0);
-
+
// Change the div/rem to use 'Y' instead of the select.
I.setOperand(1, SI->getOperand(NonNullOperand));
-
+
   // Okay, we know we can replace the operand of the div/rem with 'Y' with no
// problem. However, the select, or the condition of the select may have
// multiple uses. Based on our knowledge that the operand must be non-zero,
// propagate the known value for the select into other uses of it, and
// propagate a known value of the condition into its other users.
-
+
   // If the select and condition only have a single use, don't bother with
   // this; exit early.
if (SI->use_empty() && SelectCond->hasOneUse())
return true;
-
+
// Scan the current block backward, looking for other uses of SI.
BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
+
while (BBI != BBFront) {
--BBI;
// If we found a call to a function, we can't assume it will return, so
// information from below it cannot be propagated above it.
if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
break;
-
+
// Replace uses of the select or its condition with the known values.
for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
I != E; ++I) {
@@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
Worklist.Add(BBI);
}
}
-
+
     // If we're past the instruction, quit looking for it.
if (&*BBI == SI)
SI = 0;
if (&*BBI == SelectCond)
SelectCond = 0;
-
+
// If we ran out of things to eliminate, break out of the loop.
if (SelectCond == 0 && SI == 0)
break;
-
+
}
return true;
}
@@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
I.setOperand(1, V);
return &I;
}
-
+
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
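    // Illustrative sketch (hypothetical IR):
    //   %r = udiv exact i32 %x, 16   ==>   %r = lshr exact i32 %x, 4
    // since logBase2(16) == 4.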
const APInt *C;
if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(),
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
@@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, N);
}
}
-
+
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
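  // Illustrative sketch (hypothetical IR):
  //   %d = select i1 %c, i32 8, i32 2
  //   %r = udiv i32 %x, %d
  // ==>
  //   %t = lshr i32 %x, 3
  //   %f = lshr i32 %x, 1
  //   %r = select i1 %c, i32 %t, i32 %f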
{ Value *Cond; const APInt *C1, *C2;
@@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Construct the "on true" case of the select
Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
I.isExact());
-
+
// Construct the "on false" case of the select
Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
I.isExact());
-
+
     // Construct the select instruction and return it.
return SelectInst::Create(Cond, TSI, FSI);
}
@@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
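      // Illustrative sketch: if both operands were, say, masked with
      // 0x7FFFFFFF beforehand, their sign bits are known clear and
      //   %r = sdiv i32 %x, %y   ==>   %r = udiv i32 %x, %y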
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
-
+
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
}
-
+
return 0;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a
/// special FP value and:
-/// 1) 1/C is exact, or
+/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
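///
/// Illustrative sketch (hypothetical constants): 0.25 is the exact
/// reciprocal of 4.0, so X/4.0 becomes X*0.25 unconditionally, whereas
/// X/3.0 becomes X*(1.0/3.0) only when reciprocal math is allowed.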
@@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
const APFloat &FpVal = Divisor->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
-
+
if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
(void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Constant *C = ConstantExpr::getFMul(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
if (F.isNormal() && !F.isDenormal()) {
- Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
AllowReciprocal);
if (!Res)
- Res = BinaryOperator::CreateFDiv(X, C);
+ Res = BinaryOperator::CreateFDiv(X, C);
}
}
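    // Illustrative sketch of the constant merge above (hypothetical
    // constants):
    //   (X / 2.0) / 4.0  ==>  X / 8.0  ==>  X * 0.125
    // where the final step applies because 0.125 is an exact reciprocal.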
@@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Fold) {
const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
if (FoldC.isNormal() && !FoldC.isDenormal()) {
- Instruction *R = CreateDiv ?
+ Instruction *R = CreateDiv ?
BinaryOperator::CreateFDiv(Fold, X) :
BinaryOperator::CreateFMul(X, Fold);
R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
-
+
  // X urem C -> X and C-1, where C is a power of 2
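  // Illustrative sketch (hypothetical IR):
  //   %r = urem i32 %x, 8   ==>   %r = and i32 %x, 7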
{ const APInt *C;
if (match(Op1, m_Power2(C)))
@@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
ConstantInt::get(I.getType(), *C-1));
}
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
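  // Illustrative sketch (hypothetical IR):
  //   %d = shl i32 4, %n
  //   %r = urem i32 %a, %d
  // ==>
  //   %m = add i32 %d, -1
  //   %r = and i32 %a, %m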
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
@@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
-
+
if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<Constant>(RHSNeg) ||
(isa<ConstantInt>(RHSNeg) &&
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998cca7..bd14e81c3f 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
unsigned Opc = FirstInst->getOpcode();
Value *LHSVal = FirstInst->getOperand(0);
Value *RHSVal = FirstInst->getOperand(1);
-
+
Type *LHSType = LHSVal->getType();
Type *RHSType = RHSVal->getType();
-
+
bool isNUW = false, isNSW = false, isExact = false;
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
} else if (PossiblyExactOperator *PEO =
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
if (isExact)
isExact = cast<PossiblyExactOperator>(I)->isExact();
-
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
return 0;
-
+
// Otherwise, this is safe to transform!
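   // Illustrative sketch (hypothetical IR) of the rewrite this performs:
   //   %a = add i32 %x, %v                            ; in %pred1
   //   %b = add i32 %y, %v                            ; in %pred2
   //   %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
   // Only the LHS varies, so this becomes
   //   %lhs = phi i32 [ %x, %pred1 ], [ %y, %pred2 ]
   //   %p   = add i32 %lhs, %v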
-
+
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewLHS, PN);
LHSVal = NewLHS;
}
-
+
if (RHSVal == 0) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewRHS, PN);
RHSVal = NewRHS;
}
-
+
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
}
-
+
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-
- SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
// This is true if all GEP bases are allocas and if all indices into them are
// constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// more than one phi, which leads to higher register pressure. This is
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
-
+
bool AllInBounds = true;
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
return 0;
AllInBounds &= GEP->isInBounds();
-
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
!GEP->hasAllConstantIndices()))
AllBasePointersAreAllocas = false;
-
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
continue;
-
+
// Don't merge two GEPs when two operands differ (introducing phi nodes)
// if one of the PHIs has a constant for the index. The index may be
// substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
return 0;
-
+
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
return 0;
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
NeededPhi = true;
}
}
-
+
// If all of the base pointers of the PHI'd GEPs are from allocas, don't
// bother doing this transformation. At best, this will just save a bit of
// offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
return 0;
-
+
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
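   // Illustrative sketch (hypothetical IR): two GEPs that differ only in
   // one index get a PHI for just that index:
   //   %g1 = getelementptr i32* %base, i32 %i         ; in %pred1
   //   %g2 = getelementptr i32* %base, i32 %j         ; in %pred2
   // ==>
   //   %idx = phi i32 [ %i, %pred1 ], [ %j, %pred2 ]
   //   %g   = getelementptr i32* %base, i32 %idx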
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
+
bool HasAnyPHIs = false;
for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
if (FixedOperands[i]) continue; // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
FirstOp->getName()+".pn");
InsertNewInstBefore(NewPN, PN);
-
+
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[i] = NewPN;
FixedOperands[i] = NewPN;
HasAnyPHIs = true;
}
-
+
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
BasicBlock *InBB = PN.getIncomingBlock(i);
-
+
for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
if (PHINode *OpPhi = OperandPhis[op])
OpPhi->addIncoming(InGEP->getOperand(op), InBB);
}
}
-
+
Value *Base = FixedOperands[0];
- GetElementPtrInst *NewGEP =
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
+
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
isAddressTaken = true;
break;
}
-
+
if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
-
+
// If this load is a load from a GEP with a constant offset from an alloca,
// then we don't want to sink it. In its present form, it will be
// load [constant stack offset]. Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
return false;
-
+
return true;
}
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
bool isVolatile = FirstLI->isVolatile();
unsigned LoadAlignment = FirstLI->getAlignment();
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-
+
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
return 0;
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
+
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
return 0;
-
- // We can't sink the load if the loaded value could be modified between
+
+ // We can't sink the load if the loaded value could be modified between
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
return 0;
-
+
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
return 0;
-
+
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
}
-
+
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
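   // Illustrative sketch (hypothetical IR):
   //   %a = load i32* %p                              ; in %pred1
   //   %b = load i32* %q                              ; in %pred2
   //   %v = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
   // ==>
   //   %ptr = phi i32* [ %p, %pred1 ], [ %q, %pred2 ]
   //   %v   = load i32* %ptr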
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
PN.getNumIncomingValues(),
PN.getName()+".in");
-
+
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
+
// Add all operands to the new PHI.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InVal = 0;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
-
+
Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewPN, PN);
PhiVal = NewPN;
}
-
+
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-
+
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return FoldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
return FoldPHIArgLoadIntoPHI(PN);
-
+
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Constant *ConstantOp = 0;
Type *CastSrcTy = 0;
bool isNUW = false, isNSW = false, isExact = false;
-
+
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return 0;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
- // Can fold binop, compare or shift here if the RHS is a constant,
+ // Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
-
+
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
NewCI->setDebugLoc(FirstInst->getDebugLoc());
return NewCI;
}
-
+
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
BinOp->setDebugLoc(FirstInst->getDebugLoc());
return BinOp;
}
-
+
CmpInst *CIOp = cast<CmpInst>(FirstInst);
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
// Remember this node, and if we find the cycle, return.
if (!PotentiallyDeadPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (PotentiallyDeadPHIs.size() == 16)
return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
/// PHIsEqualValue - Return true if this phi node is always equal to
/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
// See if we already saw this PHI node.
if (!ValueEqualPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (ValueEqualPHIs.size() == 16)
return false;
-
+
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
} else if (Op != NonPhiInVal)
return false;
}
-
+
return true;
}
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
     unsigned PHIId;     // The ID # of the PHI (something deterministic to sort on)
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
-
+
PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
: PHIId(pn), Shift(Sh), Inst(User) {}
-
+
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
RHS.Inst->getType()->getPrimitiveSizeInBits();
}
};
-
+
struct LoweredPHIRecord {
PHINode *PN; // The PHI that was lowered.
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
-
+
LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
: PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
+
// Ctor form used by DenseMap.
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHIUsers - Keep track of all of the truncated values extracted from a set
// of PHIs, along with their offset. These are the things we want to rewrite.
SmallVector<PHIUsageRecord, 16> PHIUsers;
-
+
// PHIs are often mutually cyclic, so we keep track of a whole set of PHI
   // nodes which are extracted from.  PHIsInspected is a set we use to avoid
   // revisiting PHIs, PHIsToSlice is an ordered list of PHIs that we need to
   // check the uses of (to ensure they are all extracts).
SmallVector<PHINode*, 8> PHIsToSlice;
SmallPtrSet<PHINode*, 8> PHIsInspected;
-
+
PHIsToSlice.push_back(&FirstPhi);
PHIsInspected.insert(&FirstPhi);
-
+
for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
PHINode *PN = PHIsToSlice[PHIId];
-
+
// Scan the input list of the PHI. If any input is an invoke, and if the
     // input is defined in the predecessor, then we won't be able to split the critical
// edge which is required to insert a truncate. Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (II == 0) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
-
+
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
return 0;
}
-
-
+
+
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
// If the user is a PHI, inspect its uses recursively.
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (PHIsInspected.insert(UserPN))
PHIsToSlice.push_back(UserPN);
continue;
}
-
+
// Truncates are always ok.
if (isa<TruncInst>(User)) {
PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
continue;
}
-
+
// Otherwise it must be a lshr which can only be used by one trunc.
if (User->getOpcode() != Instruction::LShr ||
!User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
!isa<ConstantInt>(User->getOperand(1)))
return 0;
-
+
unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
}
}
-
+
   // If we have no users, they must all be self uses; just nuke the PHI.
if (PHIUsers.empty())
return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
+
// If this phi node is transformable, create new PHIs for all the pieces
// extracted out of it. First, sort the users by their offset and size.
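   // Illustrative sketch (hypothetical IR): an illegal i64 PHI whose only
   // uses are
   //   %lo = trunc i64 %p to i32
   //   %sh = lshr i64 %p, 32
   //   %hi = trunc i64 %sh to i32
   // is sliced into two legal i32 PHIs, one for offset 0 and one for
   // offset 32, with the extracts performed in the predecessors.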
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
+
DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
);
-
+
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
DenseMap<BasicBlock*, Value*> PredValues;
-
+
// ExtractedVals - Each new PHI we introduce is saved here so we don't
// introduce redundant PHIs.
DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
+
for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
unsigned PHIId = PHIUsers[UserI].PHIId;
PHINode *PN = PHIsToSlice[PHIId];
unsigned Offset = PHIUsers[UserI].Shift;
Type *Ty = PHIUsers[UserI].Inst->getType();
-
+
PHINode *EltPHI;
-
+
// If we've already lowered a user like this, reuse the previously lowered
// value.
if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
+
// Otherwise, Create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
PN->getName()+".off"+Twine(Offset), PN);
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
Value *&PredVal = PredValues[Pred];
-
+
// If we already have a value for this predecessor, reuse it.
if (PredVal) {
EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
EltPHI->addIncoming(PredVal, Pred);
continue;
}
-
+
if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
// If the incoming value was a PHI, and if it was one of the PHIs we
// already rewrote it, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
continue;
}
}
-
+
// Otherwise, do an extract in the predecessor.
Builder->SetInsertPoint(Pred, Pred->getTerminator());
Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Res = Builder->CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
-
+
// If the incoming value was a PHI, and if it was one of the PHIs we are
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
cast<Instruction>(Res)));
++UserE;
}
}
PredValues.clear();
-
+
DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
-
+
// Replace the use of this piece with the PHI node.
ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
}
-
+
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with undefs.
Value *Undef = UndefValue::get(FirstPhi.getType());
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (DeadPHICycle(PU, PotentiallyDeadPHIs))
return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
-
+
// If this phi has a single use, and if that use just computes a value for
// the next iteration of a loop, delete the phi. This occurs with unused
// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (InValNo != NumIncomingVals) {
Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-
+
// Scan the rest of the operands to see if there are any conflicts, if so
// there is no need to recursively scan other phis.
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
}
-
+
// If we scanned over all operands, then we have one unique value plus
// phi values. Scan PHI nodes to see if they all merge in each other or
// the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
-
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a262d711d3..59502fb988 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// If this is a non-volatile load or a cast from the same type,
// merge.
if (TI->isCast()) {
- if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
return 0;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
- FI->getOperand(0)->getType()->getVectorNumElements())
+ if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+ CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
return 0;
} else {
return 0; // unknown unary op.
@@ -349,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return 0;
}
+/// foldSelectICmpAndOr - We want to turn:
+/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+///   (or (shl (and X, C1), C3), Y)
+/// iff:
+/// C1 and C2 are both powers of 2
+/// where:
+/// C3 = Log(C2) - Log(C1)
+///
+/// This transform handles cases where:
+/// 1. The icmp predicate is inverted
+/// 2. The select operands are reversed
+/// 3. The magnitude of C2 and C1 are flipped
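///
/// Illustrative sketch (hypothetical constants): with C1 = 1 and C2 = 4,
/// C3 = log2(4) - log2(1) = 2, and
///   (select (icmp eq (and X, 1), 0), Y, (or Y, 4))
/// ==>
///   (or (shl (and X, 1), 2), Y)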
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+
+ if (!match(CmpRHS, m_Zero()))
+ return 0;
+
+ Value *X;
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ return 0;
+
+ const APInt *C2;
+ bool OrOnTrueVal = false;
+ bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+ if (!OrOnFalseVal)
+ OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+ if (!OrOnFalseVal && !OrOnTrueVal)
+ return 0;
+
+ Value *V = CmpLHS;
+ Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+
+ unsigned C1Log = C1->logBase2();
+ unsigned C2Log = C2->logBase2();
+ if (C2Log > C1Log) {
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder->CreateShl(V, C2Log - C1Log);
+ } else if (C1Log > C2Log) {
+ V = Builder->CreateLShr(V, C1Log - C2Log);
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ } else
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ V = Builder->CreateXor(V, *C2);
+
+ return Builder->CreateOr(V, Y);
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -520,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : 0;
}
@@ -675,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return BinaryOperator::CreateAnd(NotCond, FalseVal);
- } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -689,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select a, a, b -> a|b
if (CondVal == TrueVal)
return BinaryOperator::CreateOr(CondVal, FalseVal);
- else if (CondVal == FalseVal)
+ if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
// select a, ~a, b -> (~a)&b
// select a, b, ~a -> (~a)|b
if (match(TrueVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateAnd(TrueVal, FalseVal);
- else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ if (match(FalseVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
@@ -837,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NewFalseOp = NegVal;
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
- Value *NewSel =
+ Value *NewSel =
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
@@ -861,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *LHS, *RHS, *LHS2, *RHS2;
if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
@@ -907,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -917,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
- // Form a shufflevector instruction.
- SmallVector<Constant *, 8> Mask(VWidth);
- Type *Int32Ty = Type::getInt32Ty(CV->getContext());
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elem = cast<Constant>(CV->getOperand(i));
- if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
- Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
- else if (isa<UndefValue>(Elem))
- Mask[i] = UndefValue::get(Int32Ty);
- else
- return 0;
- }
- Constant *MaskVal = ConstantVector::get(Mask);
- Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
- return ReplaceInstUsesWith(SI, V);
- }
-
if (isa<ConstantAggregateZero>(CondVal)) {
return ReplaceInstUsesWith(SI, FalseVal);
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1a4b..4301ddb5aa 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return 0;
}
+// If we have a PHI node with a vector type that has only 2 uses: feeding
+// itself and being an operand of an extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
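// Illustrative sketch (hypothetical IR): a vector accumulator such as
//   %v = phi <4 x float> [ %init, %ph ], [ %next, %loop ]
//   %next = fadd <4 x float> %v, %w
//   %e = extractelement <4 x float> %v, i32 0
// becomes a scalar PHI over lane 0, with the fadd scalarized to match.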
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+ // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+ if (!PN->hasNUses(2))
+ return NULL;
+
+  // If so, we know that one of the two users is the extractelement node
+  // itself. Find the PHI user that is not the extractelement node.
+ Value::use_iterator iu = PN->use_begin();
+ Instruction *PHIUser = dyn_cast<Instruction>(*iu);
+ if (PHIUser == cast<Instruction>(&EI))
+ PHIUser = cast<Instruction>(*(++iu));
+
+ // Verify that this PHI user has one use, which is the PHI itself,
+  // and that it is a binary operation which is cheap to scalarize;
+  // otherwise return NULL.
+ if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !CheapToScalarize(PHIUser, true))
+ return NULL;
+
+ // Create a scalar PHI node that will replace the vector PHI node
+ // just before the current PHI node.
+  PHINode *scalarPHI = cast<PHINode>(
+ InsertNewInstWith(PHINode::Create(EI.getType(),
+ PN->getNumIncomingValues(), ""), *PN));
+ // Scalarize each PHI operand.
+  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ Value *PHIInVal = PN->getIncomingValue(i);
+ BasicBlock *inBB = PN->getIncomingBlock(i);
+ Value *Elt = EI.getIndexOperand();
+ // If the operand is the PHI induction variable:
+ if (PHIInVal == PHIUser) {
+ // Scalarize the binary operation. Its first operand is the
+ // scalar PHI and the second operand is extracted from the other
+ // vector operand.
+ BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+ unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
+ Value *Op = Builder->CreateExtractElement(
+ B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ Value *newPHIUser = InsertNewInstWith(
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
+ *B0);
+ scalarPHI->addIncoming(newPHIUser, inBB);
+ } else {
+ // Scalarize PHI input:
+ Instruction *newEI =
+ ExtractElementInst::Create(PHIInVal, Elt, "");
+ // Insert the new instruction into the predecessor basic block.
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+ BasicBlock::iterator InsertPos;
+ if (pos && !isa<PHINode>(pos)) {
+ InsertPos = pos;
+ ++InsertPos;
+ } else {
+ InsertPos = inBB->getFirstInsertionPt();
+ }
+
+ InsertNewInstWith(newEI, *InsertPos);
+
+ scalarPHI->addIncoming(newEI, inBB);
+ }
+ }
+ return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
+
+ // If there's a vector PHI feeding a scalar use through this extractelement
+ // instruction, try to scalarize the PHI.
+ if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+      return scalarPHI;
+ }
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement)
// bitcasts can change the number of vector elements and they cost nothing
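      // Illustrative sketch (hypothetical IR):
      //   %w = fpext <2 x float> %v to <2 x double>
      //   %e = extractelement <2 x double> %w, i32 0
      // ==>
      //   %x = extractelement <2 x float> %v, i32 0
      //   %e = fpext float %x to double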
- if (CI->hasOneUse() && EI.hasOneUse() &&
- (CI->getOpcode() != Instruction::BitCast)) {
+ if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
+ Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
}
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Update Mask to reflect that `ScalarOp' has been inserted at
+ // position `InsertedIdx' within the vector returned by IEI.
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3e91..ec10751202 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
Module *M = II->getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
- ArrayRef<Value *>(), "", II->getParent());
+ None, "", II->getParent());
}
return EraseInstFromFunction(MI);
}