diff options
author | Eli Bendersky <eliben@chromium.org> | 2013-07-15 16:09:15 -0700 |
---|---|---|
committer | Eli Bendersky <eliben@chromium.org> | 2013-07-15 16:09:15 -0700 |
commit | c6cf05cb5108f356dde97c01ee4188b0671d4542 (patch) | |
tree | 436fdc2a55296d3c202e7ef11f31be3be53efb5f /lib/Transforms/InstCombine | |
parent | c75199c649c739aade160289d93f257edc798cde (diff) | |
parent | 7dfcb84fc16b3bf6b2379713b53090757f0a45f9 (diff) |
Merge commit '7dfcb84fc16b3bf6b2379713b53090757f0a45f9'
Conflicts:
docs/LangRef.rst
include/llvm/CodeGen/CallingConvLower.h
include/llvm/IRReader/IRReader.h
include/llvm/Target/TargetMachine.h
lib/CodeGen/CallingConvLower.cpp
lib/IRReader/IRReader.cpp
lib/IRReader/LLVMBuild.txt
lib/IRReader/Makefile
lib/LLVMBuild.txt
lib/Makefile
lib/Support/MemoryBuffer.cpp
lib/Support/Unix/PathV2.inc
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMInstrInfo.td
lib/Target/ARM/ARMSubtarget.cpp
lib/Target/ARM/ARMTargetMachine.cpp
lib/Target/Mips/CMakeLists.txt
lib/Target/Mips/MipsDelaySlotFiller.cpp
lib/Target/Mips/MipsISelLowering.cpp
lib/Target/Mips/MipsInstrInfo.td
lib/Target/Mips/MipsSubtarget.cpp
lib/Target/Mips/MipsSubtarget.h
lib/Target/X86/X86FastISel.cpp
lib/Target/X86/X86ISelDAGToDAG.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrControl.td
lib/Target/X86/X86InstrFormats.td
lib/Transforms/IPO/ExtractGV.cpp
lib/Transforms/InstCombine/InstCombineCompares.cpp
lib/Transforms/Utils/SimplifyLibCalls.cpp
test/CodeGen/X86/fast-isel-divrem.ll
test/MC/ARM/data-in-code.ll
tools/Makefile
tools/llvm-extract/llvm-extract.cpp
tools/llvm-link/CMakeLists.txt
tools/opt/CMakeLists.txt
tools/opt/LLVMBuild.txt
tools/opt/Makefile
tools/opt/opt.cpp
Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 1 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 279 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 28 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 16 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 176 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 82 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 145 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombinePHI.cpp | 192 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 103 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 85 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 2 |
13 files changed, 790 insertions, 324 deletions
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index 72cfe2c985..a25696ec03 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine InstCombineMulDivRem.cpp InstCombinePHI.cpp InstCombineSelect.cpp - InstCombineShifts.cpp + InstCombineShifts.cpp InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 1f6a3a5e33..2a36074750 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -233,6 +233,7 @@ private: Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); + Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); public: // InsertNewInstBefore - insert an instruction New before instruction Old diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..166f8dfdb4 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -24,9 +24,9 @@ namespace { /// Class representing coefficient of floating-point addend. /// This class needs to be highly efficient, which is especially true for /// the constructor. As of I write this comment, the cost of the default - /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to /// perform write-merging). - /// + /// class FAddendCoef { public: // The constructor has to initialize a APFloat, which is uncessary for @@ -37,31 +37,31 @@ namespace { // FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} ~FAddendCoef(); - + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; } - + void set(const APFloat& C); - + void negate(); - + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - + // If possible, don't define operator+/operator- etc because these // operators inevitably call FAddendCoef's constructor which is not cheap. void operator=(const FAddendCoef &A); void operator+=(const FAddendCoef &A); void operator-=(const FAddendCoef &A); void operator*=(const FAddendCoef &S); - + bool isOne() const { return isInt() && IntVal == 1; } bool isTwo() const { return isInt() && IntVal == 2; } bool isMinusOne() const { return isInt() && IntVal == -1; } bool isMinusTwo() const { return isInt() && IntVal == -2; } - + private: bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) @@ -74,18 +74,28 @@ namespace { return *getFpValPtr(); } - APFloat &getFpVal(void) - { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } - + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + bool isInt() const { return !IsFp; } + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; - + // True iff FpValBuf contains an instance of APFloat. bool BufHasFpVal; - + // The integer coefficient of an individual addend is either 1 or -1, // and we try to simplify at most 4 addends from neighboring at most // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt @@ -94,7 +104,7 @@ namespace { AlignedCharArrayUnion<APFloat> FpValBuf; }; - + /// FAddend is used to represent floating-point addend. An addend is /// represented as <C, V>, where the V is a symbolic value, and C is a /// constant coefficient. A constant addend is represented as <C, 0>. @@ -102,10 +112,10 @@ namespace { class FAddend { public: FAddend() { Val = 0; } - + Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - + bool isConstant() const { return Val == 0; } bool isZero() const { return Coeff.isZero(); } @@ -114,17 +124,17 @@ namespace { { Coeff.set(Coefficient); Val = V; } void set(const ConstantFP* Coefficient, Value *V) { Coeff.set(Coefficient->getValueAPF()); Val = V; } - + void negate() { Coeff.negate(); } - + /// Drill down the U-D chain one step to find the definition of V, and /// try to break the definition into one or two addends. static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); - + /// Similar to FAddend::drillDownOneStep() except that the value being /// splitted is the addend itself. unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; - + void operator+=(const FAddend &T) { assert((Val == T.Val) && "Symbolic-values disagree"); Coeff += T.Coeff; @@ -132,12 +142,12 @@ namespace { private: void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } - + // This addend has the value of "Coeff * Val". Value *Val; FAddendCoef Coeff; }; - + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along /// with its neighboring at most two instructions. /// @@ -145,27 +155,30 @@ namespace { public: FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} Value *simplify(Instruction *FAdd); - + private: typedef SmallVector<const FAddend*, 4> AddendVect; - + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); - + /// Return the number of instructions needed to emit the N-ary addition. unsigned calcInstrNumber(const AddendVect& Vect); Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); - + InstCombiner::BuilderTy *Builder; Instruction *Instr; - + private: // Debugging stuff are clustered here. #ifndef NDEBUG @@ -177,7 +190,7 @@ namespace { void incCreateInstNum() {} #endif }; -} +} //===----------------------------------------------------------------------===// // @@ -200,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) { } else *P = C; - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; } -void FAddendCoef::operator=(const FAddendCoef& That) { +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { if (That.isInt()) set(That.IntVal); else @@ -219,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { getFpVal().add(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); return; } - + APFloat &T = getFpVal(); - T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } void FAddendCoef::operator-=(const FAddendCoef &That) { @@ -240,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { getFpVal().subtract(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); return; } APFloat &T = getFpVal(); - T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); } void FAddendCoef::operator*=(const FAddendCoef &That) { @@ -268,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { return; } - const fltSemantics &Semantic = + const fltSemantics &Semantic = isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) - set(APFloat(Semantic, IntVal)); + convertToFpType(Semantic); APFloat &F0 = getFpVal(); if (That.isInt()) - F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); else F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); @@ -302,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const { // A - B <1, A>, <1,B> // 0 - B <-1, B> // C * A, <C, A> -// A + C <1, A> <C, NULL> +// A + C <1, A> <C, NULL> // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// +// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = 0; @@ -377,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep return 0; unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); - if (!BreakNum || Coeff.isOne()) + if (!BreakNum || Coeff.isOne()) return BreakNum; Addend0.Scale(Coeff); @@ -388,6 +426,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -398,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - // Save the instruction before calling other member-functions. + // Save the instruction before calling other member-functions. Instr = I; FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; @@ -409,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { unsigned Opnd0_ExpNum = 0; unsigned Opnd1_ExpNum = 0; - if (!Opnd0.isConstant()) + if (!Opnd0.isConstant()) Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. @@ -431,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) @@ -471,7 +581,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -479,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { unsigned AddendNum = Addends.size(); assert(AddendNum <= 4 && "Too many addends"); - // For saving intermediate results; + // For saving intermediate results; unsigned NextTmpIdx = 0; FAddend TmpResult[3]; @@ -495,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { AddendVect SimpVect; // The outer loop works on one symbolic-value at a time. Suppose the input - // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... // The symbolic-values will be processed in this order: x, y, z. // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { @@ -522,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = 0; SimpVect.push_back(T); } } @@ -535,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { R += *SimpVect[Idx]; // Pop all addends being folded and push the resulting folded addend. - SimpVect.resize(StartIdx); + SimpVect.resize(StartIdx); if (Val != 0) { if (!R.isZero()) { SimpVect.push_back(&R); @@ -548,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { } } - assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && "out-of-bound access"); if (ConstAdd) @@ -570,7 +681,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. - // + // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return 0; @@ -591,7 +702,7 @@ Value *FAddCombine::createNaryFAdd // Iterate the addends, creating fadd/fsub using adjacent two addends. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); I != E; I++) { - bool NeedNeg; + bool NeedNeg; Value *V = createAddendVal(**I, NeedNeg); if (!LastVal) { LastVal = V; @@ -617,7 +728,7 @@ Value *FAddCombine::createNaryFAdd } #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && + assert(CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers"); #endif @@ -627,7 +738,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +751,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -665,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; // Adjust the number of instructions needed to emit the N-ary add. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); @@ -853,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); } } @@ -1111,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + if (I.hasUnsafeAlgebra()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4332467371..ec75dd2e04 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -22,8 +22,8 @@ using namespace PatternMatch; /// AddOne - Add one to a ConstantInt. -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); } /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { @@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return 0; } - -/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient +/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise +/// (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. @@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) + return 0; + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { @@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst)) { - // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return Builder->CreateICmpULT(Add, AddCST); - } - if (LHS->getOperand(0) == RHS->getOperand(0)) { // if LHSCst and RHSCst differ only by one bit: // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 @@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return Builder->CreateICmpULT(Add, AddCST); + } + break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64cd1bd278..78b4a2c6c9 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = + AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a960ab2499..2ee1278d23 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; @@ -1604,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1625,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } @@ -1647,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6eca399a40..518a8323b6 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, } } +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). static bool isHighOnes(const ConstantInt *CI) { @@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // If a magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (ArrayElementCount <= 32 || - (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { - Type *Ty; - if (ArrayElementCount <= 32) + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } } return 0; @@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; } + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + |