diff options
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAddSub.cpp')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 279 |
1 files changed, 215 insertions, 64 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..166f8dfdb4 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -24,9 +24,9 @@ namespace { /// Class representing coefficient of floating-point addend. /// This class needs to be highly efficient, which is especially true for /// the constructor. As of I write this comment, the cost of the default - /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to /// perform write-merging). - /// + /// class FAddendCoef { public: // The constructor has to initialize a APFloat, which is uncessary for @@ -37,31 +37,31 @@ namespace { // FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} ~FAddendCoef(); - + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; } - + void set(const APFloat& C); - + void negate(); - + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - + // If possible, don't define operator+/operator- etc because these // operators inevitably call FAddendCoef's constructor which is not cheap. void operator=(const FAddendCoef &A); void operator+=(const FAddendCoef &A); void operator-=(const FAddendCoef &A); void operator*=(const FAddendCoef &S); - + bool isOne() const { return isInt() && IntVal == 1; } bool isTwo() const { return isInt() && IntVal == 2; } bool isMinusOne() const { return isInt() && IntVal == -1; } bool isMinusTwo() const { return isInt() && IntVal == -2; } - + private: bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) @@ -74,18 +74,28 @@ namespace { return *getFpValPtr(); } - APFloat &getFpVal(void) - { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } - + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + bool isInt() const { return !IsFp; } + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; - + // True iff FpValBuf contains an instance of APFloat. bool BufHasFpVal; - + // The integer coefficient of an individual addend is either 1 or -1, // and we try to simplify at most 4 addends from neighboring at most // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt @@ -94,7 +104,7 @@ namespace { AlignedCharArrayUnion<APFloat> FpValBuf; }; - + /// FAddend is used to represent floating-point addend. An addend is /// represented as <C, V>, where the V is a symbolic value, and C is a /// constant coefficient. A constant addend is represented as <C, 0>. @@ -102,10 +112,10 @@ namespace { class FAddend { public: FAddend() { Val = 0; } - + Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - + bool isConstant() const { return Val == 0; } bool isZero() const { return Coeff.isZero(); } @@ -114,17 +124,17 @@ namespace { { Coeff.set(Coefficient); Val = V; } void set(const ConstantFP* Coefficient, Value *V) { Coeff.set(Coefficient->getValueAPF()); Val = V; } - + void negate() { Coeff.negate(); } - + /// Drill down the U-D chain one step to find the definition of V, and /// try to break the definition into one or two addends. static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); - + /// Similar to FAddend::drillDownOneStep() except that the value being /// splitted is the addend itself. unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; - + void operator+=(const FAddend &T) { assert((Val == T.Val) && "Symbolic-values disagree"); Coeff += T.Coeff; @@ -132,12 +142,12 @@ namespace { private: void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } - + // This addend has the value of "Coeff * Val". Value *Val; FAddendCoef Coeff; }; - + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along /// with its neighboring at most two instructions. /// @@ -145,27 +155,30 @@ namespace { public: FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} Value *simplify(Instruction *FAdd); - + private: typedef SmallVector<const FAddend*, 4> AddendVect; - + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); - + /// Return the number of instructions needed to emit the N-ary addition. unsigned calcInstrNumber(const AddendVect& Vect); Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); - + InstCombiner::BuilderTy *Builder; Instruction *Instr; - + private: // Debugging stuff are clustered here. #ifndef NDEBUG @@ -177,7 +190,7 @@ namespace { void incCreateInstNum() {} #endif }; -} +} //===----------------------------------------------------------------------===// // @@ -200,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) { } else *P = C; - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; } -void FAddendCoef::operator=(const FAddendCoef& That) { +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { if (That.isInt()) set(That.IntVal); else @@ -219,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { getFpVal().add(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); return; } - + APFloat &T = getFpVal(); - T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } void FAddendCoef::operator-=(const FAddendCoef &That) { @@ -240,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { getFpVal().subtract(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); return; } APFloat &T = getFpVal(); - T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); } void FAddendCoef::operator*=(const FAddendCoef &That) { @@ -268,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { return; } - const fltSemantics &Semantic = + const fltSemantics &Semantic = isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) - set(APFloat(Semantic, IntVal)); + convertToFpType(Semantic); APFloat &F0 = getFpVal(); if (That.isInt()) - F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); else F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); @@ -302,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const { // A - B <1, A>, <1,B> // 0 - B <-1, B> // C * A, <C, A> -// A + C <1, A> <C, NULL> +// A + C <1, A> <C, NULL> // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// +// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = 0; @@ -377,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep return 0; unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); - if (!BreakNum || Coeff.isOne()) + if (!BreakNum || Coeff.isOne()) return BreakNum; Addend0.Scale(Coeff); @@ -388,6 +426,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -398,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - // Save the instruction before calling other member-functions. + // Save the instruction before calling other member-functions. Instr = I; FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; @@ -409,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { unsigned Opnd0_ExpNum = 0; unsigned Opnd1_ExpNum = 0; - if (!Opnd0.isConstant()) + if (!Opnd0.isConstant()) Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. @@ -431,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) @@ -471,7 +581,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -479,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { unsigned AddendNum = Addends.size(); assert(AddendNum <= 4 && "Too many addends"); - // For saving intermediate results; + // For saving intermediate results; unsigned NextTmpIdx = 0; FAddend TmpResult[3]; @@ -495,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { AddendVect SimpVect; // The outer loop works on one symbolic-value at a time. Suppose the input - // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... // The symbolic-values will be processed in this order: x, y, z. // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { @@ -522,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = 0; SimpVect.push_back(T); } } @@ -535,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { R += *SimpVect[Idx]; // Pop all addends being folded and push the resulting folded addend. - SimpVect.resize(StartIdx); + SimpVect.resize(StartIdx); if (Val != 0) { if (!R.isZero()) { SimpVect.push_back(&R); @@ -548,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { } } - assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && "out-of-bound access"); if (ConstAdd) @@ -570,7 +681,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. - // + // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return 0; @@ -591,7 +702,7 @@ Value *FAddCombine::createNaryFAdd // Iterate the addends, creating fadd/fsub using adjacent two addends. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); I != E; I++) { - bool NeedNeg; + bool NeedNeg; Value *V = createAddendVal(**I, NeedNeg); if (!LastVal) { LastVal = V; @@ -617,7 +728,7 @@ Value *FAddCombine::createNaryFAdd } #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && + assert(CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers"); #endif @@ -627,7 +738,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +751,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -665,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; // Adjust the number of instructions needed to emit the N-ary add. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); @@ -853,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); } } @@ -1111,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + if (I.hasUnsafeAlgebra()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return ReplaceInstUsesWith(I, V); |