diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e428f0485d..75f1c18a73 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -180,6 +180,9 @@ namespace { SDOperand N3, ISD::CondCode CC); SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1, ISD::CondCode Cond, bool foldBooleans = true); + + SDOperand BuildSDIV(SDNode *N); + SDOperand BuildUDIV(SDNode *N); public: DAGCombiner(SelectionDAG &D) : DAG(D), TLI(D.getTargetLoweringInfo()), AfterLegalize(false) {} @@ -189,6 +192,178 @@ public: }; }
/// ms - result of the signed magic-number computation (magic32 / magic64).
struct ms {
  int64_t m;  // magic number
  int64_t s;  // shift amount
};

/// mu - result of the unsigned magic-number computation (magicu32 / magicu64).
struct mu {
  uint64_t m; // magic number
  int64_t a;  // add indicator
  int64_t s;  // shift amount
};

/// magic32 - calculate the magic numbers required to codegen a 32-bit integer
/// sdiv as a sequence of multiply and shifts.  Requires that the divisor not
/// be 0, 1, or -1.  The multiplier is returned in ms::m (sign extended from
/// 32 bits, and negated when d is negative); the shift is returned in ms::s.
static ms magic32(int32_t d) {
  int32_t p;
  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
  const uint32_t two31 = 0x80000000U;
  struct ms mag;

  // Take |d| in unsigned arithmetic.  abs(d) is undefined for INT32_MIN,
  // whereas the wrapped unsigned value 2^31 is well defined.
  ad = d < 0 ? 0U - (uint32_t)d : (uint32_t)d;
  t = two31 + ((uint32_t)d >> 31);
  anc = t - 1 - t%ad;   // absolute value of nc
  p = 31;               // initialize p
  q1 = two31/anc;       // initialize q1 = 2^p/abs(nc)
  r1 = two31 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
  q2 = two31/ad;        // initialize q2 = 2^p/abs(d)
  r2 = two31 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
  do {
    p = p + 1;
    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
    if (r1 >= anc) {  // must be unsigned comparison
      q1 = q1 + 1;
      r1 = r1 - anc;
    }
    q2 = 2*q2;        // update q2 = 2^p/abs(d)
    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
    if (r2 >= ad) {   // must be unsigned comparison
      q2 = q2 + 1;
      r2 = r2 - ad;
    }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));

  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
  if (d < 0) mag.m = -mag.m; // resulting magic number
  mag.s = p - 32;            // resulting shift
  return mag;
}

/// magicu32 - calculate the magic numbers required to codegen a 32-bit integer
/// udiv as a sequence of multiply, add and shifts.  Requires that the divisor
/// not be 0.  The multiplier is returned in mu::m and the shift in mu::s;
/// mu::a is set to 1 when q2 is already too large to be doubled within 32
/// bits, in which case the caller (BuildUDIV) emits the longer
/// subtract/shift/add sequence.
static mu magicu32(uint32_t d) {
  int32_t p;
  uint32_t nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  nc = - 1 - (-d)%d;        // unsigned arithmetic: (2^32 - 1) - rem(2^32 - d, d)
  p = 31;                   // initialize p
  q1 = 0x80000000/nc;       // initialize q1 = 2^p/nc
  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2^p,nc)
  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2^p-1)/d
  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2^p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFF) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x80000000) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
    // p is bounded by twice the operand width (2 * 32).
  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 32;  // resulting shift
  return magu;
}

/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
/// or -1.
+static ms magic64(int64_t d) { + int64_t p; + uint64_t ad, anc, delta, q1, r1, q2, r2, t; + const uint64_t two63 = 9223372036854775808ULL; // 2^63 + struct ms mag; + + ad = llabs(d); + t = two63 + ((uint64_t)d >> 63); + anc = t - 1 - t%ad; // absolute value of nc + p = 63; // initialize p + q1 = two63/anc; // initialize q1 = 2p/abs(nc) + r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) + q2 = two63/ad; // initialize q2 = 2p/abs(d) + r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) + do { + p = p + 1; + q1 = 2*q1; // update q1 = 2p/abs(nc) + r1 = 2*r1; // update r1 = rem(2p/abs(nc)) + if (r1 >= anc) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2*q2; // update q2 = 2p/abs(d) + r2 = 2*r2; // update r2 = rem(2p/abs(d)) + if (r2 >= ad) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + mag.m = q2 + 1; + if (d < 0) mag.m = -mag.m; // resulting magic number + mag.s = p - 64; // resulting shift + return mag; +} + +/// magicu - calculate the magic numbers required to codegen an integer udiv as +/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
+static mu magicu64(uint64_t d) +{ + int64_t p; + uint64_t nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + nc = - 1 - (-d)%d; + p = 63; // initialize p + q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc + r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc) + q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d + r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1 >= nc - r1 ) { + q1 = 2*q1 + 1; // update q1 + r1 = 2*r1 - nc; // update r1 + } + else { + q1 = 2*q1; // update q1 + r1 = 2*r1; // update r1 + } + if (r2 + 1 >= d - r2) { + if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1; + q2 = 2*q2 + 1; // update q2 + r2 = 2*r2 + 1 - d; // update r2 + } + else { + if (q2 >= 0x8000000000000000ull) magu.a = 1; + q2 = 2*q2; // update q2 + r2 = 2*r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - 64; // resulting shift + return magu; +} + /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Op and Mask are known to /// be the same type. @@ -608,12 +783,23 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) { if (MaskedValueIsZero(N1, SignBit, TLI) && MaskedValueIsZero(N0, SignBit, TLI)) return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1); + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. + // FIXME: This currently opts out powers of two, since targets can often be + // more clever in those cases. In an idea world, we would have some way to + // detect that too. 
+ if (N1C && !isPowerOf2_64(N1C->getSignExtended()) && + (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && + TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isIntDivExpensive()) { + return BuildSDIV(N); + } return SDOperand(); } SDOperand DAGCombiner::visitUDIV(SDNode *N) { SDOperand N0 = N->getOperand(0); SDOperand N1 = N->getOperand(1); + MVT::ValueType VT = N->getValueType(0); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); @@ -626,6 +812,10 @@ SDOperand DAGCombiner::visitUDIV(SDNode *N) { return DAG.getNode(ISD::SRL, N->getValueType(0), N0, DAG.getConstant(Log2_64(N1C->getValue()), TLI.getShiftAmountTy())); + // fold (udiv x, c) -> alternate + if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) && + TLI.isIntDivExpensive()) + return BuildUDIV(N); return SDOperand(); } @@ -2356,6 +2546,79 @@ SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0, return SDOperand(); } +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDOperand DAGCombiner::BuildSDIV(SDNode *N) { + MVT::ValueType VT = N->getValueType(0); + assert((VT == MVT::i32 || VT == MVT::i64) && + "BuildSDIV only operates on i32 or i64!"); + + int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); + ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d); + + // Multiply the numerator (operand 0) by the magic value + SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + // If d > 0 and m < 0, add the numerator + if (d > 0 && magics.m < 0) { + Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0)); + WorkList.push_back(Q.Val); + } + // If d < 0 and m > 0, subtract the numerator. 
+ if (d < 0 && magics.m > 0) { + Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0)); + WorkList.push_back(Q.Val); + } + // Shift right algebraic if shift value is nonzero + if (magics.s > 0) { + Q = DAG.getNode(ISD::SRA, VT, Q, + DAG.getConstant(magics.s, TLI.getShiftAmountTy())); + WorkList.push_back(Q.Val); + } + // Extract the sign bit and add it to the quotient + SDOperand T = + DAG.getNode(ISD::SRL, MVT::i32, Q, + DAG.getConstant(MVT::getSizeInBits(VT)-1, + TLI.getShiftAmountTy())); + WorkList.push_back(T.Val); + return DAG.getNode(ISD::ADD, VT, Q, T); +} + +/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDOperand DAGCombiner::BuildUDIV(SDNode *N) { + MVT::ValueType VT = N->getValueType(0); + assert((VT == MVT::i32 || VT == MVT::i64) && + "BuildUDIV only operates on i32 or i64!"); + + uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); + mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d); + + // Multiply the numerator (operand 0) by the magic value + SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + WorkList.push_back(Q.Val); + + if (magics.a == 0) { + return DAG.getNode(ISD::SRL, VT, Q, + DAG.getConstant(magics.s, TLI.getShiftAmountTy())); + } else { + SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q); + WorkList.push_back(NPQ.Val); + NPQ = DAG.getNode(ISD::SRL, VT, NPQ, + DAG.getConstant(1, TLI.getShiftAmountTy())); + WorkList.push_back(NPQ.Val); + NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q); + WorkList.push_back(NPQ.Val); + return DAG.getNode(ISD::SRL, VT, NPQ, + DAG.getConstant(magics.s-1, TLI.getShiftAmountTy())); + } +} + // SelectionDAG::Combine - This is the entry point for the file. // void SelectionDAG::Combine(bool RunningAfterLegalize) { |