diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 47 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelPattern.cpp | 54 |
5 files changed, 45 insertions, 66 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cf6106dbb3..6c1d22c2ba 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -745,8 +745,7 @@ SDOperand DAGCombiner::visitMUL(SDNode *N) { return N1; // fold (mul x, -1) -> 0-x if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getValueType(0), - DAG.getConstant(0, N->getValueType(0)), N0); + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c if (N1C && isPowerOf2_64(N1C->getValue())) return DAG.getNode(ISD::SHL, N->getValueType(0), N0, @@ -777,21 +776,49 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(), N->getValueType(0)); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSignExtended() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); if (MaskedValueIsZero(N1, SignBit, TLI) && MaskedValueIsZero(N0, SignBit, TLI)) return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1); + // fold (sdiv X, pow2) -> (add (sra X, log(pow2)), (srl X, sizeof(X)-1)) + if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSignExtended()) || + isPowerOf2_64(-N1C->getSignExtended()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDOperand(); + int64_t pow2 = N1C->getSignExtended(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0, + DAG.getConstant(MVT::getSizeInBits(VT)-1, + TLI.getShiftAmountTy())); + WorkList.push_back(SRL.Val); + SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL); + WorkList.push_back(SGN.Val); + SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN, + DAG.getConstant(Log2_64(abs2), + TLI.getShiftAmountTy())); + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + WorkList.push_back(SRA.Val); + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA); + } // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - // FIXME: This currently opts out powers of two, since targets can often be - // more clever in those cases. In an idea world, we would have some way to - // detect that too. - if (N1C && !isPowerOf2_64(N1C->getSignExtended()) && - (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && - TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT) && - TLI.isIntDivExpensive()) { + if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && + !TLI.isIntDivCheap() && + TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT)) { return BuildSDIV(N); } return SDOperand(); @@ -815,7 +842,7 @@ SDOperand DAGCombiner::visitUDIV(SDNode *N) { TLI.getShiftAmountTy())); // fold (udiv x, c) -> alternate if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) && - TLI.isTypeLegal(VT) && TLI.isIntDivExpensive()) + TLI.isTypeLegal(VT) && !TLI.isIntDivCheap()) return BuildUDIV(N); return SDOperand(); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c4ccbf5ef0..eaca703ad9 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -30,6 +30,8 @@ TargetLowering::TargetLowering(TargetMachine &tm) maxStoresPerMemSet = maxStoresPerMemCpy = maxStoresPerMemMove = 8; allowUnalignedMemoryAccesses = false; UseUnderscoreSetJmpLongJmp = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; } TargetLowering::~TargetLowering() {} diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4813e9d369..cc9e23c1cc 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -996,6 +996,11 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) { return SDOperand(N, 0); } case ISD::SDIV: { + // FIXME: since this depends on the setting of the carry flag from the srawi + // we should really be making notes about that for the scheduler. + // FIXME: It sure would be nice if we could cheaply recognize the + // srl/add/sra pattern the dag combiner will generate for this as + // sra/addze rather than having to handle sdiv ourselves. oh well. unsigned Imm; if (isIntImmediate(N->getOperand(1), Imm)) { if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 118c605888..c9ed2bae1e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -27,8 +27,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) // Fold away setcc operations if possible. setSetCCIsExpensive(); - // Fold constant integer div/rem into an alternate sequence of instructions - setIntDivIsExpensive(); + setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmpLongJmp(true); diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp index 531c33f09b..a244afd601 100644 --- a/lib/Target/X86/X86ISelPattern.cpp +++ b/lib/Target/X86/X86ISelPattern.cpp @@ -3035,60 +3035,6 @@ unsigned ISel::SelectExpr(SDOperand N) { return Result; } } - - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - // FIXME: These special cases should be handled by the lowering impl! - unsigned RHS = CN->getValue(); - bool isNeg = false; - if ((int)RHS < 0) { - isNeg = true; - RHS = -RHS; - } - if (RHS && (RHS & (RHS-1)) == 0) { // Signed division by power of 2? - unsigned Log = Log2_32(RHS); - unsigned SAROpc, SHROpc, ADDOpc, NEGOpc; - switch (N.getValueType()) { - default: assert("Unknown type to signed divide!"); - case MVT::i8: - SAROpc = X86::SAR8ri; - SHROpc = X86::SHR8ri; - ADDOpc = X86::ADD8rr; - NEGOpc = X86::NEG8r; - break; - case MVT::i16: - SAROpc = X86::SAR16ri; - SHROpc = X86::SHR16ri; - ADDOpc = X86::ADD16rr; - NEGOpc = X86::NEG16r; - break; - case MVT::i32: - SAROpc = X86::SAR32ri; - SHROpc = X86::SHR32ri; - ADDOpc = X86::ADD32rr; - NEGOpc = X86::NEG32r; - break; - } - unsigned RegSize = MVT::getSizeInBits(N.getValueType()); - Tmp1 = SelectExpr(N.getOperand(0)); - unsigned TmpReg; - if (Log != 1) { - TmpReg = MakeReg(N.getValueType()); - BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1); - } else { - TmpReg = Tmp1; - } - unsigned TmpReg2 = MakeReg(N.getValueType()); - BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(RegSize-Log); - unsigned TmpReg3 = MakeReg(N.getValueType()); - BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2); - - unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result; - BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log); - if (isNeg) - BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4); - return Result; - } - } } if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) { |