diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 217 |
1 files changed, 140 insertions, 77 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 344d1447a8..db8ae6ea06 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -60,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { return Res; } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f16: return &APFloat::IEEEhalf; - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -95,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -1081,7 +1070,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); - apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); } else @@ -2442,7 +2431,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -2450,9 +2440,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2499,7 +2489,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2690,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2) { - const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; + SmallVector<SDValue, 4> Outputs; + EVT SVT = VT.getScalarType(); - switch (Opcode) { - case ISD::ADD: return getConstant(C1 + C2, VT); - case ISD::SUB: return getConstant(C1 - C2, VT); - case ISD::MUL: return getConstant(C1 * C2, VT); - case ISD::UDIV: - if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); - break; - case ISD::UREM: - if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); - break; - case ISD::SDIV: - if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); - break; - case ISD::SREM: - if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); - break; - case ISD::AND: return getConstant(C1 & C2, VT); - case ISD::OR: return getConstant(C1 | C2, VT); - case ISD::XOR: return getConstant(C1 ^ C2, VT); - case ISD::SHL: return getConstant(C1 << C2, VT); - case ISD::SRL: return getConstant(C1.lshr(C2), VT); - case ISD::SRA: return getConstant(C1.ashr(C2), VT); - case ISD::ROTL: return getConstant(C1.rotl(C2), VT); - case ISD::ROTR: return getConstant(C1.rotr(C2), VT); - default: break; + ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); + ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); + if (Scalar1 && Scalar2) { + // Scalar instruction. + Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } } - return SDValue(); + // We have a number of constant values, constant fold them element by element. + for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } + } + + // Handle the scalar case first. + if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, - SDValue N1, SDValue N2) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -3023,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } } - if (N1C) { - if (N2C) { - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); - if (SV.getNode()) return SV; - } else { // Cannonicalize constant to RHS if commutative - if (isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - } + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); } // Constant fold FP operations. @@ -3040,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Cannonicalize constant to RHS if commutative + // Canonicalize constant to RHS if commutative. std::swap(N1CFP, N2CFP); std::swap(N1, N2); } else if (N2CFP) { @@ -3084,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -3316,17 +3377,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } -/// SplatByte - Distribute ByteVal over NumBits bits. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3335,17 +3385,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(Val), VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. - APInt Magic = SplatByte(NumBits, 0x01); + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } @@ -3571,6 +3622,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Align && + TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + NewAlign /= 2; + if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3807,6 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3874,6 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3928,6 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. |