aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp217
1 files changed, 140 insertions, 77 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 344d1447a8..db8ae6ea06 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -60,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
return Res;
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f16: return &APFloat::IEEEhalf;
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
@@ -95,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
- (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
@@ -1081,7 +1070,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
- apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
} else
@@ -2442,7 +2431,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -2450,9 +2440,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2499,7 +2489,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -2690,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2) {
- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
- switch (Opcode) {
- case ISD::ADD: return getConstant(C1 + C2, VT);
- case ISD::SUB: return getConstant(C1 - C2, VT);
- case ISD::MUL: return getConstant(C1 * C2, VT);
- case ISD::UDIV:
- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
- break;
- case ISD::UREM:
- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
- break;
- case ISD::SDIV:
- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
- break;
- case ISD::SREM:
- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
- break;
- case ISD::AND: return getConstant(C1 & C2, VT);
- case ISD::OR: return getConstant(C1 | C2, VT);
- case ISD::XOR: return getConstant(C1 ^ C2, VT);
- case ISD::SHL: return getConstant(C1 << C2, VT);
- case ISD::SRL: return getConstant(C1.lshr(C2), VT);
- case ISD::SRA: return getConstant(C1.ashr(C2), VT);
- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
- default: break;
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
}
- return SDValue();
+ // We have a number of constant values, constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
}
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
- SDValue N1, SDValue N2) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3023,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
}
- if (N1C) {
- if (N2C) {
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
- if (SV.getNode()) return SV;
- } else { // Cannonicalize constant to RHS if commutative
- if (isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
- }
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
}
// Constant fold FP operations.
@@ -3040,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Cannonicalize constant to RHS if commutative
+ // Canonicalize constant to RHS if commutative.
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
} else if (N2CFP) {
@@ -3084,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -3316,17 +3377,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3335,17 +3385,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, VT);
- return DAG.getConstantFP(APFloat(Val), VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
- APInt Magic = SplatByte(NumBits, 0x01);
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
@@ -3571,6 +3622,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3807,6 +3867,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3874,6 +3935,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3928,6 +3990,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.