aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-09-18 15:07:33 -0700
committerDerek Schuff <dschuff@chromium.org>2012-09-18 15:07:33 -0700
commitef9bd62c68c2c279926e66058bc5a6ef62150432 (patch)
tree247f92a4e885f15dbcd37685fa9dbaa7c1db7c67 /lib/CodeGen/SelectionDAG
parent5868d5c7576d519a0bba685e7a0471ee306f5eee (diff)
parent8e70b5506ec0d7a6c2740bc89cd1b8f12a78b24f (diff)
Merge commit '8e70b5506ec0d7a6c2740bc89cd1b8f12a78b24f'
Conflicts: include/llvm/CodeGen/ISDOpcodes.h include/llvm/Target/Target.td include/llvm/Target/TargetLowering.h include/llvm/Target/TargetOpcodes.h lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp lib/Target/Mips/MipsMCInstLower.cpp utils/TableGen/CodeGenTarget.cpp
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp235
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp43
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp73
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp116
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h15
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp24
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp5
16 files changed, 490 insertions, 110 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 10491d98cb..e2a64cff96 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2496,8 +2496,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (BitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+ SplatBitSize < BitWidth;
+ SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
}
}
@@ -5681,6 +5691,127 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // In unsafe math mode, we can fold chains of FADD's of the same value
+ // into multiplications. This transform is not safe in general because
+ // we are reducing the number of rounding steps.
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul x, c), x) -> (fmul c+1, x)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
+ N0.getOperand(0) == N1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+
+ // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+
+ // (fadd x, (fmul c, x)) -> (fmul c+1, x)
+ if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fmul x, c)) -> (fmul c+1, x)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+
+ // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
+ if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
+ if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getConstantFP(4.0, VT));
+ }
+ }
+
// FADD -> FMA combines:
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
@@ -5692,8 +5823,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
-
- // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
@@ -5867,6 +5998,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
@@ -5877,6 +6009,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FMUL &&
+ N0 == N2.getOperand(0) &&
+ N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ }
+
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FMUL && N1CFP &&
+ N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+ N2);
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ AddToWorkList(RHSNeg.getNode());
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ }
+ }
+
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, VT)));
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, VT)));
+ }
+
+
return SDValue();
}
@@ -6246,6 +6430,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
}
}
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ N0.getOperand(1)));
+ }
+ }
+
return SDValue();
}
@@ -7876,29 +8071,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // If the element type of the input vector is not the same as
- // the output element type, make concat_vectors based on input element
- // type and then bitcast it to the output vector type.
- //
- // In another words avoid nodes like this:
- // <NODE> v16i8 = concat_vectors v4i16 v4i16
- // Replace it with this one:
- // <NODE0> v8i16 = concat_vectors v4i16 v4i16
- // <NODE1> v16i8 = bitcast NODE0
- EVT ItemType = VecIn1.getValueType().getVectorElementType();
- if (ItemType != VT.getVectorElementType()) {
- EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
- ItemType,
- VecIn1.getValueType().getVectorNumElements()*2);
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
- VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
- } else
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
-
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
// If VecIn2 is unused then change it to undef.
@@ -8754,7 +8929,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// to alias with anything but itself. Provides base object and offset as
// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
- const GlobalValue *&GV, void *&CV) {
+ const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = 0; CV = 0;
@@ -8779,8 +8954,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
// for ConstantSDNodes since the same constant pool entry may be represented
// by multiple nodes with different offsets.
if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
- CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
- : (void *)C->getConstVal();
+ CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+ : (const void *)C->getConstVal();
Offset += C->getOffset();
return false;
}
@@ -8805,7 +8980,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
SDValue Base1, Base2;
int64_t Offset1, Offset2;
const GlobalValue *GV1, *GV2;
- void *CV1, *CV2;
+ const void *CV1, *CV2;
bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 3e18ea7ac9..b2a2a5cb25 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
- MayNeedSP);
+ MayNeedSP, AI);
}
for (; BB != EB; ++BB)
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4488d2790b..6d2cdeabd1 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
///
/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
/// the chain and glue. These operands may be implicit on the machine instr.
-static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
@@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
--N; // Ignore chain if it exists.
// Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
- for (unsigned I = N; I; --I) {
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
@@ -720,7 +722,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NumImpUses = 0;
- unsigned NodeOperands = countOperands(Node, NumImpUses);
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
@@ -870,6 +873,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -890,19 +904,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
getZExtValue();
MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+ // Remember to operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ GroupIdx.push_back(MI->getNumOperands());
MI->addOperand(MachineOperand::CreateImm(Flags));
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
- for (; NumVals; --NumVals, ++i) {
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
@@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
- for (; NumVals; --NumVals, ++i) {
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
/*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
@@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case InlineAsm::Kind_Mem: // Addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
- for (; NumVals; --NumVals, ++i)
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
/*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MI->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 908ebb9486..7b341700b8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2042,7 +2042,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
EVT DestVT,
DebugLoc dl) {
- if (Op0.getValueType() == MVT::i32) {
+ if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
// Get the stack frame index of a 8 byte buffer.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 94fc9761ec..37f0e60087 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -625,6 +625,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_VSETCC(SDNode* N);
+ SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 704f99bcf0..22f8d51ab2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,7 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
@@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
@@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case TargetLowering::Expand:
if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::SELECT)
+ Result = ExpandSELECT(Op);
else if (Node->getOpcode() == ISD::UINT_TO_FP)
Result = ExpandUINT_TO_FLOAT(Op);
else if (Node->getOpcode() == ISD::FNEG)
@@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
return TF;
}
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+ // Lower a select instruction where the condition is a scalar and the
+ // operands are vectors. Lower this select to VSELECT and implement it
+ // using XOR AND OR. The selector bit is broadcasted.
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ unsigned NumElem = VT.getVectorNumElements();
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+ // Notice that the operation may be 'promoted' which means that it is
+ // 'bitcasted' to another type which is handled.
+ // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = TLI.getSetCCResultType(VT);
+ assert(MaskTy.isVector() && "Invalid CC type");
+ assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
+ && "Invalid mask size");
+
+ // What is the size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+ DAG.getConstant(0, BitTy));
+
+ // Broadcast the mask so that the entire vector is all-one or all zero.
+ SmallVector<SDValue, 8> Ops(NumElem, Mask);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
// 'bitcasted' to another type which is handled.
+ // This operation also isn't safe with AND, OR, XOR when the boolean
+ // type is 0/1 as we need an all ones vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if thats legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(true) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
- assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
+ assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
&& "Invalid mask size");
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4709202965..4095728ee0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
SetWidenedVector(SDValue(N, ResNo), Res);
}
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
unsigned Opcode = N->getOpcode();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index bf0a43785b..2b86e36991 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
case ISD::CopyToReg:
case ISD::CopyFromReg:
case ISD::EH_LABEL:
@@ -1756,6 +1758,7 @@ public:
return V;
}
+#ifndef NDEBUG
void dump(ScheduleDAG *DAG) const {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
@@ -1766,6 +1769,7 @@ public:
SU->dump(DAG);
}
}
+#endif
};
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
@@ -1893,6 +1897,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
//===----------------------------------------------------------------------===//
void RegReductionPQBase::dumpRegPressure() const {
+#ifndef NDEBUG
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
E = TRI->regclass_end(); I != E; ++I) {
const TargetRegisterClass *RC = *I;
@@ -1902,6 +1907,7 @@ void RegReductionPQBase::dumpRegPressure() const {
DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
<< '\n');
}
+#endif
}
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 748668cdf6..222dc559a2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -643,6 +643,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#ifndef NDEBUG
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
@@ -659,8 +660,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
dbgs() << "\n";
GluedNodes.pop_back();
}
+#endif
}
+#ifndef NDEBUG
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
@@ -669,6 +672,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const {
dbgs() << "**** NOOP ****\n";
}
}
+#endif
#ifndef NDEBUG
/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f4fe8927f6..928385a0ad 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1097,10 +1097,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
"Cannot set target flags on target-independent globals");
// Truncate (with sign-extension) the offset value to the pointer size.
- EVT PTy = TLI.getPointerTy();
- unsigned BitWidth = PTy.getSizeInBits();
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
if (BitWidth < 64)
- Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+ Offset = SignExtend64(Offset, BitWidth);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
@@ -3928,12 +3927,16 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
// For now, atomics are considered to be volatile always.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- Flags |= MachineMemOperand::MOVolatile;
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, M