author     Bob Wilson <bob.wilson@apple.com>  2009-06-22 23:27:02 +0000
committer  Bob Wilson <bob.wilson@apple.com>  2009-06-22 23:27:02 +0000
commit     5bafff36c798608a189c517d37527e4a38863071 (patch)
tree       79bd2abbc5253e6f00db07023cf7d829cbcdee5a /lib
parent     5de83afcdc3f4f0edf8caacba523f5d05ee48048 (diff)
Add support for ARM's Advanced SIMD (NEON) instruction set.
This is still a work in progress, but most of the NEON instruction set is supported.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73919 91177308-0d34-0410-b5e6-96231b3b80d8
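For context: NEON adds 64-bit (D) and 128-bit (Q) vector registers over packed integers and single-precision floats. As a hypothetical illustration (not part of the patch), this is the kind of loop a vectorizing client could map onto a single v4i32 add in SelectionDAG terms, which the new instruction selection can turn into NEON's VADD.I32:

#include <cstdint>

// Hypothetical example: four 32-bit adds with no loop-carried dependence.
// Vectorized, this is one 128-bit (v4i32) add over a Q register.
void add4(int32_t *dst, const int32_t *a, const int32_t *b) {
  for (int i = 0; i < 4; ++i)
    dst[i] = a[i] + b[i];
}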
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td              |   41
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp            |   62
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp            | 1077
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h              |   72
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td             |  119
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp               |    8
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h                 |    6
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                |   10
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td                | 1665
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td             |  127
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp   |   18
-rw-r--r--  lib/Target/ARM/README.txt                     |   20
12 files changed, 3100 insertions(+), 125 deletions(-)
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 47151e667c..8a4c741faf 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -24,19 +24,29 @@ def CC_ARM_APCS : CallingConv<[
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- // f64 is passed in pairs of GPRs, possibly split onto the stack
- CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i32], CCAssignToStack<4, 4>>,
- CCIfType<[f64], CCAssignToStack<8, 4>>
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 4>>
]>;
def RetCC_ARM_APCS : CallingConv<[
CCIfType<[f32], CCBitConvertToType<i32>>,
- CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
@@ -59,7 +69,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
- CCIfType<[f64], CCAssignToStack<8, 8>>
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[v2f64], CCAssignToStack<16, 8>>
]>;
def RetCC_ARM_AAPCS_Common : CallingConv<[
@@ -72,13 +83,21 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
- CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
def RetCC_ARM_AAPCS : CallingConv<[
- CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
@@ -88,6 +107,10 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
@@ -95,6 +118,10 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
]>;
def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
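A pattern repeats in every convention above: all ten NEON vector types are first bit-converted by size, 64-bit types to f64 and 128-bit types to v2f64, so the existing f64 paths (custom GPR-pair handlers, or D registers under the VFP conventions) handle them. A standalone sketch of that mapping, using plain strings in place of LLVM's MVT values:

#include <cstdio>

// The size-based funneling the .td rules above express: 64-bit NEON
// types become f64, 128-bit types become v2f64.
int main() {
  struct { const char *VT; unsigned Bits; } Types[] = {
    {"v8i8", 64},  {"v4i16", 64}, {"v2i32", 64}, {"v1i64", 64},
    {"v2f32", 64}, {"v16i8", 128}, {"v8i16", 128}, {"v4i32", 128},
    {"v2i64", 128}, {"v4f32", 128},
  };
  for (const auto &T : Types)
    std::printf("%-5s (%3u bits) -> %s\n", T.VT, T.Bits,
                T.Bits == 64 ? "f64" : "v2f64");
}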
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index d3c3047fc9..ee9dadff15 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -32,6 +32,9 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
+static const unsigned arm_dsubreg_0 = 5;
+static const unsigned arm_dsubreg_1 = 6;
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -918,6 +921,65 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
MVT::Other, Ops, 3);
}
+
+ case ISD::CONCAT_VECTORS: {
+ MVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && Op.getNumOperands() == 2 &&
+ "unexpected CONCAT_VECTORS");
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDNode *Result =
+ CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT);
+ if (N0.getOpcode() != ISD::UNDEF)
+ Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT,
+ SDValue(Result, 0), N0,
+ CurDAG->getTargetConstant(arm_dsubreg_0,
+ MVT::i32));
+ if (N1.getOpcode() != ISD::UNDEF)
+ Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT,
+ SDValue(Result, 0), N1,
+ CurDAG->getTargetConstant(arm_dsubreg_1,
+ MVT::i32));
+ return Result;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ MVT VT = Op.getValueType();
+
+ // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in
+ // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be
+ // transformed first into a lane number and then to both a subregister
+ // index and an adjusted lane number.) If the source operand is a
+ // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP.
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ if (VT.is128BitVector() && SVOp->isSplat() &&
+ Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR &&
+ Op.getOperand(1).getOpcode() == ISD::UNDEF) {
+ unsigned LaneVal = SVOp->getSplatIndex();
+
+ MVT HalfVT;
+ unsigned Opc = 0;
+ switch (VT.getVectorElementType().getSimpleVT()) {
+ default: assert(false && "unhandled VDUP splat type");
+ case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break;
+ case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
+ case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
+ case MVT::f32: Opc = ARM::VDUPLNfq; HalfVT = MVT::v2f32; break;
+ }
+
+ // The source operand needs to be changed to a subreg of the original
+ // 128-bit operand, and the lane number needs to be adjusted accordingly.
+ unsigned NumElts = VT.getVectorNumElements() / 2;
+ unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1);
+ SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32);
+ SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32);
+ SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
+ dl, HalfVT, N->getOperand(0), SR);
+ return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane);
+ }
+
+ break;
+ }
}
return SelectCode(Op);
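The lane arithmetic in the VECTOR_SHUFFLE case above deserves a worked example: a splat lane in a Q register is rewritten as a choice of D subregister (low or high half) plus a lane within that half, so for v8i16 lane 5 becomes dsubreg_1, lane 1. A standalone sketch of the computation, with bare integers standing in for the subregister indices:

#include <cstdio>

// Mirror of the selection logic above: NumElts is the element count of
// one D-sized half, the subregister picks the half, and the remainder
// is the lane within it.
int main() {
  const unsigned NumQElts = 8;                 // e.g. v8i16
  const unsigned NumElts = NumQElts / 2;       // elements per D half
  for (unsigned Lane = 0; Lane < NumQElts; ++Lane) {
    unsigned SubReg = Lane < NumElts ? 0 : 1;  // arm_dsubreg_0 / _1
    std::printf("Q lane %u -> D half %u, lane %u\n",
                Lane, SubReg, Lane % NumElts);
  }
}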
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2443625d4b..29d3da2b79 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -56,6 +56,52 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
+
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
+ }
+
+ MVT ElemTy = VT.getVectorElementType();
+ if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ setOperationAction(ISD::VSETCC, VT, Custom);
+ if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ if (VT.isInteger()) {
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
+
+ // Promote all bit-wise operations.
+ if (VT.isInteger() && VT != PromotedBitwiseVT) {
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
+ }
+}
+
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, ARM::DPRRegisterClass);
+ addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, ARM::QPRRegisterClass);
+ addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM), ARMPCLabelIndex(0) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
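The bitwise-promotion block in addTypeForNEON works because AND, OR, and XOR are bit-parallel: reinterpreting the same 64 bits at a different element width cannot change the result, so one v2i32 pattern covers v8i8, v4i16, and v1i64. A standalone demonstration, with plain arrays standing in for D registers:

#include <cstdint>
#include <cstdio>
#include <cstring>

// v8i8 AND and v2i32 AND over the same 64-bit payload produce
// identical bits, which is what lets the ops be promoted.
int main() {
  uint8_t a[8] = {0xFF, 0x0F, 0xF0, 0xAA, 0x55, 0x00, 0x3C, 0xC3};
  uint8_t b[8] = {0x0F, 0xFF, 0x55, 0xAA, 0xF0, 0x81, 0xE7, 0x7E};

  uint8_t byteAnd[8];                    // v8i8 AND
  for (int i = 0; i < 8; ++i) byteAnd[i] = a[i] & b[i];

  uint32_t a32[2], b32[2], wordAnd[2];   // v2i32 AND over the same bits
  std::memcpy(a32, a, 8);
  std::memcpy(b32, b, 8);
  for (int i = 0; i < 2; ++i) wordAnd[i] = a32[i] & b32[i];

  std::printf("identical: %s\n",
              std::memcmp(byteAnd, wordAnd, 8) == 0 ? "yes" : "no");
}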
@@ -152,6 +198,30 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
+
+ if (Subtarget->hasNEON()) {
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
+
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ }
+
computeRegisterProperties();
// ARM does not have f32 extending load.
@@ -352,6 +422,36 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMDRR: return "ARMISD::FMDRR";
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+
+ case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEU: return "ARMISD::VCGEU";
+ case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTU: return "ARMISD::VCGTU";
+ case ARMISD::VTST: return "ARMISD::VTST";
+
+ case ARMISD::VSHL: return "ARMISD::VSHL";
+ case ARMISD::VSHRs: return "ARMISD::VSHRs";
+ case ARMISD::VSHRu: return "ARMISD::VSHRu";
+ case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
+ case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
+ case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
+ case ARMISD::VSHRN: return "ARMISD::VSHRN";
+ case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
+ case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
+ case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
+ case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
+ case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
+ case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
+ case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
+ case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
+ case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
+ case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
+ case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
+ case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
+ case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ";
}
}
@@ -423,63 +523,93 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
#include "ARMGenCallingConv.inc"
// APCS f64 is in register pairs, possibly split to stack
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
- static const unsigned LoRegList[] = { ARM::R1,
- ARM::R2,
- ARM::R3,
- ARM::NoRegister };
-
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
- if (Reg == 0)
- return false; // we didn't handle it
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
- unsigned i;
- for (i = 0; i < 4; ++i)
- if (HiRegList[i] == Reg)
- break;
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
- if (LoRegList[i] != ARM::NoRegister)
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- MVT::i32, LocInfo));
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
State.AllocateStack(4, 4),
- MVT::i32, LocInfo));
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
return true; // we handled it
}
// AAPCS f64 is in aligned register pairs
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
- if (Reg == 0)
- return false; // we didn't handle it
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
unsigned i;
for (i = 0; i < 2; ++i)
if (HiRegList[i] == Reg)
break;
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- MVT::i32, LocInfo));
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
return true; // we handled it
}
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
@@ -492,9 +622,20 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (HiRegList[i] == Reg)
break;
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- MVT::i32, LocInfo));
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
return true; // we handled it
}
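The CanFail protocol above is easiest to follow for a v2f64 under AAPCS: the handler runs once per f64 half; the first call may return false and fail over to the common rules, but once the first half is placed the second must succeed, taking an aligned GPR pair if one is free and otherwise 8 bytes of 8-aligned stack. A standalone model of the pair allocation (not LLVM's CCState API):

#include <cstdio>
#include <optional>

// Toy model of f64AssignAAPCS: an f64 half wants an aligned GPR pair,
// (R0,R1) or (R2,R3); failing that it goes to the stack.
struct GPRState {
  bool used[4] = {false, false, false, false};  // R0..R3
  std::optional<int> allocPair() {
    for (int hi : {0, 2})
      if (!used[hi] && !used[hi + 1]) {
        used[hi] = used[hi + 1] = true;
        return hi;  // index of the "high" register of the pair
      }
    return std::nullopt;
  }
};

int main() {
  GPRState S;
  for (int half = 0; half < 2; ++half) {
    if (auto hi = S.allocPair())
      std::printf("v2f64 half %d -> R%d/R%d\n", half, *hi, *hi + 1);
    else  // CanFail=false case; the first half would instead defer
      std::printf("v2f64 half %d -> stack, 8 bytes, 8-byte aligned\n", half);
  }
}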
@@ -558,7 +699,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
SDValue Val;
if (VA.needsCustom()) {
- // Handle f64 as custom.
+ // Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Lo.getValue(1);
@@ -569,6 +710,24 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(0, MVT::i32));
+
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
+ DAG.getConstant(1, MVT::i32));
+ }
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
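LowerCallResult reassembles return values with ARMISD::FMDRR, which combines two GPR words back into one f64; for v2f64 the code above does it twice and inserts each half into the vector. A standalone model of that reassembly (memcpy standing in for FMDRR, assuming a little-endian host):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Toy model of FMDRR plus the INSERT_VECTOR_ELT pair above: four GPR
// words (as returned in R0-R3) become one v2f64.
int main() {
  uint32_t words[4] = {0x54442d18, 0x400921fb,   // lo/hi words of pi
                       0x8b145769, 0x4005bf0a};  // lo/hi words of e
  double vec[2];                                 // stands in for v2f64
  for (int half = 0; half < 2; ++half)
    std::memcpy(&vec[half], &words[2 * half], 8);
  std::printf("v2f64 = { %.15f, %.15f }\n", vec[0], vec[1]);
}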
@@ -625,6 +784,31 @@ ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
PseudoSourceValue::getStack(), LocMemOffset);
}
+void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
+ SDValue Chain, SDValue &Arg,
+ RegsToPassVector &RegsToPass,
+ CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &StackPtr,
+ SmallVector<SDValue, 8> &MemOpChains,
+ ISD::ArgFlagsTy Flags) {
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+
+ if (NextVA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(NextVA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, NextVA,
+ Chain, fmrrd.getValue(1), Flags));
+ }
+}
+
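PassF64ArgInRegs is the outbound direction: ARMISD::FMRRD splits a VFP double into two 32-bit words, the first always landing in a GPR and the second in a GPR or a stack slot depending on NextVA. A standalone model of the split (memcpy standing in for FMRRD, assuming a little-endian host):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Toy model of FMRRD: the two 32-bit halves of an f64, as they would be
// handed to e.g. R0 (lo) and R1 (hi).
int main() {
  double d = 3.141592653589793;
  uint32_t lo, hi;
  std::memcpy(&lo, &d, 4);
  std::memcpy(&hi, reinterpret_cast<const char *>(&d) + 4, 4);
  std::printf("f64 %.15f -> lo=0x%08x hi=0x%08x\n", d, lo, hi);
}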
/// LowerCALL - Lowering a ISD::CALL node into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
@@ -651,7 +835,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Walk the register/memloc assignments, inserting copies/loads. In the case
@@ -681,22 +865,32 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
break;
}
- // f64 is passed in i32 pairs and must be combined
+ // f64 and v2f64 are passed in i32 pairs and must be split into pieces
if (VA.needsCustom()) {
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
- VA = ArgLocs[++i]; // skip ahead to next loc
- if (VA.isRegLoc())
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1)));
- else {
- assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
- Chain, fmrrd.getValue(1),
- Flags));
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+
+ PassF64ArgInRegs(TheCall, DAG, Chain, Op0, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(TheCall, DAG, Chain, Op1, RegsToPass,
+ VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, Op1, Flags));
+ }
+ } else {
+ PassF64ArgInRegs(TheCall, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -864,9 +1058,28 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
break;
}
- // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
- // available.
if (VA.needsCustom()) {
+ if (VA.getLocVT() == MVT::v2f64) {
+ // Extract the first half and return it in two registers.
+ SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, MVT::i32));
+ SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Half);
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(1), Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+
+ // Extract the 2nd half and fall through to handle it as an f64 value.
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, MVT::i32));
+ }
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
@@ -1117,6 +1330,40 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
}
SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ DebugLoc dl) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ TargetRegisterClass *RC;
+ if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+ SDValue ArgValue2;
+ if (NextVA.isMemLoc()) {
+ unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset());
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
+ } else {
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+SDValue
ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1141,47 +1388,45 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Arguments stored in registers.
if (VA.isRegLoc()) {
MVT RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (AFI->isThumbFunction())
- RC = ARM::tGPRRegisterClass;
- else
- RC = ARM::GPRRegisterClass;
- if (FloatABIType == FloatABI::Hard) {
- if (RegVT == MVT::f32)
- RC = ARM::SPRRegisterClass;
- else if (RegVT == MVT::f64)
- RC = ARM::DPRRegisterClass;
- } else if (RegVT == MVT::f64) {
- // f64 is passed in pairs of GPRs and must be combined.
+ SDValue ArgValue;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split up into multiple registers or
+ // combinations of registers and stack slots.
RegVT = MVT::i32;
- } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
- assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
- // Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+ if (VA.getLocVT() == MVT::v2f64) {
+ SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Root, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
+ Root, DAG, dl);
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
+ ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ } else
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Root, DAG, dl);
- // f64 is passed in i32 pairs and must be combined.
- if (VA.needsCustom()) {
- SDValue ArgValue2;
+ } else {
+ TargetRegisterClass *RC;
+ if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ else if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
- VA = ArgLocs[++i]; // skip ahead to next loc
- if (VA.isMemLoc()) {
- // must be APCS to split like this
- unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
-
- // Create load node to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
- } else {
- Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
- }
+ assert((RegVT == MVT::i32 || RegVT == MVT::f32 ||
+ (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) &&
+ "RegVT not supported by FORMAL_ARGUMENTS Lowering");
- ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64,
- ArgValue, ArgValue2);
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted
@@ -1638,8 +1883,78 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
-static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) {
- assert(N->getValueType(0) == MVT::i64 &&
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Zero vectors are used to represent vector negation and in those cases
+ // will be implemented with the NEON VNEG instruction. However, VNEG does
+ // not support i64 elements, so sometimes the zero vectors will need to be
+ // explicitly constructed. For those cases, and potentially other uses in
+ // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
+ SDValue Vec;
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ if (VT.getSizeInBits() == 64)
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDValue Vec;
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ if (VT.getSizeInBits() == 64)
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
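The canonical-type trick in getZeroVector and getOnesVector rests on the fact that an all-zero or all-ones payload reads the same at any element width, so a single v4i32 (or v2i32) node can serve every vector type of that size once bitcast, and identical nodes get CSE'd. A standalone check of the property (memcpy standing in for BIT_CONVERT):

#include <cstdint>
#include <cstdio>
#include <cstring>

// An all-ones v4i32 reinterpreted as v8i16 is still all-ones in every
// lane; the same holds for any element width, and trivially for zeros.
int main() {
  uint32_t ones[4] = {~0u, ~0u, ~0u, ~0u};  // canonical v4i32 ones vector
  uint16_t lanes[8];                        // viewed as v8i16
  std::memcpy(lanes, ones, sizeof ones);
  for (uint16_t e : lanes)
    std::printf("0x%04x ", e);              // prints 0xffff eight times
  std::printf("\n");
}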