aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2013-02-12 14:18:49 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2013-02-12 14:18:49 +0000
commit7eacad03efda36e09ebd96e95d7891cadaaa9087 (patch)
treec66658286eca956701f8334550a8edefe236b468
parentc8a196ae8fad3cba7a777e2e7916fd36ebf70fe6 (diff)
[NVPTX] Disable vector registers
Vectors were being manually scalarized by the backend. Instead, let the target-independent code do all of the work. The manual scalarization was from a time before good target-independent support for scalarization in LLVM. However, this forces us to specially-handle vector loads and stores, which we can turn into PTX instructions that produce/consume multiple operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IR/IntrinsicsNVVM.td10
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/NVPTX.h1
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp46
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp809
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h7
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp458
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h21
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp40
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td96
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td145
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp156
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h4
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td44
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp1
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp1239
-rw-r--r--lib/Target/NVPTX/gen-register-defs.py202
-rw-r--r--test/CodeGen/NVPTX/vector-loads.ll66
19 files changed, 1350 insertions, 1997 deletions
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index 6b853001e7..ebfd03e484 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -805,6 +805,16 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldu.global.p">;
+// Generated within nvvm. Use for ldg on sm_35 or later
+def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.i">;
+def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.f">;
+def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.p">;
// Use for generic pointers
// - These intrinsics are used to convert address spaces.
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7cb16b4dd8..47baef6696 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
- VectorElementize.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 097b50aa4e..b46ea881c4 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3f99d1d0e4..0115e1f5d3 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
- // Vector virtual register
- if (getNVPTXVectorSize(RC) == 4)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
- << "}";
- else if (getNVPTXVectorSize(RC) == 2)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
- << "}";
- else
- llvm_unreachable("Unsupported vector size");
+ report_fatal_error("Bad register!");
}
void
@@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
- case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
- case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
- case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
- case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
- case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
- case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
- case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
- case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
- case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
- case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
- case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
- case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
- case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
- case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
- case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
- case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
- case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
- case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
- case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
- case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
- case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
- case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
- case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
- case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
- case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
- case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
- case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 36ab7f5c15..481f13afd1 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default: break;
}
if (ResNode)
return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N1.getNode(), N1, Base, Offset):
SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
MVT::Other, Ops, 8);
}
else {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType =
+ cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
+ case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
+ default: return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N2.getNode(), N2, Base, Offset):
SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl,
MVT::Other, Ops, 9);
} else {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- ca