diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/PTX/PTX.td | 30 | ||||
-rw-r--r-- | lib/Target/PTX/PTXAsmPrinter.cpp | 64 | ||||
-rw-r--r-- | lib/Target/PTX/PTXISelDAGToDAG.cpp | 27 | ||||
-rw-r--r-- | lib/Target/PTX/PTXISelLowering.cpp | 31 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.cpp | 17 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 203 | ||||
-rw-r--r-- | lib/Target/PTX/PTXMFInfoExtract.cpp | 4 | ||||
-rw-r--r-- | lib/Target/PTX/PTXRegisterInfo.td | 131 | ||||
-rw-r--r-- | lib/Target/PTX/PTXSubtarget.cpp | 27 | ||||
-rw-r--r-- | lib/Target/PTX/PTXSubtarget.h | 27 |
10 files changed, 440 insertions, 121 deletions
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 8b1a1b18da..9f62aa16f8 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -19,8 +19,34 @@ include "llvm/Target/Target.td" // Subtarget Features. //===----------------------------------------------------------------------===// -def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true", - "Enable sm_20 target architecture">; +//===- Architectural Features ---------------------------------------------===// + +def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", + "Do not demote .f64 to .f32">; + +//===- PTX Version --------------------------------------------------------===// + +def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4", + "Use PTX Language Version 1.4">; + +def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", + "Use PTX Language Version 2.0", + [FeaturePTX14]>; + +def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", + "Use PTX Language Version 2.1", + [FeaturePTX20]>; + +//===- PTX Shader Model ---------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", + "Enable Shader Model 1.0 compliance">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", + "Enable Shader Model 1.3 compliance", + [FeatureSM10, FeatureDouble]>; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", + "Enable Shader Model 2.0 compliance", + [FeatureSM13]>; //===----------------------------------------------------------------------===// // PTX supported processors. diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 25f26fa4c4..35eeadce2d 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -37,13 +38,6 @@ using namespace llvm; -static cl::opt<std::string> -OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4")); - -static cl::opt<std::string> -OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"), - cl::init("sm_10")); - namespace { class PTXAsmPrinter : public AsmPrinter { public: @@ -82,11 +76,14 @@ private: static const char PARAM_PREFIX[] = "__param_"; static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ +#define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; - TEST_REGCLS(RRegf32, f32); - TEST_REGCLS(RRegs32, s32); TEST_REGCLS(Preds, pred); + TEST_REGCLS(RRegu16, u16); + TEST_REGCLS(RRegu32, u32); + TEST_REGCLS(RRegu64, u64); + TEST_REGCLS(RRegf32, f32); + TEST_REGCLS(RRegf64, f64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); @@ -121,7 +118,14 @@ static const char *getTypeName(const Type* type) { switch (type->getTypeID()) { default: llvm_unreachable("Unknown type"); case Type::FloatTyID: return ".f32"; - case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types. + case Type::DoubleTyID: return ".f64"; + case Type::IntegerTyID: + switch (type->getPrimitiveSizeInBits()) { + default: llvm_unreachable("Unknown integer bit-width"); + case 16: return ".u16"; + case 32: return ".u32"; + case 64: return ".u64"; + } case Type::ArrayTyID: case Type::PointerTyID: type = dyn_cast<const SequentialType>(type)->getElementType(); @@ -162,8 +166,11 @@ bool PTXAsmPrinter::doFinalization(Module &M) { void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { - OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion)); - OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget)); + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); + OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + + (ST.supportsDouble() ? "" : ", map_f64_to_f32"))); OutStreamer.AddBlankLine(); // declare global variables @@ -236,11 +243,24 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_FPImmediate: APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); + bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; + // Emit 0F for 32-bit floats and 0D for 64-bit doubles. + if (isFloat) { + OS << "0F"; + } + else { + OS << "0D"; + } + // Emit the encoded floating-point value. if (constFP.getZExtValue() > 0) { - OS << "0F" << constFP.toString(16, false); + OS << constFP.toString(16, false); } else { - OS << "0F00000000"; + OS << "00000000"; + // If We have a double-precision zero, pad to 8-bytes. + if (!isFloat) { + OS << "00000000"; + } } break; } @@ -338,12 +358,18 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (!MFI->argRegEmpty()) { decl += " ("; if (isKernel) { - for (int i = 0, e = MFI->getNumArg(); i != e; ++i) { - if (i != 0) + unsigned cnt = 0; + //for (int i = 0, e = MFI->getNumArg(); i != e; ++i) { + for(PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) { + reg = *i; + assert(reg != PTX::NoRegister && "Not a valid register!"); + if (i != b) decl += ", "; - decl += ".param .s32 "; // TODO: add types + decl += ".param .u32"; // TODO: Parse type from register map + decl += " "; decl += PARAM_PREFIX; - decl += utostr(i + 1); + decl += utostr(++cnt); } } else { for (PTXMachineFunctionInfo::reg_iterator diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index efb0e8b1af..1e6a53fee9 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -66,14 +67,34 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { } SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDValue index = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + unsigned opcode; if (index.getOpcode() != ISD::TargetConstant) llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); + if (Node->getValueType(0) == MVT::i16) { + opcode = PTX::LDpiU16; + } + else if (Node->getValueType(0) == MVT::i32) { + opcode = PTX::LDpiU32; + } + else if (Node->getValueType(0) == MVT::i64) { + opcode = PTX::LDpiU64; + } + else if (Node->getValueType(0) == MVT::f32) { + opcode = PTX::LDpiF32; + } + else if (Node->getValueType(0) == MVT::f64) { + opcode = PTX::LDpiF64; + } + else { + llvm_unreachable("Unknown parameter type for ld.param"); + } + return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index); + GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); } // Match memory operand of the form [reg+reg] diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index d30c9ecbe4..147b2a82cf 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,13 +28,17 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. addRegisterClass(MVT::i1, PTX::PredsRegisterClass); - addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); + addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass); + addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass); + addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass); addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); - + addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + // Customize translation of memory addresses setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -90,10 +95,13 @@ struct argmap_entry { bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } } argmap[] = { argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i32, PTX::RRegs32RegisterClass), - argmap_entry(MVT::f32, PTX::RRegf32RegisterClass) + argmap_entry(MVT::i16, PTX::RRegu16RegisterClass), + argmap_entry(MVT::i32, PTX::RRegu32RegisterClass), + argmap_entry(MVT::i64, PTX::RRegu64RegisterClass), + argmap_entry(MVT::f32, PTX::RRegf32RegisterClass), + argmap_entry(MVT::f64, PTX::RRegf64RegisterClass) }; -} // end anonymous namespace +} // end anonymous namespace SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, @@ -192,12 +200,21 @@ SDValue PTXTargetLowering:: SDValue Flag; unsigned reg; - if (Outs[0].VT == MVT::i32) { + if (Outs[0].VT == MVT::i16) { + reg = PTX::RH0; + } + else if (Outs[0].VT == MVT::i32) { reg = PTX::R0; } + else if (Outs[0].VT == MVT::i64) { + reg = PTX::RD0; + } else if (Outs[0].VT == MVT::f32) { reg = PTX::F0; } + else if (Outs[0].VT == MVT::f64) { + reg = PTX::FD0; + } else { assert(false && "Can return only basic types"); } diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index f2e5e4c110..7277238c2e 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -27,9 +27,12 @@ static const struct map_entry { const TargetRegisterClass *cls; const int opcode; } map[] = { - { &PTX::RRegs32RegClass, PTX::MOVrr }, - { &PTX::RRegf32RegClass, PTX::MOVrr }, - { &PTX::PredsRegClass, PTX::MOVpp } + { &PTX::RRegu16RegClass, PTX::MOVU16rr }, + { &PTX::RRegu32RegClass, PTX::MOVU32rr }, + { &PTX::RRegu64RegClass, PTX::MOVU64rr }, + { &PTX::RRegf32RegClass, PTX::MOVF32rr }, + { &PTX::RRegf64RegClass, PTX::MOVF64rr }, + { &PTX::PredsRegClass, PTX::MOVPREDrr } }; void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -76,8 +79,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, switch (MI.getOpcode()) { default: return false; - case PTX::MOVpp: - case PTX::MOVrr: + case PTX::MOVU16rr: + case PTX::MOVU32rr: + case PTX::MOVU64rr: + case PTX::MOVF32rr: + case PTX::MOVF64rr: + case PTX::MOVPREDrr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "Invalid register-register move instruction"); diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 9d962b0e25..fce6da66ef 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -114,7 +114,7 @@ def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>; // Address operands def MEMri : Operand<i32> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegs32, i32imm); + let MIOperandInfo = (ops RRegu32, i32imm); } def MEMii : Operand<i32> { let PrintMethod = "printMemOperand"; @@ -143,75 +143,115 @@ def PTXret // Instruction Class Templates //===----------------------------------------------------------------------===// -// Three-operand f32 instruction template +// Three-operand floating-point instruction template multiclass FLOAT3<string opcstr, SDNode opnode> { - def rr : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; - def ri : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, f32imm:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; + def rr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; + def ri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>; + def ri64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>; } multiclass INT3<string opcstr, SDNode opnode> { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; } // no %type directive, non-communtable multiclass INT3ntnc<string opcstr, SDNode opnode> { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), + def rr : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; - def ir : InstPTX<(outs RRegs32:$d), - (ins i32imm:$a, RRegs32:$b), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def ir : InstPTX<(outs RRegu32:$d), + (ins i32imm:$a, RRegu32:$b), !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>; + [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; } -multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> { +multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { def rr : InstPTX<(outs RC:$d), (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), [(set RC:$d, (pat_load ADDRrr:$a))]>; def ri : InstPTX<(outs RC:$d), (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), [(set RC:$d, (pat_load ADDRri:$a))]>; def ii : InstPTX<(outs RC:$d), (ins MEMii:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), [(set RC:$d, (pat_load ADDRii:$a))]>; } -multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> { +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>; +} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { def rr : InstPTX<(outs), (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), [(pat_store RC:$d, ADDRrr:$a)]>; def ri : InstPTX<(outs), (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), [(pat_store RC:$d, ADDRri:$a)]>; def ii : InstPTX<(outs), (ins RC:$d, MEMii:$a), - !strconcat(opstr, ".%type\t[$a], $d"), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), [(pat_store RC:$d, ADDRii:$a)]>; } +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -236,60 +276,67 @@ defm SRA : INT3ntnc<"shr.s32", PTXsra>; ///===- Data Movement and Conversion Instructions -------------------------===// let neverHasSideEffects = 1 in { - // rely on isMoveInstr to separate MOVpp, MOVrr, etc. - def MOVpp + def MOVPREDrr : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; - def MOVrr - : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>; - def FMOVrr + def MOVU16rr + : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>; + def MOVU32rr + : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>; + def MOVU64rr + : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>; + def MOVF32rr : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; + def MOVF64rr + : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVpi + def MOVPREDri : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", [(set Preds:$d, imm:$a)]>; - def MOVri - : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", - [(set RRegs32:$d, imm:$a)]>; - def FMOVri + def MOVU16ri + : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RRegu16:$d, imm:$a)]>; + def MOVU32ri + : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RRegu32:$d, imm:$a)]>; + def MOVU164ri + : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RRegu64:$d, imm:$a)]>; + def MOVF32ri : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", [(set RRegf32:$d, fpimm:$a)]>; + def MOVF64ri + : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RRegf64:$d, fpimm:$a)]>; } -// Integer loads -defm LDg : PTX_LD<"ld.global", RRegs32, load_global>; -defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>; -defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; -defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>; -defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; - -def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a), - "ld.param.%type\t$d, [$a]", []>; - -// Floating-point loads -defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>; -defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>; -defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>; -defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>; -defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>; - -def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), - "ld.param.%type\t$d, [$a]", []>; - -// Integer stores -defm STg : PTX_ST<"st.global", RRegs32, store_global>; -defm STl : PTX_ST<"st.local", RRegs32, store_local>; -// Store to parameter state space requires PTX 2.0 or higher? -// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; -defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; - -// Floating-point stores -defm FSTg : PTX_ST<"st.global", RRegf32, store_global>; -defm FSTl : PTX_ST<"st.local", RRegf32, store_local>; -// Store to parameter state space requires PTX 2.0 or higher? -// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>; -defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>; +// Loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDl : PTX_LD_ALL<"ld.local", load_local>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// This is a special instruction that is manually inserted for kernel parameters +def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", []>; +def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", []>; +def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", []>; +def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", []>; +def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", []>; + +// Stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STl : PTX_ST_ALL<"st.local", store_local>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// defm STp : PTX_ST_ALL<"st.param", store_parameter>; +// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; +// TODO: Do something with st.param if/when it is needed. ///===- Control Flow Instructions -----------------------------------------===// diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index b37c740006..c5e1910072 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Arg Reg: " << *i << "\n";); DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Local Var Reg: " << *i << "\n";); return false; diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 9158f0d31c..548e3bbeb9 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> { // Registers //===----------------------------------------------------------------------===// +///===- Predicate Registers -----------------------------------------------===// + def P0 : PTXReg<"p0">; def P1 : PTXReg<"p1">; def P2 : PTXReg<"p2">; @@ -52,6 +54,43 @@ def P29 : PTXReg<"p29">; def P30 : PTXReg<"p30">; def P31 : PTXReg<"p31">; +///===- 16-bit Integer Registers ------------------------------------------===// + +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; + +///===- 32-bit Integer Registers ------------------------------------------===// + def R0 : PTXReg<"r0">; def R1 : PTXReg<"r1">; def R2 : PTXReg<"r2">; @@ -85,6 +124,43 @@ def R29 : PTXReg<"r29">; def R30 : PTXReg<"r30">; def R31 : PTXReg<"r31">; +///===- 64-bit Integer Registers ------------------------------------------===// + +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; + +///===- 32-bit Floating-Point Registers -----------------------------------===// + def F0 : PTXReg<"f0">; def F1 : PTXReg<"f1">; def F2 : PTXReg<"f2">; @@ -118,6 +194,41 @@ def F29 : PTXReg<"f29">; def F30 : PTXReg<"f30">; def F31 : PTXReg<"f31">; +///===- 64-bit Floating-Point Registers -----------------------------------===// + +def FD0 : PTXReg<"fd0">; +def FD1 : PTXReg<"fd1">; +def FD2 : PTXReg<"fd2">; +def FD3 : PTXReg<"fd3">; +def FD4 : PTXReg<"fd4">; +def FD5 : PTXReg<"fd5">; +def FD6 : PTXReg<"fd6">; +def FD7 : PTXReg<"fd7">; +def FD8 : PTXReg<"fd8">; +def FD9 : PTXReg<"fd9">; +def FD10 : PTXReg<"fd10">; +def FD11 : PTXReg<"fd11">; +def FD12 : PTXReg<"fd12">; +def FD13 : PTXReg<"fd13">; +def FD14 : PTXReg<"fd14">; +def FD15 : PTXReg<"fd15">; +def FD16 : PTXReg<"fd16">; +def FD17 : PTXReg<"fd17">; +def FD18 : PTXReg<"fd18">; +def FD19 : PTXReg<"fd19">; +def FD20 : PTXReg<"fd20">; +def FD21 : PTXReg<"fd21">; +def FD22 : PTXReg<"fd22">; +def FD23 : PTXReg<"fd23">; +def FD24 : PTXReg<"fd24">; +def FD25 : PTXReg<"fd25">; +def FD26 : PTXReg<"fd26">; +def FD27 : PTXReg<"fd27">; +def FD28 : PTXReg<"fd28">; +def FD29 : PTXReg<"fd29">; +def FD30 : PTXReg<"fd30">; +def FD31 : PTXReg<"fd31">; + //===----------------------------------------------------------------------===// // Register classes @@ -129,14 +240,32 @@ def Preds : RegisterClass<"PTX", [i1], 8, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31]>; -def RRegs32 : RegisterClass<"PTX", [i32], 32, +def RRegu16 : RegisterClass<"PTX", [i16], 16, + [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, + RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15, + RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, + RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31]>; + +def RRegu32 : RegisterClass<"PTX", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31]>; +def RRegu64 : RegisterClass<"PTX", [i64], 64, + [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, + RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15, + RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, + RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31]>; + def RRegf32 : RegisterClass<"PTX", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + +def RRegf64 : RegisterClass<"PTX", [f64], 64, + [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7, + FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15, + FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23, + FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31]>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index 00e2c882a5..18a93052c9 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -12,12 +12,33 @@ //===----------------------------------------------------------------------===// #include "PTXSubtarget.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) { - std::string TARGET = "sm_20"; - // TODO: call ParseSubtargetFeatures(FS, TARGET); +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) + : PTXShaderModel(PTX_SM_1_0), + PTXVersion(PTX_VERSION_1_4) { + std::string TARGET = "generic"; + ParseSubtargetFeatures(FS, TARGET); +} + +std::string PTXSubtarget::getTargetString() const { + switch(PTXShaderModel) { + default: llvm_unreachable("Unknown shader model"); + case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_3: return "sm_13"; + case PTX_SM_2_0: return "sm_20"; + } +} + +std::string PTXSubtarget::getPTXVersionString() const { + switch(PTXVersion) { + default: llvm_unreachable("Unknown PTX version"); + case PTX_VERSION_1_4: return "1.4"; + case PTX_VERSION_2_0: return "2.0"; + case PTX_VERSION_2_1: return "2.1"; + } } #include "PTXGenSubtarget.inc" diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 7fd85f873a..9a9ada2af6 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -19,11 +19,36 @@ namespace llvm { class PTXSubtarget : public TargetSubtarget { private: - bool is_sm20; + enum PTXShaderModelEnum { + PTX_SM_1_0, + PTX_SM_1_3, + PTX_SM_2_0 + }; + + enum PTXVersionEnum { + PTX_VERSION_1_4, + PTX_VERSION_2_0, + PTX_VERSION_2_1 + }; + + /// Shader Model supported on the target GPU. + PTXShaderModelEnum PTXShaderModel; + + /// PTX Language Version. + PTXVe |