aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/PTX/PTX.td30
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp64
-rw-r--r--lib/Target/PTX/PTXISelDAGToDAG.cpp27
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp31
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp17
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td203
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp4
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.td131
-rw-r--r--lib/Target/PTX/PTXSubtarget.cpp27
-rw-r--r--lib/Target/PTX/PTXSubtarget.h27
10 files changed, 440 insertions, 121 deletions
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 8b1a1b18da..9f62aa16f8 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -19,8 +19,34 @@ include "llvm/Target/Target.td"
// Subtarget Features.
//===----------------------------------------------------------------------===//
-def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
- "Enable sm_20 target architecture">;
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
+ "Use PTX Language Version 1.4">;
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0",
+ [FeaturePTX14]>;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1",
+ [FeaturePTX20]>;
+
+//===- PTX Shader Model ---------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
+ "Enable Shader Model 1.0 compliance">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
+ "Enable Shader Model 1.3 compliance",
+ [FeatureSM10, FeatureDouble]>;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
+ "Enable Shader Model 2.0 compliance",
+ [FeatureSM13]>;
//===----------------------------------------------------------------------===//
// PTX supported processors.
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 25f26fa4c4..35eeadce2d 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -37,13 +38,6 @@
using namespace llvm;
-static cl::opt<std::string>
-OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
-
-static cl::opt<std::string>
-OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
- cl::init("sm_10"));
-
namespace {
class PTXAsmPrinter : public AsmPrinter {
public:
@@ -82,11 +76,14 @@ private:
static const char PARAM_PREFIX[] = "__param_";
static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
+#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
- TEST_REGCLS(RRegf32, f32);
- TEST_REGCLS(RRegs32, s32);
TEST_REGCLS(Preds, pred);
+ TEST_REGCLS(RRegu16, u16);
+ TEST_REGCLS(RRegu32, u32);
+ TEST_REGCLS(RRegu64, u64);
+ TEST_REGCLS(RRegf32, f32);
+ TEST_REGCLS(RRegf64, f64);
#undef TEST_REGCLS
llvm_unreachable("Not in any register class!");
@@ -121,7 +118,14 @@ static const char *getTypeName(const Type* type) {
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
case Type::FloatTyID: return ".f32";
- case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
case Type::ArrayTyID:
case Type::PointerTyID:
type = dyn_cast<const SequentialType>(type)->getElementType();
@@ -162,8 +166,11 @@ bool PTXAsmPrinter::doFinalization(Module &M) {
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
- OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
- OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? "" : ", map_f64_to_f32")));
OutStreamer.AddBlankLine();
// declare global variables
@@ -236,11 +243,24 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_FPImmediate:
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+ // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+ if (isFloat) {
+ OS << "0F";
+ }
+ else {
+ OS << "0D";
+ }
+ // Emit the encoded floating-point value.
if (constFP.getZExtValue() > 0) {
- OS << "0F" << constFP.toString(16, false);
+ OS << constFP.toString(16, false);
}
else {
- OS << "0F00000000";
+ OS << "00000000";
+ // If We have a double-precision zero, pad to 8-bytes.
+ if (!isFloat) {
+ OS << "00000000";
+ }
}
break;
}
@@ -338,12 +358,18 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
if (!MFI->argRegEmpty()) {
decl += " (";
if (isKernel) {
- for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
- if (i != 0)
+ unsigned cnt = 0;
+ //for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
+ for(PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+ reg = *i;
+ assert(reg != PTX::NoRegister && "Not a valid register!");
+ if (i != b)
decl += ", ";
- decl += ".param .s32 "; // TODO: add types
+ decl += ".param .u32"; // TODO: Parse type from register map
+ decl += " ";
decl += PARAM_PREFIX;
- decl += utostr(i + 1);
+ decl += utostr(++cnt);
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index efb0e8b1af..1e6a53fee9 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "PTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -66,14 +67,34 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
}
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
- SDValue index = Node->getOperand(1);
- DebugLoc dl = Node->getDebugLoc();
+ SDValue index = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned opcode;
if (index.getOpcode() != ISD::TargetConstant)
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
+ if (Node->getValueType(0) == MVT::i16) {
+ opcode = PTX::LDpiU16;
+ }
+ else if (Node->getValueType(0) == MVT::i32) {
+ opcode = PTX::LDpiU32;
+ }
+ else if (Node->getValueType(0) == MVT::i64) {
+ opcode = PTX::LDpiU64;
+ }
+ else if (Node->getValueType(0) == MVT::f32) {
+ opcode = PTX::LDpiF32;
+ }
+ else if (Node->getValueType(0) == MVT::f64) {
+ opcode = PTX::LDpiF64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type for ld.param");
+ }
+
return PTXInstrInfo::
- GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+ GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
}
// Match memory operand of the form [reg+reg]
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index d30c9ecbe4..147b2a82cf 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,13 +28,17 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
- addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
+ addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
-
+ addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
-
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -90,10 +95,13 @@ struct argmap_entry {
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
- argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
- argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
+ argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
+ argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
+ argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
+ argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
+ argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
};
-} // end anonymous namespace
+} // end anonymous namespace
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
@@ -192,12 +200,21 @@ SDValue PTXTargetLowering::
SDValue Flag;
unsigned reg;
- if (Outs[0].VT == MVT::i32) {
+ if (Outs[0].VT == MVT::i16) {
+ reg = PTX::RH0;
+ }
+ else if (Outs[0].VT == MVT::i32) {
reg = PTX::R0;
}
+ else if (Outs[0].VT == MVT::i64) {
+ reg = PTX::RD0;
+ }
else if (Outs[0].VT == MVT::f32) {
reg = PTX::F0;
}
+ else if (Outs[0].VT == MVT::f64) {
+ reg = PTX::FD0;
+ }
else {
assert(false && "Can return only basic types");
}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index f2e5e4c110..7277238c2e 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -27,9 +27,12 @@ static const struct map_entry {
const TargetRegisterClass *cls;
const int opcode;
} map[] = {
- { &PTX::RRegs32RegClass, PTX::MOVrr },
- { &PTX::RRegf32RegClass, PTX::MOVrr },
- { &PTX::PredsRegClass, PTX::MOVpp }
+ { &PTX::RRegu16RegClass, PTX::MOVU16rr },
+ { &PTX::RRegu32RegClass, PTX::MOVU32rr },
+ { &PTX::RRegu64RegClass, PTX::MOVU64rr },
+ { &PTX::RRegf32RegClass, PTX::MOVF32rr },
+ { &PTX::RRegf64RegClass, PTX::MOVF64rr },
+ { &PTX::PredsRegClass, PTX::MOVPREDrr }
};
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -76,8 +79,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
switch (MI.getOpcode()) {
default:
return false;
- case PTX::MOVpp:
- case PTX::MOVrr:
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
"Invalid register-register move instruction");
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 9d962b0e25..fce6da66ef 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -114,7 +114,7 @@ def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
// Address operands
def MEMri : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RRegs32, i32imm);
+ let MIOperandInfo = (ops RRegu32, i32imm);
}
def MEMii : Operand<i32> {
let PrintMethod = "printMemOperand";
@@ -143,75 +143,115 @@ def PTXret
// Instruction Class Templates
//===----------------------------------------------------------------------===//
-// Three-operand f32 instruction template
+// Three-operand floating-point instruction template
multiclass FLOAT3<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
- def ri : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, f32imm:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+ def rr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
+ def ri32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, f32imm:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
+ def ri64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, f64imm:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
}
multiclass INT3<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
}
// no %type directive, non-communtable
multiclass INT3ntnc<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
+ def rr : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
- def ir : InstPTX<(outs RRegs32:$d),
- (ins i32imm:$a, RRegs32:$b),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def ir : InstPTX<(outs RRegu32:$d),
+ (ins i32imm:$a, RRegu32:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+ [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
}
-multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
def rr : InstPTX<(outs RC:$d),
(ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRrr:$a))]>;
def ri : InstPTX<(outs RC:$d),
(ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRri:$a))]>;
def ii : InstPTX<(outs RC:$d),
(ins MEMii:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRii:$a))]>;
}
-multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
def rr : InstPTX<(outs),
(ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRrr:$a)]>;
def ri : InstPTX<(outs),
(ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRri:$a)]>;
def ii : InstPTX<(outs),
(ins RC:$d, MEMii:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRii:$a)]>;
}
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -236,60 +276,67 @@ defm SRA : INT3ntnc<"shr.s32", PTXsra>;
///===- Data Movement and Conversion Instructions -------------------------===//
let neverHasSideEffects = 1 in {
- // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
- def MOVpp
+ def MOVPREDrr
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
- def MOVrr
- : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
- def FMOVrr
+ def MOVU16rr
+ : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def MOVpi
+ def MOVPREDri
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
[(set Preds:$d, imm:$a)]>;
- def MOVri
- : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
- [(set RRegs32:$d, imm:$a)]>;
- def FMOVri
+ def MOVU16ri
+ : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RRegu16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RRegu32:$d, imm:$a)]>;
+ def MOVU164ri
+ : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RRegu64:$d, imm:$a)]>;
+ def MOVF32ri
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
[(set RRegf32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RRegf64:$d, fpimm:$a)]>;
}
-// Integer loads
-defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
-defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
-defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
-defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
-defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
-
-def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
- "ld.param.%type\t$d, [$a]", []>;
-
-// Floating-point loads
-defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
-defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
-defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
-defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
-defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
-
-def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
- "ld.param.%type\t$d, [$a]", []>;
-
-// Integer stores
-defm STg : PTX_ST<"st.global", RRegs32, store_global>;
-defm STl : PTX_ST<"st.local", RRegs32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
-defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
-
-// Floating-point stores
-defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
-defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
-defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local", load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// This is a special instruction that is manually inserted for kernel parameters
+def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]", []>;
+def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]", []>;
+def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]", []>;
+def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]", []>;
+def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]", []>;
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local", store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
///===- Control Flow Instructions -----------------------------------------===//
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b37c740006..c5e1910072 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Arg Reg: " << *i << "\n";);
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Local Var Reg: " << *i << "\n";);
return false;
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 9158f0d31c..548e3bbeb9 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> {
// Registers
//===----------------------------------------------------------------------===//
+///===- Predicate Registers -----------------------------------------------===//
+
def P0 : PTXReg<"p0">;
def P1 : PTXReg<"p1">;
def P2 : PTXReg<"p2">;
@@ -52,6 +54,43 @@ def P29 : PTXReg<"p29">;
def P30 : PTXReg<"p30">;
def P31 : PTXReg<"p31">;
+///===- 16-bit Integer Registers ------------------------------------------===//
+
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+
+///===- 32-bit Integer Registers ------------------------------------------===//
+
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
def R2 : PTXReg<"r2">;
@@ -85,6 +124,43 @@ def R29 : PTXReg<"r29">;
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
+///===- 64-bit Integer Registers ------------------------------------------===//
+
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+
+///===- 32-bit Floating-Point Registers -----------------------------------===//
+
def F0 : PTXReg<"f0">;
def F1 : PTXReg<"f1">;
def F2 : PTXReg<"f2">;
@@ -118,6 +194,41 @@ def F29 : PTXReg<"f29">;
def F30 : PTXReg<"f30">;
def F31 : PTXReg<"f31">;
+///===- 64-bit Floating-Point Registers -----------------------------------===//
+
+def FD0 : PTXReg<"fd0">;
+def FD1 : PTXReg<"fd1">;
+def FD2 : PTXReg<"fd2">;
+def FD3 : PTXReg<"fd3">;
+def FD4 : PTXReg<"fd4">;
+def FD5 : PTXReg<"fd5">;
+def FD6 : PTXReg<"fd6">;
+def FD7 : PTXReg<"fd7">;
+def FD8 : PTXReg<"fd8">;
+def FD9 : PTXReg<"fd9">;
+def FD10 : PTXReg<"fd10">;
+def FD11 : PTXReg<"fd11">;
+def FD12 : PTXReg<"fd12">;
+def FD13 : PTXReg<"fd13">;
+def FD14 : PTXReg<"fd14">;
+def FD15 : PTXReg<"fd15">;
+def FD16 : PTXReg<"fd16">;
+def FD17 : PTXReg<"fd17">;
+def FD18 : PTXReg<"fd18">;
+def FD19 : PTXReg<"fd19">;
+def FD20 : PTXReg<"fd20">;
+def FD21 : PTXReg<"fd21">;
+def FD22 : PTXReg<"fd22">;
+def FD23 : PTXReg<"fd23">;
+def FD24 : PTXReg<"fd24">;
+def FD25 : PTXReg<"fd25">;
+def FD26 : PTXReg<"fd26">;
+def FD27 : PTXReg<"fd27">;
+def FD28 : PTXReg<"fd28">;
+def FD29 : PTXReg<"fd29">;
+def FD30 : PTXReg<"fd30">;
+def FD31 : PTXReg<"fd31">;
+
//===----------------------------------------------------------------------===//
// Register classes
@@ -129,14 +240,32 @@ def Preds : RegisterClass<"PTX", [i1], 8,
P16, P17, P18, P19, P20, P21, P22, P23,
P24, P25, P26, P27, P28, P29, P30, P31]>;
-def RRegs32 : RegisterClass<"PTX", [i32], 32,
+def RRegu16 : RegisterClass<"PTX", [i16], 16,
+ [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
+ RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
+ RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
+ RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31]>;
+
+def RRegu32 : RegisterClass<"PTX", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
R16, R17, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27, R28, R29, R30, R31]>;
+def RRegu64 : RegisterClass<"PTX", [i64], 64,
+ [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
+ RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
+ RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
+ RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31]>;
+
def RRegf32 : RegisterClass<"PTX", [f32], 32,
[F0, F1, F2, F3, F4, F5, F6, F7,
F8, F9, F10, F11, F12, F13, F14, F15,
F16, F17, F18, F19, F20, F21, F22, F23,
F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def RRegf64 : RegisterClass<"PTX", [f64], 64,
+ [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
+ FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
+ FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
+ FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31]>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 00e2c882a5..18a93052c9 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -12,12 +12,33 @@
//===----------------------------------------------------------------------===//
#include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
- std::string TARGET = "sm_20";
- // TODO: call ParseSubtargetFeatures(FS, TARGET);
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
+ : PTXShaderModel(PTX_SM_1_0),
+ PTXVersion(PTX_VERSION_1_4) {
+ std::string TARGET = "generic";
+ ParseSubtargetFeatures(FS, TARGET);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXShaderModel) {
+ default: llvm_unreachable("Unknown shader model");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_1_4: return "1.4";
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ }
}
#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 7fd85f873a..9a9ada2af6 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -19,11 +19,36 @@
namespace llvm {
class PTXSubtarget : public TargetSubtarget {
private:
- bool is_sm20;
+ enum PTXShaderModelEnum {
+ PTX_SM_1_0,
+ PTX_SM_1_3,
+ PTX_SM_2_0
+ };
+
+ enum PTXVersionEnum {
+ PTX_VERSION_1_4,
+ PTX_VERSION_2_0,
+ PTX_VERSION_2_1
+ };
+
+ /// Shader Model supported on the target GPU.
+ PTXShaderModelEnum PTXShaderModel;
+
+ /// PTX Language Version.
+ PTXVe