aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp11
-rw-r--r--lib/Target/PTX/PTXISelDAGToDAG.cpp85
-rw-r--r--lib/Target/PTX/PTXInstrLoadStore.td27
-rw-r--r--test/CodeGen/PTX/stack-object.ll4
4 files changed, 77 insertions, 50 deletions
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 260ed231b1..c09a762678 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -299,10 +299,12 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
if (FrameInfo->getObjectSize(i) > 0) {
std::string def = "\t.local .align ";
def += utostr(FrameInfo->getObjectAlignment(i));
- def += " .b";
- def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " .b8";
def += " __local";
def += utostr(i);
+ def += "[";
+ def += utostr(FrameInfo->getObjectSize(i)); // Array length: object size in bytes
+ def += "]";
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
@@ -465,6 +467,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum,
raw_ostream &OS, const char *Modifier) {
OS << "__local" << MI->getOperand(opNum).getImm();
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() != 0){
+ OS << "+";
+ printOperand(MI, opNum+1, OS);
+ }
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index 383ba44f3f..d99d49075d 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -213,14 +213,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
-
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@@ -242,7 +282,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
}
// neither [reg+imm] nor [imm+reg]
- return false;
+ return false;*/
}
// Match memory operand of the form [imm+imm] and [imm]
@@ -269,35 +309,30 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
// Match memory operand of the form [frameindex] and [frameindex+imm]
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
- // let SelectADDRii handle the [imm] case
- if (isImm(Addr))
- return false;
- // it is [reg]
-
- assert(Addr.getValueType().isSimple() && "Type must be simple");
-
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
-
+ //errs() << "Success\n";
return true;
}
- if (Addr.getNumOperands() < 2)
- return false;
-
- // let SelectADDRii handle the [imm+imm] case
- if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
- return false;
-
- // try [reg+imm] and [imm+reg]
- for (int i = 0; i < 2; i ++)
- if (SelectImm(Addr.getOperand(1-i), Offset)) {
- Base = Addr.getOperand(i);
- return true;
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
}
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
- // neither [reg+imm] nor [imm+reg]
+ //errs() << "Failure\n";
return false;
}
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
index 83aafd81ef..bb84bb5696 100644
--- a/lib/Target/PTX/PTXInstrLoadStore.td
+++ b/lib/Target/PTX/PTXInstrLoadStore.td
@@ -24,9 +24,7 @@ def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
- const SDValue &MemOp = N->getOperand(1);
- if ((MemOp.getOpcode() != ISD::FrameIndex) &&
- (Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTX::GLOBAL;
return false;
@@ -41,11 +39,6 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return false;
}]>;
-def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const SDValue &MemOp = N->getOperand(1);
- return MemOp.getOpcode() == ISD::FrameIndex;
-}]>;
-
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
@@ -59,20 +52,12 @@ def store_global
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
const PointerType *PT;
- const SDValue &MemOp = N->getOperand(2);
- if ((MemOp.getOpcode() != ISD::FrameIndex) &&
- (Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTX::GLOBAL;
return false;
}]>;
-def store_local
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const SDValue &MemOp = N->getOperand(2);
- return MemOp.getOpcode() == ISD::FrameIndex;
-}]>;
-
def store_shared
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
@@ -221,16 +206,16 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (load_local ADDRlocal32:$a))]>;
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (load_local ADDRlocal64:$a))]>;
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
- [(store_local RC:$d, ADDRlocal32:$a)]>;
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
- [(store_local RC:$d, ADDRlocal64:$a)]>;
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
}
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll
index aab7f51f1f..65f8ee2300 100644
--- a/test/CodeGen/PTX/stack-object.ll
+++ b/test/CodeGen/PTX/stack-object.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
define ptx_device float @stack1(float %a) {
- ; CHECK: .local .align 4 .b32 __local0;
+ ; CHECK: .local .align 4 .b8 __local0[4];
%a.2 = alloca float, align 4
; CHECK: st.local.f32 [__local0], %f0
store float %a, float* %a.2
@@ -10,7 +10,7 @@ define ptx_device float @stack1(float %a) {
}
define ptx_device float @stack1_align8(float %a) {
- ; CHECK: .local .align 8 .b32 __local0;
+ ; CHECK: .local .align 8 .b8 __local0[4];
%a.2 = alloca float, align 8
; CHECK: st.local.f32 [__local0], %f0
store float %a, float* %a.2