diff options
author | Justin Holewinski <jholewinski@nvidia.com> | 2012-11-14 19:19:16 +0000 |
---|---|---|
committer | Justin Holewinski <jholewinski@nvidia.com> | 2012-11-14 19:19:16 +0000 |
commit | a20067b5d4ea718a3f0e47adae8b231e4a2db9f6 (patch) | |
tree | 7e05fc49d5ffda33669f40ba064f76e9daa206d0 | |
parent | 2337dd7c862f0fcf902725c261cc42a915a85bfd (diff) |
[NVPTX] Implement custom lowering of loads/stores for i1
Loads from i1 become loads from i8 followed by trunc
Stores to i1 become zext to i8 followed by store to i8
Fixes PR13291
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167948 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.cpp | 60 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.h | 3 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/pr13291-i1-store.ll | 26 |
3 files changed, 87 insertions, 2 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index be771e3567..8430e21204 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setOperationAction(ISD::STORE, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i32, MVT::i1, Expand); setTruncStoreAction(MVT::i16, MVT::i1, Expand); @@ -856,11 +857,66 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_SUBVECTOR: return Op; case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } + +// v = ld i1* addr +// => +// v1 = ld i8* addr +// v = trunc v1 to i1 +SDValue NVPTXTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(Node); + DebugLoc dl = Node->getDebugLoc(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + assert(ExtType == ISD::NON_EXTLOAD) ; + EVT VT = Node->getValueType(0); + assert(VT == MVT::i1 && "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + LD->getAlignment()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = {result, LD->getChain()}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// st i1 v, addr +// => +// v1 = zxt v to i8 +// st i8, addr +SDValue NVPTXTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + EVT VT = Tmp3.getValueType(); + assert(VT == MVT::i1 && "Custom lowering for i1 store only"); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + return Result; +} + + SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, EVT v) const { diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 86246e6449..94a177ceb0 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -138,6 +138,9 @@ private: SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll new file mode 100644 index 0000000000..779f7798d8 --- /dev/null +++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 + +define ptx_kernel void @t1(i1* %a) { +; PTX32: mov.u16 %rc{{[0-9]+}}, 0; +; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}; +; PTX64: mov.u16 %rc{{[0-9]+}}, 0; +; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}; + store i1 false, i1* %a + ret void +} + + +define ptx_kernel void @t2(i1* %a, i8* %b) { +; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1; +; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}] +; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1; +; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1; + + %t1 = load i1* %a + %t2 = select i1 %t1, i8 1, i8 2 + store i8 %t2, i8* %b + ret void +} |