author    | Scott Michel <scottm@aero.org> | 2008-12-27 04:51:36 +0000
committer | Scott Michel <scottm@aero.org> | 2008-12-27 04:51:36 +0000
commit    | f0569be4a948c7ed816bfa2b8774a5a18458ee23 (patch)
tree      | 541905fcbd5e64ef95599b1ca3c4182adc972688 /lib
parent    | 1323e8bf6a7bec163c5d43006f5b3b78042cef61 (diff)
- Remove Tilmann's custom truncate lowering: it completely hosed DAGCombine's
  ability to eliminate truncates when they were not needed. Consequently, the
  CellSPU backend would produce correct, but _really slow and horrible_, code.
  Replaced it with instruction sequences in SPUInstrInfo.td that perform the
  equivalent truncation (a conceptual sketch follows this list).
- Re-examine how unaligned loads and stores work. The generated unaligned-load
  code has been tested on CellSPU hardware; see i32operations.c and
  i64operations.c in CodeGen/CellSPU/useful-harnesses. (They may be toy test
  cases, but they do prove that some real-world code compiles correctly; a
  sketch of the load-and-rotate strategy follows the commit message.)
- Fix truncating stores from bug 3193 (note: unpack_df.ll will still make llc
  fault because i64 ult is not yet implemented).
- Added i64 eq and neq for setcc and select/setcc; started a new instruction
  information file for them, SPU64InstrInfo.td. Additional i64 operations
  should be added to this file, not to SPUInstrInfo.td. (A scalar sketch of
  the new seteq sequence follows the diffstat below.)
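A concrete picture of the first item above. This is a conceptual sketch only, not code from this commit (the actual replacements are TableGen patterns in SPUInstrInfo.td): on the big-endian SPU a truncate is just a byte selection within the 128-bit register, so instruction selection can emit a plain shuffle/rotate while ISD::TRUNCATE stays visible to DAGCombine until then.

```cpp
#include <cstdint>

// Hypothetical host-side model of an i64 -> i32 truncate on one 16-byte
// SPU register. The i64 occupies its preferred slot (bytes 0-7, big-endian),
// so its low 32 bits are bytes 4-7; the i32 preferred slot is bytes 0-3.
// The "truncate" is therefore a fixed byte selection (a shuffle/rotate at
// the ISA level), not an arithmetic operation.
static uint32_t truncI64ToI32(const uint8_t reg[16]) {
  return (uint32_t(reg[4]) << 24) | (uint32_t(reg[5]) << 16) |
         (uint32_t(reg[6]) << 8)  |  uint32_t(reg[7]);
}
```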
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
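The unaligned-load rework in LowerLOAD (see the SPUISelLowering.cpp hunks below) always issues a 16-byte quadword load and then rotates the chunk left so the requested value lands in its type's preferred slot. A minimal host-side sketch of that strategy, assuming the value does not straddle a 16-byte boundary; the offsets mirror the backend's value-type map, where prefslot_byte is 0 for i32:

```cpp
#include <cstdint>

// Model of the SPU load-and-rotate scheme; plain C++, not backend code.
static uint32_t loadU32ViaChunk(const uint8_t *addr) {
  const uint8_t *chunk =
      (const uint8_t *) ((uintptr_t) addr & ~(uintptr_t) 0xf); // 16-byte base
  int offset = (int) ((uintptr_t) addr & 0xf); // byte offset into the chunk
  int prefslot_byte = 0;                       // preferred slot byte for i32
  int rotamt = offset - prefslot_byte;         // ROTBYTES_LEFT amount
  if (rotamt < 0)
    rotamt += 16;

  uint8_t rotated[16];
  for (int i = 0; i != 16; ++i) // rotate the chunk left by rotamt bytes
    rotated[i] = chunk[(i + rotamt) & 0xf];

  // The value now sits in bytes 0-3; assemble it big-endian, as on SPU.
  return (uint32_t(rotated[0]) << 24) | (uint32_t(rotated[1]) << 16) |
         (uint32_t(rotated[2]) << 8)  |  uint32_t(rotated[3]);
}
```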
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp |   8
-rw-r--r-- | lib/Target/CellSPU/SPU64InstrInfo.td            |  77
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp          |  66
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp          | 797
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.h            |   6
-rw-r--r-- | lib/Target/CellSPU/SPUInstrFormats.td           |   5
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.cpp             |  55
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td              | 726
-rw-r--r-- | lib/Target/CellSPU/SPUNodes.td                  |  14
-rw-r--r-- | lib/Target/CellSPU/SPUOperands.td               |  10
-rw-r--r-- | lib/Target/CellSPU/SPURegisterInfo.cpp          |   5
-rw-r--r-- | lib/Target/CellSPU/SPUTargetAsmInfo.cpp         |   7
12 files changed, 1148 insertions, 628 deletions
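One more orientation point before the diff: the new SPU64InstrInfo.td below builds i64 equality out of 32-bit vector operations — CEQv4i32 compares word-wise, GBv4i32 gathers one bit per word, and CGTIv4i32 tests the gathered mask against an immediate (0xc in this commit). A scalar model of the idea, hedged in that it checks the two relevant mask bits directly rather than reproducing the exact immediate compare:

```cpp
#include <cstdint>

// Conceptual model of the CEQr64compare fragment: the i64 occupies words 0
// and 1 of the quadword, GB places word 0's compare result in bit 3 of the
// mask and word 1's in bit 2, and equality holds when both bits are set.
static bool i64SetEqModel(uint64_t a, uint64_t b) {
  unsigned mask = 0;
  mask |= unsigned(uint32_t(a >> 32) == uint32_t(b >> 32)) << 3; // word 0
  mask |= unsigned(uint32_t(a)       == uint32_t(b))       << 2; // word 1
  return (mask & 0xc) == 0xc; // the .td fragment tests this via CGTI
}
```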
```diff
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 589a260005..98aa084d50 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -117,7 +117,7 @@ namespace {
     }

     void
-    printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
+    printShufAddr(const MachineInstr *MI, unsigned OpNo)
     {
       char value = MI->getOperand(OpNo).getImm();
       O << (int) value;
@@ -183,16 +183,16 @@ namespace {
     }

     void
-    printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
+    printDFormAddr(const MachineInstr *MI, unsigned OpNo)
     {
       const MachineOperand &MO = MI->getOperand(OpNo);
       assert(MO.isImm() &&
-             "printMemRegImmS10 first operand is not immedate");
+             "printDFormAddr first operand is not immedate");
       int64_t value = int64_t(MI->getOperand(OpNo).getImm());
       int16_t value16 = int16_t(value);
       assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
              && "Invalid dform s10 offset argument");
-      O << value16 << "(";
+      O << (value16 & ~0xf) << "(";
       printOperand(MI, OpNo+1);
       O << ")";
     }
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000000..6d679bac72
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,77 @@
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+//    constant.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+//    could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+//    mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
+//    (TODO)
+//
+// M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+   SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+            [/* no pattern */]>;
+
+class CodeFrag<dag frag> {
+  dag Fragment = frag;
+}
+
+class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+
+class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+  Pat<(cond R64C:$rA, R64C:$rB),
+      (XORIr32 cmpare.Fragment, -1)>;
+
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+                                           (ORv2i64_i64 R64C:$rB))),
+                        0x0000000c)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
+                        0x0000000f)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64EQv2i64.Fragment>;
+
+def I64Select:
+    Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+        (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+
+def : I64SETCCNegCond<setne, I64EQr64>;
+
+def : I64SELECTNegCond<setne, I64EQr64>;
\ No newline at end of file
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9ac0e2e256..f51aba2fda 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -165,24 +165,23 @@ namespace {
     MVT VT;
     unsigned ldresult_ins;  /// LDRESULT instruction (0 = undefined)
     bool ldresult_imm;      /// LDRESULT instruction requires immediate?
-    int prefslot_byte;      /// Byte offset of the "preferred" slot
+    unsigned lrinst;        /// LR instruction
   };

   const valtype_map_s valtype_map[] = {
-    { MVT::i1,    0,            false, 3 },
-    { MVT::i8,    SPU::ORBIr8,  true,  3 },
-    { MVT::i16,   SPU::ORHIr16, true,  2 },
-    { MVT::i32,   SPU::ORIr32,  true,  0 },
-    { MVT::i64,   SPU::ORr64,   false, 0 },
-    { MVT::f32,   SPU::ORf32,   false, 0 },
-    { MVT::f64,   SPU::ORf64,   false, 0 },
+    { MVT::i8,    SPU::ORBIr8,  true,  SPU::LRr8 },
+    { MVT::i16,   SPU::ORHIr16, true,  SPU::LRr16 },
+    { MVT::i32,   SPU::ORIr32,  true,  SPU::LRr32 },
+    { MVT::i64,   SPU::ORr64,   false, SPU::LRr64 },
+    { MVT::f32,   SPU::ORf32,   false, SPU::LRf32 },
+    { MVT::f64,   SPU::ORf64,   false, SPU::LRf64 },
     // vector types... (sigh!)
-    { MVT::v16i8, 0, false, 0 },
-    { MVT::v8i16, 0, false, 0 },
-    { MVT::v4i32, 0, false, 0 },
-    { MVT::v2i64, 0, false, 0 },
-    { MVT::v4f32, 0, false, 0 },
-    { MVT::v2f64, 0, false, 0 }
+    { MVT::v16i8, 0, false, SPU::LRv16i8 },
+    { MVT::v8i16, 0, false, SPU::LRv8i16 },
+    { MVT::v4i32, 0, false, SPU::LRv4i32 },
+    { MVT::v2i64, 0, false, SPU::LRv2i64 },
+    { MVT::v4f32, 0, false, SPU::LRv4f32 },
+    { MVT::v2f64, 0, false, SPU::LRv2f64 }
   };

   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
@@ -686,31 +685,32 @@ SPUDAGToDAGISel::Select(SDValue Op) {
       Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
     }

-    Chain = SDValue(Result, 1);
-
     return Result;
   } else if (Opc == SPUISD::IndirectAddr) {
-    SDValue Op0 = Op.getOperand(0);
-    if (Op0.getOpcode() == SPUISD::LDRESULT) {
-      /* || Op0.getOpcode() == SPUISD::AFormAddr) */
-      // (IndirectAddr (LDRESULT, imm))
-      SDValue Op1 = Op.getOperand(1);
-      MVT VT = Op.getValueType();
-
-      DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = ");
-      DEBUG(Op.getOperand(0).getNode()->dump(CurDAG));
-      DEBUG(cerr << "\nOp1 = ");
-      DEBUG(Op.getOperand(1).getNode()->dump(CurDAG));
-      DEBUG(cerr << "\n");
-
+    // Look at the operands: SelectCode() will catch the cases that aren't
+    // specifically handled here.
+    //
+    // SPUInstrInfo catches the following patterns:
+    // (SPUindirect (SPUhi ...), (SPUlo ...))
+    // (SPUindirect $sp, imm)
+    MVT VT = Op.getValueType();
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    RegisterSDNode *RN;
+
+    if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+        || (Op0.getOpcode() == ISD::Register
+            && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+                && RN->getReg() != SPU::R1))) {
+      NewOpc = SPU::Ar32;
       if (Op1.getOpcode() == ISD::Constant) {
         ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-        Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT);
+        Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
         NewOpc = (isI32IntS10Immediate(CN) ?
                   SPU::AIr32 : SPU::Ar32);
-        Ops[0] = Op0;
-        Ops[1] = Op1;
-        n_ops = 2;
       }
+      Ops[0] = Op0;
+      Ops[1] = Op1;
+      n_ops = 2;
     }
   }
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c3c31e0f47..e975d0d039 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SPUISelLowering.h"
 #include "SPUTargetMachine.h"
 #include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -77,37 +78,6 @@ namespace {
     return retval;
   }
-
-  //! Predicate that returns true if operand is a memory target
-  /*!
-    \arg Op Operand to test
-    \return true if the operand is a memory target (i.e., global
-    address, external symbol, constant pool) or an A-form
-    address.
-   */
-  bool isMemoryOperand(const SDValue &Op)
-  {
-    const unsigned Opc = Op.getOpcode();
-    return (Opc == ISD::GlobalAddress
-            || Opc == ISD::GlobalTLSAddress
-            || Opc == ISD::JumpTable
-            || Opc == ISD::ConstantPool
-            || Opc == ISD::ExternalSymbol
-            || Opc == ISD::TargetGlobalAddress
-            || Opc == ISD::TargetGlobalTLSAddress
-            || Opc == ISD::TargetJumpTable
-            || Opc == ISD::TargetConstantPool
-            || Opc == ISD::TargetExternalSymbol
-            || Opc == SPUISD::AFormAddr);
-  }
-
-  //! Predicate that returns true if the operand is an indirect target
-  bool isIndirectOperand(const SDValue &Op)
-  {
-    const unsigned Opc = Op.getOpcode();
-    return (Opc == ISD::Register
-            || Opc == SPUISD::LDRESULT);
-  }
 }

 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -135,20 +105,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
-  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
-  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
-  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
-
-  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Custom);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

   // SPU constant load actions are custom lowered:
   setOperationAction(ISD::Constant, MVT::i64, Custom);
@@ -160,11 +118,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
        ++sctype) {
     MVT VT = (MVT::SimpleValueType)sctype;

-    setOperationAction(ISD::LOAD, VT, Custom);
-    setOperationAction(ISD::STORE, VT, Custom);
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
+  }
+
+  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType) sctype;
+
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
   }

-  // Custom lower BRCOND for i8 to "promote" the result to i16
+  // Custom lower BRCOND for i8 to "promote" the result to whatever the result
+  // operand happens to be:
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);

   // Expand the jumptable branches
@@ -176,14 +156,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SELECT_CC, MVT::i8,  Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-#if 0
   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-#endif

   // SPU has no intrinsics for these particular operations:
   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

-  // PowerPC has no SREM/UREM instructions
+  // SPU has no SREM/UREM instructions
   setOperationAction(ISD::SREM, MVT::i32, Expand);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
   setOperationAction(ISD::SREM, MVT::i64, Expand);
@@ -232,14 +210,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::MUL, MVT::i32, Custom);
   setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

-  // SMUL_LOHI, UMUL_LOHI
-#if 0
-  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
-  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-#endif
-
   // Need to custom handle (some) common i8, i64 math ops
   setOperationAction(ISD::ADD, MVT::i64, Custom);
   setOperationAction(ISD::SUB, MVT::i8,  Custom);
@@ -265,12 +235,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SELECT, MVT::i8,  Legal);
   setOperationAction(ISD::SELECT, MVT::i16, Legal);
   setOperationAction(ISD::SELECT, MVT::i32, Legal);
-  setOperationAction(ISD::SELECT, MVT::i64, Expand);
+  setOperationAction(ISD::SELECT, MVT::i64, Legal);

   setOperationAction(ISD::SETCC, MVT::i8,  Legal);
   setOperationAction(ISD::SETCC, MVT::i16, Legal);
-  setOperationAction(ISD::SETCC, MVT::i32, Legal);
-  setOperationAction(ISD::SETCC, MVT::i64, Expand);
+  setOperationAction(ISD::SETCC, MVT::i32, Custom);
+  setOperationAction(ISD::SETCC, MVT::i64, Custom);

   // Zero extension and sign extension for i64 have to be
   // custom legalized
@@ -278,10 +248,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

-  // Custom lower truncates
-  setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
-  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
-  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+  // Custom lower i128 -> i64 truncates
   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

   // SPU has a legal FP -> signed INT instruction
@@ -292,7 +259,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)

   // FDIV on SPU requires custom lowering
   setOperationAction(ISD::FDIV, MVT::f32, Custom);
-  //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+  setOperationAction(ISD::FDIV, MVT::f64, Expand);   // libcall

   // SPU has [U|S]INT_TO_FP
   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
@@ -402,7 +369,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

   setShiftAmountType(MVT::i32);
-  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);

   setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -435,7 +402,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
   node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
   node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
-  node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
+  node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR";
   node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
   node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
   node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -471,9 +438,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   return ((i != node_names.end()) ? i->second : 0);
 }

+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
   MVT VT = Op.getValueType();
-  return (VT.isInteger() ? VT : MVT(MVT::i32));
+  // i16 and i32 are valid SETCC result types
+  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
 }

 //===----------------------------------------------------------------------===//
@@ -486,105 +458,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 // LowerOperation implementation
 //===----------------------------------------------------------------------===//

-/// Aligned load common code for CellSPU
-/*!
-  \param[in] Op The SelectionDAG load or store operand
-  \param[in] DAG The selection DAG
-  \param[in] ST CellSPU subtarget information structure
-  \param[in,out] alignment Caller initializes this to the load or store node's
-  value from getAlignment(), may be updated while generating the aligned load
-  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
-  offset (divisible by 16, modulo 16 == 0)
-  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
-  offset of the preferred slot (modulo 16 != 0)
-  \param[in,out] VT Caller initializes this value type to the the load or store
-  node's loaded or stored value type; may be updated if an i1-extended load or
-  store.
-  \param[out] was16aligned true if the base pointer had 16-byte alignment,
-  otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
-            LSBaseSDNode *LSN,
-            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
-            MVT &VT, bool &was16aligned)
-{
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-  SDValue basePtr = LSN->getBasePtr();
-  SDValue chain = LSN->getChain();
-
-  if (basePtr.getOpcode() == ISD::ADD) {
-    SDValue Op1 = basePtr.getNode()->getOperand(1);
-
-    if (Op1.getOpcode() == ISD::Constant
-        || Op1.getOpcode() == ISD::TargetConstant) {
-      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
-      alignOffs = (int) CN->getZExtValue();
-      prefSlotOffs = (int) (alignOffs & 0xf);
-
-      // Adjust the rotation amount to ensure that the final result ends up in
-      // the preferred slot:
-      prefSlotOffs -= vtm->prefslot_byte;
-      basePtr = basePtr.getOperand(0);
-
-      // Loading from memory, can we adjust alignment?
-      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
-        SDValue APtr = basePtr.getOperand(0);
-        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
-          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
-          alignment = GSDN->getGlobal()->getAlignment();
-        }
-      }
-    } else {
-      alignOffs = 0;
-      prefSlotOffs = -vtm->prefslot_byte;
-    }
-  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
-    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
-    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
-    prefSlotOffs = (int) (alignOffs & 0xf);
-    prefSlotOffs -= vtm->prefslot_byte;
-  } else {
-    alignOffs = 0;
-    prefSlotOffs = -vtm->prefslot_byte;
-  }
-
-  if (alignment == 16) {
-    // Realign the base pointer as a D-Form address:
-    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
-      basePtr = DAG.getNode(ISD::ADD, PtrVT,
-                            basePtr,
-                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
-    }
-
-    // Emit the vector load:
-    was16aligned = true;
-    return DAG.getLoad(MVT::v16i8, chain, basePtr,
-                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
-                       LSN->isVolatile(), 16);
-  }
-
-  // Unaligned load or we're using the "large memory" model, which means that
-  // we have to be very pessimistic:
-  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
-    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
-                          DAG.getConstant(0, PtrVT));
-  }
-
-  // Add the offset
-  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
-                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
-  was16aligned = false;
-
-  return DAG.getLoad(MVT::v16i8, chain, basePtr,
-                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
-                     LSN->isVolatile(), 16);
-}
-
 /// Custom lower loads for CellSPU
 /*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
@@ -605,43 +478,110 @@ static SDValue
 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   LoadSDNode *LN = cast<LoadSDNode>(Op);
   SDValue the_chain = LN->getChain();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   MVT InVT = LN->getMemoryVT();
   MVT OutVT = Op.getValueType();
   ISD::LoadExtType ExtType = LN->getExtensionType();
   unsigned alignment = LN->getAlignment();
-  SDValue Ops[8];
+  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);

   switch (LN->getAddressingMode()) {
   case ISD::UNINDEXED: {
-    int offset, rotamt;
-    bool was16aligned;
-    SDValue result =
-      AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
-                  was16aligned);
-
-    if (result.getNode() == 0)
-      return result;
-
-    the_chain = result.getValue(1);
-    // Rotate the chunk if necessary
-    if (rotamt < 0)
-      rotamt += 16;
-    if (rotamt != 0 || !was16aligned) {
-      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-
-      Ops[0] = result;
-      if (was16aligned) {
-        Ops[1] = DAG.getConstant(rotamt, MVT::i16);
+    SDValue result;
+    SDValue basePtr = LN->getBasePtr();
+    SDValue rotate;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and the rotation amount:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+        if (rotamt < 0)
+          rotamt += 16;
+
+        rotate = DAG.getConstant(rotamt, MVT::i16);
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
+                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+        // Plain aligned a-form address: rotate into preferred slot
+        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getConstant(rotamt, MVT::i16);
       } else {
-        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-        LoadSDNode *LN1 = cast<LoadSDNode>(result);
-        Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+        // Offset the rotate amount by the basePtr and the preferred slot
+        // byte offset
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getNode(ISD::ADD, PtrVT,
+                             basePtr,
                              DAG.getConstant(rotamt, PtrVT));
       }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+        } else {
+          // Convert the (add <arg1>, <arg2>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }

-      result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
+      // Offset the rotate amount by the basePtr and the preferred slot
+      // byte offset
+      rotate = DAG.getNode(ISD::ADD, PtrVT,
+                           basePtr,
+                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
     }

+    // Re-emit as a v16i8 vector load
+    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+                         LN->getSrcValue(), LN->getSrcValueOffset(),
+                         LN->isVolatile(), 16);
+
+    // Update the chain
+    the_chain = result.getValue(1);
+
+    // Rotate into the preferred slot:
+    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
+                         result.getValue(0), rotate);
+
     // Convert the loaded v16i8 vector to the appropriate vector type
     // specified by the operand:
     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
@@ -704,23 +644,86 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {

   switch (SN->getAddressingMode()) {
   case ISD::UNINDEXED: {
-    int chunk_offset, slot_offset;
-    bool was16aligned;
-
     // The vector type we really want to load from the 16-byte chunk.
     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

-    SDValue alignLoadVec =
-      AlignedLoad(Op, DAG, ST, SN, alignment,
-                  chunk_offset, slot_offset, VT, was16aligned);
+    SDValue alignLoadVec;
+    SDValue basePtr = SN->getBasePtr();
+    SDValue the_chain = SN->getChain();
+    SDValue insertEltOffs;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and insertion byte:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant((offset & 0xf), PtrVT));
+
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else {
+        // Otherwise, assume it's at byte 0 of basePtr
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant(0, PtrVT));
+      }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+        } else {
+          // Convert the (add <arg1>, <arg2>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }
+
+      // Insertion point is solely determined by basePtr's contents
+      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant(0, PtrVT));
+    }
+
+    // Re-emit as a v16i8 vector load
+    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+                               SN->getSrcValue(), SN->getSrcValueOffset(),
+                               SN->isVolatile(), 16);

-    if (alignLoadVec.getNode() == 0)
-      return alignLoadVec;
+    // Update the chain
+    the_chain = alignLoadVec.getValue(1);

     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
-    SDValue basePtr = LN->getBasePtr();
-    SDValue the_chain = alignLoadVec.getValue(1);
     SDValue theValue = SN->getValue();
     SDValue result;

@@ -732,29 +735,20 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
       theValue = theValue.getOperand(0);
     }

-    chunk_offset &= 0xf;
-
-    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
-    SDValue insertEltPtr;
-
     // If the base pointer is already a D-form address, then just create
     // a new D-form address with a slot offset and the orignal base pointer.
     // Otherwise generate a D-form address with the slot offset relative
     // to the stack pointer, which is always aligned.
-    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
-    DEBUG(basePtr.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
-
-    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
-        (basePtr.getOpcode() == ISD::ADD
-         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
-      insertEltPtr = basePtr;
-    } else {
-      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
-    }
+#if !defined(NDEBUG)
+    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+      cerr << "CellSPU LowerSTORE: basePtr = ";
+      basePtr.getNode()->dump(&DAG);
+      cerr << "\n";
+    }
+#endif

     SDValue insertEltOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
     SDValue vectorizeOp =
       DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

@@ -919,22 +913,31 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }

-//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
+LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
   SDValue Cond = Op.getOperand(1);
```