author    Scott Michel <scottm@aero.org>  2008-12-27 04:51:36 +0000
committer Scott Michel <scottm@aero.org>  2008-12-27 04:51:36 +0000
commit    f0569be4a948c7ed816bfa2b8774a5a18458ee23 (patch)
tree      541905fcbd5e64ef95599b1ca3c4182adc972688 /lib
parent    1323e8bf6a7bec163c5d43006f5b3b78042cef61 (diff)
- Remove Tilmann's custom truncate lowering: it completely hosed over
  DAGcombine's ability to find reasons to remove truncates when they were
  not needed. Consequently, the CellSPU backend would produce correct, but
  _really slow and horrible_, code. Replaced with instruction sequences that
  do the equivalent truncation in SPUInstrInfo.td.
- Re-examine how unaligned loads and stores work. Generated unaligned load
  code has been tested on the CellSPU hardware; see i32operations.c and
  i64operations.c in CodeGen/CellSPU/useful-harnesses (a sketch of such a
  harness appears below). While they may be toy test code, they do prove
  that some real-world code compiles correctly.
- Fix truncating stores in bug 3193. (Note: unpack_df.ll will still make llc
  fault because i64 ult is not yet implemented.)
- Added i64 eq and neq for setcc and select/setcc; started a new instruction
  information file for them, SPU64InstrInfo.td. Additional i64 operations
  should be added to this file and not to SPUInstrInfo.td.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
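For a sense of what the useful-harnesses programs exercise, here is a minimal
sketch of an i64 setcc/select harness in the spirit of i64operations.c; the
function names and test values are illustrative, not the actual test's
contents:

// Minimal harness sketch (illustrative; not the actual i64operations.c).
// Compiled for CellSPU, each function should exercise the new i64 eq/neq
// setcc and select/setcc patterns from SPU64InstrInfo.td.
#include <cstdint>
#include <cstdio>

bool i64_eq(int64_t a, int64_t b)  { return a == b; }
bool i64_neq(int64_t a, int64_t b) { return a != b; }
int64_t i64_select_eq(int64_t a, int64_t b, int64_t t, int64_t f) {
  return (a == b) ? t : f;    // select on an i64 comparison
}

int main() {
  struct { int64_t a, b; bool eq; } tests[] = {
    { 0x1234567890abcdefLL, 0x1234567890abcdefLL, true  },
    { 0x1234567890abcdefLL, 0x1234567890abcdeeLL, false },
    { -1, -1, true },
    {  0, -1, false },
  };
  int failures = 0;
  for (auto &t : tests) {
    if (i64_eq(t.a, t.b)  != t.eq)  ++failures;
    if (i64_neq(t.a, t.b) != !t.eq) ++failures;
    if (i64_select_eq(t.a, t.b, 1, 0) != (t.eq ? 1 : 0)) ++failures;
  }
  printf("%d failure(s)\n", failures);
  return failures != 0;
}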
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp |   8
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td            |  77
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp          |  66
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp          | 797
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h            |   6
-rw-r--r--  lib/Target/CellSPU/SPUInstrFormats.td           |   5
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp             |  55
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td              | 726
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td                  |  14
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td               |  10
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp          |   5
-rw-r--r--  lib/Target/CellSPU/SPUTargetAsmInfo.cpp         |   7
12 files changed, 1148 insertions, 628 deletions
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 589a260005..98aa084d50 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -117,7 +117,7 @@ namespace {
}
void
- printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
+ printShufAddr(const MachineInstr *MI, unsigned OpNo)
{
char value = MI->getOperand(OpNo).getImm();
O << (int) value;
@@ -183,16 +183,16 @@ namespace {
}
void
- printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo)
{
const MachineOperand &MO = MI->getOperand(OpNo);
assert(MO.isImm() &&
- "printMemRegImmS10 first operand is not immedate");
+ "printDFormAddr first operand is not immedate");
int64_t value = int64_t(MI->getOperand(OpNo).getImm());
int16_t value16 = int16_t(value);
assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
&& "Invalid dform s10 offset argument");
- O << value16 << "(";
+ O << (value16 & ~0xf) << "(";
printOperand(MI, OpNo+1);
O << ")";
}
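The masking change above follows from the d-form encoding: the scaled s10
field addresses whole 16-byte quadwords, so only the offset's upper bits are
printable. A sketch of the check and masking, assuming (per the assert above)
an s10 field scaled by 16 into a 14-bit byte range; the helper name is
hypothetical, and the real method prints via the AsmPrinter's output stream:

// Sketch of printDFormAddr's range check and masking (hypothetical helper).
#include <cassert>
#include <cstdint>
#include <cstdio>

void printDFormOffset(int16_t value16, const char *baseReg) {
  // The s10 immediate is scaled by 16, so byte offsets span [-2^13, 2^13 - 1]:
  assert(value16 >= -(1 << (9 + 4)) && value16 <= (1 << (9 + 4)) - 1
         && "Invalid d-form s10 offset argument");
  // Only whole 16-byte quadwords are addressable; drop the low nibble:
  printf("%d(%s)", value16 & ~0xf, baseReg);
}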
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000000..6d679bac72
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,77 @@
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+// constant.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
+// (TODO)
+//
+// M00$E Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
+class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+
+class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 cmpare.Fragment, -1)>;
+
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB))),
+ 0x0000000c)>;
+
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
+ 0x0000000f)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+ def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64EQv2i64.Fragment>;
+
+def I64Select:
+ Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+ (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+
+def : I64SETCCNegCond<setne, I64EQr64>;
+
+def : I64SELECTNegCond<setne, I64EQr64>;
\ No newline at end of file
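To see why the scalar sequence above tests the gathered bits against 0x0c,
here is a rough C++ model of the ceq/gb/cgti steps. It assumes, as the
ORv2i64_i64 promotion suggests, that the i64 operands end up splatted across
both doubleword slots of a v2i64, and that gb gathers the low bit of each of
the four word slots with word 0 landing in the most significant of the four
bits; treat this as a sketch of the idea, not ISA documentation:

// Rough model of the scalar CEQr64compare fragment:
//   (CGTIv4i32 (GBv4i32 (CEQv4i32 ...)), 0x0c)
#include <cassert>
#include <cstdint>

static unsigned ceq_gb(uint64_t a, uint64_t b) {
  // View each splatted i64 as four 32-bit word slots (assumption above):
  uint32_t wa[4] = { uint32_t(a >> 32), uint32_t(a),
                     uint32_t(a >> 32), uint32_t(a) };
  uint32_t wb[4] = { uint32_t(b >> 32), uint32_t(b),
                     uint32_t(b >> 32), uint32_t(b) };
  unsigned gb = 0;
  for (int i = 0; i < 4; ++i)
    gb = (gb << 1) | (wa[i] == wb[i]);  // ceq: all-ones per equal word
  return gb;
}

static bool i64_seteq(uint64_t a, uint64_t b) {
  // cgti ..., 12: given the splat, the only gathered value above 0x0c is
  // 0b1111, i.e. all four words (hence both i64 halves) compared equal.
  return int(ceq_gb(a, b)) > 0x0c;
}

int main() {
  assert(i64_seteq(5, 5) && !i64_seteq(5, 6));
  assert(i64_seteq(0x100000000ULL, 0x100000000ULL));
  assert(!i64_seteq(0x100000000ULL, 1));       // high words differ
  return 0;
}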
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9ac0e2e256..f51aba2fda 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -165,24 +165,23 @@ namespace {
MVT VT;
unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
bool ldresult_imm; /// LDRESULT instruction requires immediate?
- int prefslot_byte; /// Byte offset of the "preferred" slot
+ unsigned lrinst; /// LR instruction
};
const valtype_map_s valtype_map[] = {
- { MVT::i1, 0, false, 3 },
- { MVT::i8, SPU::ORBIr8, true, 3 },
- { MVT::i16, SPU::ORHIr16, true, 2 },
- { MVT::i32, SPU::ORIr32, true, 0 },
- { MVT::i64, SPU::ORr64, false, 0 },
- { MVT::f32, SPU::ORf32, false, 0 },
- { MVT::f64, SPU::ORf64, false, 0 },
+ { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
+ { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
+ { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
+ { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
+ { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
+ { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
// vector types... (sigh!)
- { MVT::v16i8, 0, false, 0 },
- { MVT::v8i16, 0, false, 0 },
- { MVT::v4i32, 0, false, 0 },
- { MVT::v2i64, 0, false, 0 },
- { MVT::v4f32, 0, false, 0 },
- { MVT::v2f64, 0, false, 0 }
+ { MVT::v16i8, 0, false, SPU::LRv16i8 },
+ { MVT::v8i16, 0, false, SPU::LRv8i16 },
+ { MVT::v4i32, 0, false, SPU::LRv4i32 },
+ { MVT::v2i64, 0, false, SPU::LRv2i64 },
+ { MVT::v4f32, 0, false, SPU::LRv4f32 },
+ { MVT::v2f64, 0, false, SPU::LRv2f64 }
};
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
@@ -686,31 +685,32 @@ SPUDAGToDAGISel::Select(SDValue Op) {
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
}
- Chain = SDValue(Result, 1);
-
return Result;
} else if (Opc == SPUISD::IndirectAddr) {
- SDValue Op0 = Op.getOperand(0);
- if (Op0.getOpcode() == SPUISD::LDRESULT) {
- /* || Op0.getOpcode() == SPUISD::AFormAddr) */
- // (IndirectAddr (LDRESULT, imm))
- SDValue Op1 = Op.getOperand(1);
- MVT VT = Op.getValueType();
-
- DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = ");
- DEBUG(Op.getOperand(0).getNode()->dump(CurDAG));
- DEBUG(cerr << "\nOp1 = ");
- DEBUG(Op.getOperand(1).getNode()->dump(CurDAG));
- DEBUG(cerr << "\n");
-
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ MVT VT = Op.getValueType();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
- Ops[0] = Op0;
- Ops[1] = Op1;
- n_ops = 2;
}
+ Ops[0] = Op0;
+ Ops[1] = Op1;
+ n_ops = 2;
}
}
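The AIr32-versus-Ar32 choice above turns on whether the addend fits the SPU's
signed 10-bit add-immediate field. The real predicate, isI32IntS10Immediate,
is defined elsewhere in the backend; the range it accepts amounts to:

// Sketch of the signed 10-bit immediate range behind the AIr32/Ar32 choice
// (the actual predicate is isI32IntS10Immediate in the CellSPU backend).
#include <cstdint>

static bool fitsS10(int64_t v) {
  return v >= -(1 << 9) && v <= (1 << 9) - 1;   // [-512, 511]
}

Constants outside that range have to be materialized into a register and
added with the three-operand Ar32 form instead.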
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c3c31e0f47..e975d0d039 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -77,37 +78,6 @@ namespace {
return retval;
}
-
- //! Predicate that returns true if operand is a memory target
- /*!
- \arg Op Operand to test
- \return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an A-form
- address.
- */
- bool isMemoryOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::GlobalAddress
- || Opc == ISD::GlobalTLSAddress
- || Opc == ISD::JumpTable
- || Opc == ISD::ConstantPool
- || Opc == ISD::ExternalSymbol
- || Opc == ISD::TargetGlobalAddress
- || Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetJumpTable
- || Opc == ISD::TargetConstantPool
- || Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::AFormAddr);
- }
-
- //! Predicate that returns true if the operand is an indirect target
- bool isIndirectOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::Register
- || Opc == SPUISD::LDRESULT);
- }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -135,20 +105,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setTruncStoreAction(MVT::i8, MVT::i8, Custom);
- setTruncStoreAction(MVT::i16, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i64, MVT::i8, Custom);
- setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
@@ -160,11 +118,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
}
- // Custom lower BRCOND for i8 to "promote" the result to i16
+ // Custom lower BRCOND for i8 to "promote" the result to whatever the result
+ // operand happens to be:
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
@@ -176,14 +156,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-#if 0
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-#endif
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- // PowerPC has no SREM/UREM instructions
+ // SPU has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
@@ -232,14 +210,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
- // SMUL_LOHI, UMUL_LOHI
-#if 0
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-#endif
-
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
@@ -265,12 +235,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
// Zero extension and sign extension for i64 have to be
// custom legalized
@@ -278,10 +248,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
- // Custom lower truncates
- setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+ // Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction
@@ -292,7 +259,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f32, Custom);
- //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
// SPU has [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
@@ -402,7 +369,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setShiftAmountType(MVT::i32);
- setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -435,7 +402,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -471,9 +438,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
return ((i != node_names.end()) ? i->second : 0);
}
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
MVT VT = Op.getValueType();
- return (VT.isInteger() ? VT : MVT(MVT::i32));
+ // i8, i16 and i32 are valid SETCC result types
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
@@ -486,105 +458,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-/// Aligned load common code for CellSPU
-/*!
- \param[in] Op The SelectionDAG load or store operand
- \param[in] DAG The selection DAG
- \param[in] ST CellSPU subtarget information structure
- \param[in,out] alignment Caller initializes this to the load or store node's
- value from getAlignment(), may be updated while generating the aligned load
- \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
- offset (divisible by 16, modulo 16 == 0)
- \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
- offset of the preferred slot (modulo 16 != 0)
- \param[in,out] VT Caller initializes this value type to the load or store
- node's loaded or stored value type; may be updated if an i1-extended load or
- store.
- \param[out] was16aligned true if the base pointer had 16-byte alignment,
- otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
- LSBaseSDNode *LSN,
- unsigned &alignment, int &alignOffs, int &prefSlotOffs,
- MVT &VT, bool &was16aligned)
-{
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
- SDValue basePtr = LSN->getBasePtr();
- SDValue chain = LSN->getChain();
-
- if (basePtr.getOpcode() == ISD::ADD) {
- SDValue Op1 = basePtr.getNode()->getOperand(1);
-
- if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
- alignOffs = (int) CN->getZExtValue();
- prefSlotOffs = (int) (alignOffs & 0xf);
-
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- prefSlotOffs -= vtm->prefslot_byte;
- basePtr = basePtr.getOperand(0);
-
- // Loading from memory, can we adjust alignment?
- if (basePtr.getOpcode() == SPUISD::AFormAddr) {
- SDValue APtr = basePtr.getOperand(0);
- if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
- alignment = GSDN->getGlobal()->getAlignment();
- }
- }
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
- } else if (basePtr.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
- alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
- prefSlotOffs = (int) (alignOffs & 0xf);
- prefSlotOffs -= vtm->prefslot_byte;
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
-
- if (alignment == 16) {
- // Realign the base pointer as a D-Form address:
- if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
- basePtr = DAG.getNode(ISD::ADD, PtrVT,
- basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- }
-
- // Emit the vector load:
- was16aligned = true;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
- }
-
- // Unaligned load or we're using the "large memory" model, which means that
- // we have to be very pessimistic:
- if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Add the offset
- basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- was16aligned = false;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
-}
-
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
@@ -605,43 +478,110 @@ static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
MVT InVT = LN->getMemoryVT();
MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- SDValue Ops[8];
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int offset, rotamt;
- bool was16aligned;
- SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
- was16aligned);
-
- if (result.getNode() == 0)
- return result;
-
- the_chain = result.getValue(1);
- // Rotate the chunk if necessary
- if (rotamt < 0)
- rotamt += 16;
- if (rotamt != 0 || !was16aligned) {
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-
- Ops[0] = result;
- if (was16aligned) {
- Ops[1] = DAG.getConstant(rotamt, MVT::i16);
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
} else {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
}
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = result.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
+ result.getValue(0), rotate);
+
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
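The rotate arithmetic in the aligned case above, written out as a plain
function (prefslot_byte being the byte offset of the value's preferred slot
within the 16-byte quadword, from the backend's valtype map):

// Rotate amount for the aligned LowerLOAD path: rotate the loaded 16-byte
// chunk left so the addressed value lands in its preferred slot.
static int loadRotateAmount(int64_t offset, int prefslot_byte) {
  int64_t rotamt = (offset & 0xf) - prefslot_byte;
  if (rotamt < 0)
    rotamt += 16;               // rotation is modulo the quadword size
  return int(rotamt);
}
// e.g. an i32 (preferred slot at byte 0) stored 4 bytes into its quadword
// needs a left rotate of 4: loadRotateAmount(4, 0) == 4.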
@@ -704,23 +644,86 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int chunk_offset, slot_offset;
- bool was16aligned;
-
// The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
- SDValue alignLoadVec =
- AlignedLoad(Op, DAG, ST, SN, alignment,
- chunk_offset, slot_offset, VT, was16aligned);
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
- if (alignLoadVec.getNode() == 0)
- return alignLoadVec;
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
- SDValue basePtr = LN->getBasePtr();
- SDValue the_chain = alignLoadVec.getValue(1);
SDValue theValue = SN->getValue();
SDValue result;
@@ -732,29 +735,20 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
theValue = theValue.getOperand(0);
}
- chunk_offset &= 0xf;
-
- SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
- SDValue insertEltPtr;
-
// If the base pointer is already a D-form address, then just create
// a new D-form address with a slot offset and the original base pointer.
// Otherwise generate a D-form address with the slot offset relative
// to the stack pointer, which is always aligned.
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
- DEBUG(basePtr.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
-
- if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
- (basePtr.getOpcode() == ISD::ADD
- && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
- insertEltPtr = basePtr;
- } else {
- insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
- }
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+ DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
SDValue vectorizeOp =
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
@@ -919,22 +913,31 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
+LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
SDValue Cond = Op.getOperand(1);