author    | Scott Michel <scottm@aero.org> | 2008-12-27 04:51:36 +0000
committer | Scott Michel <scottm@aero.org> | 2008-12-27 04:51:36 +0000
commit    | f0569be4a948c7ed816bfa2b8774a5a18458ee23 (patch)
tree      | 541905fcbd5e64ef95599b1ca3c4182adc972688 /lib
parent    | 1323e8bf6a7bec163c5d43006f5b3b78042cef61 (diff)
- Remove Tilmann's custom truncate lowering: it completely hosed DAGCombine's
  ability to eliminate truncates when they were not needed. Consequently, the
  CellSPU backend would produce correct, but _really slow and horrible_, code.
  Replaced it with instruction sequences in SPUInstrInfo.td that perform the
  equivalent truncation (a conceptual sketch follows this list).
- Re-examine how unaligned loads and stores work. The generated unaligned-load
  code has been tested on CellSPU hardware; see i32operations.c and
  i64operations.c in CodeGen/CellSPU/useful-harnesses. (They may be toy test
  cases, but they do prove that some real-world code compiles correctly; a
  sketch of the load-and-rotate strategy follows the commit message.)
- Fix truncating stores from bug 3193 (note: unpack_df.ll will still make llc
  fault because i64 ult is not yet implemented).
- Added i64 eq and neq for setcc and select/setcc; started a new instruction
  information file for them, SPU64InstrInfo.td. Additional i64 operations
  should be added to this file, not to SPUInstrInfo.td. (A scalar sketch of
  the new seteq sequence follows the diffstat below.)
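A concrete picture of the first item above. This is a conceptual sketch only, not code from this commit (the actual replacements are TableGen patterns in SPUInstrInfo.td): on the big-endian SPU a truncate is just a byte selection within the 128-bit register, so instruction selection can emit a plain shuffle/rotate while ISD::TRUNCATE stays visible to DAGCombine until then.

```cpp
#include <cstdint>

// Hypothetical host-side model of an i64 -> i32 truncate on one 16-byte
// SPU register. The i64 occupies its preferred slot (bytes 0-7, big-endian),
// so its low 32 bits are bytes 4-7; the i32 preferred slot is bytes 0-3.
// The "truncate" is therefore a fixed byte selection (a shuffle/rotate at
// the ISA level), not an arithmetic operation.
static uint32_t truncI64ToI32(const uint8_t reg[16]) {
  return (uint32_t(reg[4]) << 24) | (uint32_t(reg[5]) << 16) |
         (uint32_t(reg[6]) << 8)  |  uint32_t(reg[7]);
}
```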
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
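The unaligned-load rework in LowerLOAD (see the SPUISelLowering.cpp hunks below) always issues a 16-byte quadword load and then rotates the chunk left so the requested value lands in its type's preferred slot. A minimal host-side sketch of that strategy, assuming the value does not straddle a 16-byte boundary; the offsets mirror the backend's value-type map, where prefslot_byte is 0 for i32:

```cpp
#include <cstdint>

// Model of the SPU load-and-rotate scheme; plain C++, not backend code.
static uint32_t loadU32ViaChunk(const uint8_t *addr) {
  const uint8_t *chunk =
      (const uint8_t *) ((uintptr_t) addr & ~(uintptr_t) 0xf); // 16-byte base
  int offset = (int) ((uintptr_t) addr & 0xf); // byte offset into the chunk
  int prefslot_byte = 0;                       // preferred slot byte for i32
  int rotamt = offset - prefslot_byte;         // ROTBYTES_LEFT amount
  if (rotamt < 0)
    rotamt += 16;

  uint8_t rotated[16];
  for (int i = 0; i != 16; ++i) // rotate the chunk left by rotamt bytes
    rotated[i] = chunk[(i + rotamt) & 0xf];

  // The value now sits in bytes 0-3; assemble it big-endian, as on SPU.
  return (uint32_t(rotated[0]) << 24) | (uint32_t(rotated[1]) << 16) |
         (uint32_t(rotated[2]) << 8)  |  uint32_t(rotated[3]);
}
```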
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp |   8
-rw-r--r-- | lib/Target/CellSPU/SPU64InstrInfo.td            |  77
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp          |  66
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp          | 797
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.h            |   6
-rw-r--r-- | lib/Target/CellSPU/SPUInstrFormats.td           |   5
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.cpp             |  55
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td              | 726
-rw-r--r-- | lib/Target/CellSPU/SPUNodes.td                  |  14
-rw-r--r-- | lib/Target/CellSPU/SPUOperands.td               |  10
-rw-r--r-- | lib/Target/CellSPU/SPURegisterInfo.cpp          |   5
-rw-r--r-- | lib/Target/CellSPU/SPUTargetAsmInfo.cpp         |   7
12 files changed, 1148 insertions, 628 deletions
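One more orientation point before the diff: the new SPU64InstrInfo.td below builds i64 equality out of 32-bit vector operations — CEQv4i32 compares word-wise, GBv4i32 gathers one bit per word, and CGTIv4i32 tests the gathered mask against an immediate (0xc in this commit). A scalar model of the idea, hedged in that it checks the two relevant mask bits directly rather than reproducing the exact immediate compare:

```cpp
#include <cstdint>

// Conceptual model of the CEQr64compare fragment: the i64 occupies words 0
// and 1 of the quadword, GB places word 0's compare result in bit 3 of the
// mask and word 1's in bit 2, and equality holds when both bits are set.
static bool i64SetEqModel(uint64_t a, uint64_t b) {
  unsigned mask = 0;
  mask |= unsigned(uint32_t(a >> 32) == uint32_t(b >> 32)) << 3; // word 0
  mask |= unsigned(uint32_t(a)       == uint32_t(b))       << 2; // word 1
  return (mask & 0xc) == 0xc; // the .td fragment tests this via CGTI
}
```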
```diff
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 589a260005..98aa084d50 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -117,7 +117,7 @@ namespace {
     }

     void
-    printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
+    printShufAddr(const MachineInstr *MI, unsigned OpNo)
     {
       char value = MI->getOperand(OpNo).getImm();
       O << (int) value;
@@ -183,16 +183,16 @@ namespace {
     }

     void
-    printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
+    printDFormAddr(const MachineInstr *MI, unsigned OpNo)
     {
       const MachineOperand &MO = MI->getOperand(OpNo);
       assert(MO.isImm() &&
-             "printMemRegImmS10 first operand is not immedate");
+             "printDFormAddr first operand is not immedate");
       int64_t value = int64_t(MI->getOperand(OpNo).getImm());
       int16_t value16 = int16_t(value);
       assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
              && "Invalid dform s10 offset argument");
-      O << value16 << "(";
+      O << (value16 & ~0xf) << "(";
       printOperand(MI, OpNo+1);
       O << ")";
     }
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000000..6d679bac72
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,77 @@
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+//    constant.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+//    could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+//    mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
+//    (TODO)
+//
+// M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+   SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+            [/* no pattern */]>;
+
+class CodeFrag<dag frag> {
+  dag Fragment = frag;
+}
+
+class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+
+class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+  Pat<(cond R64C:$rA, R64C:$rB),
+      (XORIr32 cmpare.Fragment, -1)>;
+
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+                                           (ORv2i64_i64 R64C:$rB))),
+                        0x0000000c)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
+                        0x0000000f)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+  // Plain old comparison, converts back to i32 scalar
+  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+  // SELB mask from FSM:
+  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+          I64EQv2i64.Fragment>;
+
+def I64Select:
+    Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+        (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+
+def : I64SETCCNegCond<setne, I64EQr64>;
+
+def : I64SELECTNegCond<setne, I64EQr64>;
\ No newline at end of file
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9ac0e2e256..f51aba2fda 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -165,24 +165,23 @@ namespace {
     MVT VT;
     unsigned ldresult_ins;  /// LDRESULT instruction (0 = undefined)
     bool ldresult_imm;      /// LDRESULT instruction requires immediate?
-    int prefslot_byte;      /// Byte offset of the "preferred" slot
+    unsigned lrinst;        /// LR instruction
   };

   const valtype_map_s valtype_map[] = {
-    { MVT::i1,    0,            false, 3 },
-    { MVT::i8,    SPU::ORBIr8,  true,  3 },
-    { MVT::i16,   SPU::ORHIr16, true,  2 },
-    { MVT::i32,   SPU::ORIr32,  true,  0 },
-    { MVT::i64,   SPU::ORr64,   false, 0 },
-    { MVT::f32,   SPU::ORf32,   false, 0 },
-    { MVT::f64,   SPU::ORf64,   false, 0 },
+    { MVT::i8,    SPU::ORBIr8,  true,  SPU::LRr8 },
+    { MVT::i16,   SPU::ORHIr16, true,  SPU::LRr16 },
+    { MVT::i32,   SPU::ORIr32,  true,  SPU::LRr32 },
+    { MVT::i64,   SPU::ORr64,   false, SPU::LRr64 },
+    { MVT::f32,   SPU::ORf32,   false, SPU::LRf32 },
+    { MVT::f64,   SPU::ORf64,   false, SPU::LRf64 },
     // vector types... (sigh!)
-    { MVT::v16i8, 0, false, 0 },
-    { MVT::v8i16, 0, false, 0 },
-    { MVT::v4i32, 0, false, 0 },
-    { MVT::v2i64, 0, false, 0 },
-    { MVT::v4f32, 0, false, 0 },
-    { MVT::v2f64, 0, false, 0 }
+    { MVT::v16i8, 0, false, SPU::LRv16i8 },
+    { MVT::v8i16, 0, false, SPU::LRv8i16 },
+    { MVT::v4i32, 0, false, SPU::LRv4i32 },
+    { MVT::v2i64, 0, false, SPU::LRv2i64 },
+    { MVT::v4f32, 0, false, SPU::LRv4f32 },
+    { MVT::v2f64, 0, false, SPU::LRv2f64 }
   };

   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
@@ -686,31 +685,32 @@ SPUDAGToDAGISel::Select(SDValue Op) {
       Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
     }

-    Chain = SDValue(Result, 1);
-
     return Result;
   } else if (Opc == SPUISD::IndirectAddr) {
-    SDValue Op0 = Op.getOperand(0);
-    if (Op0.getOpcode() == SPUISD::LDRESULT) {
-      /* || Op0.getOpcode() == SPUISD::AFormAddr) */
-      // (IndirectAddr (LDRESULT, imm))
-      SDValue Op1 = Op.getOperand(1);
-      MVT VT = Op.getValueType();
-
-      DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = ");
-      DEBUG(Op.getOperand(0).getNode()->dump(CurDAG));
-      DEBUG(cerr << "\nOp1 = ");
-      DEBUG(Op.getOperand(1).getNode()->dump(CurDAG));
-      DEBUG(cerr << "\n");
-
+    // Look at the operands: SelectCode() will catch the cases that aren't
+    // specifically handled here.
+    //
+    // SPUInstrInfo catches the following patterns:
+    // (SPUindirect (SPUhi ...), (SPUlo ...))
+    // (SPUindirect $sp, imm)
+    MVT VT = Op.getValueType();
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    RegisterSDNode *RN;
+
+    if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+        || (Op0.getOpcode() == ISD::Register
+            && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+                && RN->getReg() != SPU::R1))) {
+      NewOpc = SPU::Ar32;
       if (Op1.getOpcode() == ISD::Constant) {
         ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-        Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT);
+        Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
         NewOpc = (isI32IntS10Immediate(CN) ?
                   SPU::AIr32 : SPU::Ar32);
-        Ops[0] = Op0;
-        Ops[1] = Op1;
-        n_ops = 2;
       }
+      Ops[0] = Op0;
+      Ops[1] = Op1;
+      n_ops = 2;
     }
   }
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c3c31e0f47..e975d0d039 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SPUISelLowering.h"
 #include "SPUTargetMachine.h"
 #include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -77,37 +78,6 @@ namespace {
     return retval;
   }
-
-  //! Predicate that returns true if operand is a memory target
-  /*!
-    \arg Op Operand to test
-    \return true if the operand is a memory target (i.e., global
-    address, external symbol, constant pool) or an A-form
-    address.
-   */
-  bool isMemoryOperand(const SDValue &Op)
-  {
-    const unsigned Opc = Op.getOpcode();
-    return (Opc == ISD::GlobalAddress
-            || Opc == ISD::GlobalTLSAddress
-            || Opc == ISD::JumpTable
-            || Opc == ISD::ConstantPool
-            || Opc == ISD::ExternalSymbol
-            || Opc == ISD::TargetGlobalAddress
-            || Opc == ISD::TargetGlobalTLSAddress
-            || Opc == ISD::TargetJumpTable
-            || Opc == ISD::TargetConstantPool
-            || Opc == ISD::TargetExternalSymbol
-            || Opc == SPUISD::AFormAddr);
-  }
-
-  //! Predicate that returns true if the operand is an indirect target
-  bool isIndirectOperand(const SDValue &Op)
-  {
-    const unsigned Opc = Op.getOpcode();
-    return (Opc == ISD::Register
-            || Opc == SPUISD::LDRESULT);
-  }
 }

 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -135,20 +105,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
-  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
-  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
-  setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
-  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
-
-  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Custom);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

   // SPU constant load actions are custom lowered:
   setOperationAction(ISD::Constant, MVT::i64, Custom);
@@ -160,11 +118,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
        ++sctype) {
     MVT VT = (MVT::SimpleValueType)sctype;

-    setOperationAction(ISD::LOAD, VT, Custom);
-    setOperationAction(ISD::STORE, VT, Custom);
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
+  }
+
+  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType) sctype;
+
+    setOperationAction(ISD::LOAD,   VT, Custom);
+    setOperationAction(ISD::STORE,  VT, Custom);
+
+    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+      MVT StoreVT = (MVT::SimpleValueType) stype;
+      setTruncStoreAction(VT, StoreVT, Expand);
+    }
   }

-  // Custom lower BRCOND for i8 to "promote" the result to i16
+  // Custom lower BRCOND for i8 to "promote" the result to whatever the result
+  // operand happens to be:
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);

   // Expand the jumptable branches
@@ -176,14 +156,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SELECT_CC, MVT::i8,  Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-#if 0
   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-#endif

   // SPU has no intrinsics for these particular operations:
   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

-  // PowerPC has no SREM/UREM instructions
+  // SPU has no SREM/UREM instructions
   setOperationAction(ISD::SREM, MVT::i32, Expand);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
   setOperationAction(ISD::SREM, MVT::i64, Expand);
@@ -232,14 +210,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::MUL, MVT::i32, Custom);
   setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

-  // SMUL_LOHI, UMUL_LOHI
-#if 0
-  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
-  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-#endif
-
   // Need to custom handle (some) common i8, i64 math ops
   setOperationAction(ISD::ADD, MVT::i64, Custom);
   setOperationAction(ISD::SUB, MVT::i8,  Custom);
@@ -265,12 +235,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SELECT, MVT::i8,  Legal);
   setOperationAction(ISD::SELECT, MVT::i16, Legal);
   setOperationAction(ISD::SELECT, MVT::i32, Legal);
-  setOperationAction(ISD::SELECT, MVT::i64, Expand);
+  setOperationAction(ISD::SELECT, MVT::i64, Legal);

   setOperationAction(ISD::SETCC, MVT::i8,  Legal);
   setOperationAction(ISD::SETCC, MVT::i16, Legal);
-  setOperationAction(ISD::SETCC, MVT::i32, Legal);
-  setOperationAction(ISD::SETCC, MVT::i64, Expand);
+  setOperationAction(ISD::SETCC, MVT::i32, Custom);
+  setOperationAction(ISD::SETCC, MVT::i64, Custom);

   // Zero extension and sign extension for i64 have to be
   // custom legalized
@@ -278,10 +248,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

-  // Custom lower truncates
-  setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
-  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
-  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+  // Custom lower i128 -> i64 truncates
   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

   // SPU has a legal FP -> signed INT instruction
@@ -292,7 +259,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)

   // FDIV on SPU requires custom lowering
   setOperationAction(ISD::FDIV, MVT::f32, Custom);
-  //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+  setOperationAction(ISD::FDIV, MVT::f64, Expand);   // libcall

   // SPU has [U|S]INT_TO_FP
   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
@@ -402,7 +369,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

   setShiftAmountType(MVT::i32);
-  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);

   setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -435,7 +402,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
   node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
   node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
-  node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
+  node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR";
   node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
   node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
   node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -471,9 +438,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   return ((i != node_names.end()) ? i->second : 0);
 }

+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
   MVT VT = Op.getValueType();
-  return (VT.isInteger() ? VT : MVT(MVT::i32));
+  // i16 and i32 are valid SETCC result types
+  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
 }

 //===----------------------------------------------------------------------===//
@@ -486,105 +458,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 // LowerOperation implementation
 //===----------------------------------------------------------------------===//

-/// Aligned load common code for CellSPU
-/*!
-  \param[in] Op The SelectionDAG load or store operand
-  \param[in] DAG The selection DAG
-  \param[in] ST CellSPU subtarget information structure
-  \param[in,out] alignment Caller initializes this to the load or store node's
-  value from getAlignment(), may be updated while generating the aligned load
-  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
-  offset (divisible by 16, modulo 16 == 0)
-  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
-  offset of the preferred slot (modulo 16 != 0)
-  \param[in,out] VT Caller initializes this value type to the the load or store
-  node's loaded or stored value type; may be updated if an i1-extended load or
-  store.
-  \param[out] was16aligned true if the base pointer had 16-byte alignment,
-  otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
-            LSBaseSDNode *LSN,
-            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
-            MVT &VT, bool &was16aligned)
-{
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-  SDValue basePtr = LSN->getBasePtr();
-  SDValue chain = LSN->getChain();
-
-  if (basePtr.getOpcode() == ISD::ADD) {
-    SDValue Op1 = basePtr.getNode()->getOperand(1);
-
-    if (Op1.getOpcode() == ISD::Constant
-        || Op1.getOpcode() == ISD::TargetConstant) {
-      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
-      alignOffs = (int) CN->getZExtValue();
-      prefSlotOffs = (int) (alignOffs & 0xf);
-
-      // Adjust the rotation amount to ensure that the final result ends up in
-      // the preferred slot:
-      prefSlotOffs -= vtm->prefslot_byte;
-      basePtr = basePtr.getOperand(0);
-
-      // Loading from memory, can we adjust alignment?
-      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
-        SDValue APtr = basePtr.getOperand(0);
-        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
-          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
-          alignment = GSDN->getGlobal()->getAlignment();
-        }
-      }
-    } else {
-      alignOffs = 0;
-      prefSlotOffs = -vtm->prefslot_byte;
-    }
-  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
-    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
-    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
-    prefSlotOffs = (int) (alignOffs & 0xf);
-    prefSlotOffs -= vtm->prefslot_byte;
-  } else {
-    alignOffs = 0;
-    prefSlotOffs = -vtm->prefslot_byte;
-  }
-
-  if (alignment == 16) {
-    // Realign the base pointer as a D-Form address:
-    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
-      basePtr = DAG.getNode(ISD::ADD, PtrVT,
-                            basePtr,
-                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
-    }
-
-    // Emit the vector load:
-    was16aligned = true;
-    return DAG.getLoad(MVT::v16i8, chain, basePtr,
-                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
-                       LSN->isVolatile(), 16);
-  }
-
-  // Unaligned load or we're using the "large memory" model, which means that
-  // we have to be very pessimistic:
-  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
-    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
-                          DAG.getConstant(0, PtrVT));
-  }
-
-  // Add the offset
-  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
-                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
-  was16aligned = false;
-
-  return DAG.getLoad(MVT::v16i8, chain, basePtr,
-                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
-                     LSN->isVolatile(), 16);
-}
-
 /// Custom lower loads for CellSPU
 /*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
@@ -605,43 +478,110 @@ static SDValue
 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   LoadSDNode *LN = cast<LoadSDNode>(Op);
   SDValue the_chain = LN->getChain();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   MVT InVT = LN->getMemoryVT();
   MVT OutVT = Op.getValueType();
   ISD::LoadExtType ExtType = LN->getExtensionType();
   unsigned alignment = LN->getAlignment();
-  SDValue Ops[8];
+  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);

   switch (LN->getAddressingMode()) {
   case ISD::UNINDEXED: {
-    int offset, rotamt;
-    bool was16aligned;
-    SDValue result =
-      AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
-                  was16aligned);
-
-    if (result.getNode() == 0)
-      return result;
-
-    the_chain = result.getValue(1);
-    // Rotate the chunk if necessary
-    if (rotamt < 0)
-      rotamt += 16;
-    if (rotamt != 0 || !was16aligned) {
-      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-
-      Ops[0] = result;
-      if (was16aligned) {
-        Ops[1] = DAG.getConstant(rotamt, MVT::i16);
+    SDValue result;
+    SDValue basePtr = LN->getBasePtr();
+    SDValue rotate;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and the rotation amount:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+        if (rotamt < 0)
+          rotamt += 16;
+
+        rotate = DAG.getConstant(rotamt, MVT::i16);
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
+                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+        // Plain aligned a-form address: rotate into preferred slot
+        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getConstant(rotamt, MVT::i16);
       } else {
-        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-        LoadSDNode *LN1 = cast<LoadSDNode>(result);
-        Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+        // Offset the rotate amount by the basePtr and the preferred slot
+        // byte offset
+        int64_t rotamt = -vtm->prefslot_byte;
+        if (rotamt < 0)
+          rotamt += 16;
+        rotate = DAG.getNode(ISD::ADD, PtrVT,
+                             basePtr,
                              DAG.getConstant(rotamt, PtrVT));
       }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+        } else {
+          // Convert the (add <arg1>, <arg2>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }

-      result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
+      // Offset the rotate amount by the basePtr and the preferred slot
+      // byte offset
+      rotate = DAG.getNode(ISD::ADD, PtrVT,
+                           basePtr,
+                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
     }

+    // Re-emit as a v16i8 vector load
+    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+                         LN->getSrcValue(), LN->getSrcValueOffset(),
+                         LN->isVolatile(), 16);
+
+    // Update the chain
+    the_chain = result.getValue(1);
+
+    // Rotate into the preferred slot:
+    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
+                         result.getValue(0), rotate);
+
     // Convert the loaded v16i8 vector to the appropriate vector type
     // specified by the operand:
     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
@@ -704,23 +644,86 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {

   switch (SN->getAddressingMode()) {
   case ISD::UNINDEXED: {
-    int chunk_offset, slot_offset;
-    bool was16aligned;
-
     // The vector type we really want to load from the 16-byte chunk.
     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

-    SDValue alignLoadVec =
-      AlignedLoad(Op, DAG, ST, SN, alignment,
-                  chunk_offset, slot_offset, VT, was16aligned);
+    SDValue alignLoadVec;
+    SDValue basePtr = SN->getBasePtr();
+    SDValue the_chain = SN->getChain();
+    SDValue insertEltOffs;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and insertion byte:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant((offset & 0xf), PtrVT));
+
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else {
+        // Otherwise, assume it's at byte 0 of basePtr
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant(0, PtrVT));
+      }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+        } else {
+          // Convert the (add <arg1>, <arg2>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }
+
+      // Insertion point is solely determined by basePtr's contents
+      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant(0, PtrVT));
+    }
+
+    // Re-emit as a v16i8 vector load
+    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+                               SN->getSrcValue(), SN->getSrcValueOffset(),
+                               SN->isVolatile(), 16);

-    if (alignLoadVec.getNode() == 0)
-      return alignLoadVec;
+    // Update the chain
+    the_chain = alignLoadVec.getValue(1);

     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
-    SDValue basePtr = LN->getBasePtr();
-    SDValue the_chain = alignLoadVec.getValue(1);
     SDValue theValue = SN->getValue();
     SDValue result;

@@ -732,29 +735,20 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
       theValue = theValue.getOperand(0);
     }

-    chunk_offset &= 0xf;
-
-    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
-    SDValue insertEltPtr;
-
     // If the base pointer is already a D-form address, then just create
     // a new D-form address with a slot offset and the orignal base pointer.
     // Otherwise generate a D-form address with the slot offset relative
     // to the stack pointer, which is always aligned.
-    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
-    DEBUG(basePtr.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
-
-    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
-        (basePtr.getOpcode() == ISD::ADD
-         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
-      insertEltPtr = basePtr;
-    } else {
-      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
-    }
+#if !defined(NDEBUG)
+    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+      cerr << "CellSPU LowerSTORE: basePtr = ";
+      basePtr.getNode()->dump(&DAG);
+      cerr << "\n";
+    }
+#endif

     SDValue insertEltOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
     SDValue vectorizeOp =
       DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

@@ -919,22 +913,31 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }

-//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
+LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
   SDValue Cond = Op.getOperand(1);
```