diff options
Diffstat (limited to 'lib/Target/CellSPU/SPUISelLowering.cpp')
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 3267 |
1 files changed, 0 insertions, 3267 deletions
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp deleted file mode 100644 index 31b87331a9..0000000000 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ /dev/null @@ -1,3267 +0,0 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SPUTargetLowering class. -// -//===----------------------------------------------------------------------===// - -#include "SPUISelLowering.h" -#include "SPUTargetMachine.h" -#include "SPUFrameLowering.h" -#include "SPUMachineFunction.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - // Byte offset of the preferred slot (counted from the MSB) - int prefslotOffset(EVT VT) { - int retval=0; - if (VT==MVT::i1) retval=3; - if (VT==MVT::i8) retval=3; - if (VT==MVT::i16) retval=2; - - return retval; - } - - //! Expand a library call into an actual call DAG node - /*! - \note - This code is taken from SelectionDAGLegalize, since it is not exposed as - part of the LLVM SelectionDAG API. - */ - - SDValue - ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - EVT ArgVT = Op.getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op.getOperand(i); - Entry.Ty = ArgTy; - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - // Splice the libcall in wherever FindInputOutputChains tells us to. - Type *RetTy = - Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, - false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotRet=*/false, - /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc()); - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; - } -} - -SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()), - SPUTM(TM) { - - // Use _setjmp/_longjmp instead of setjmp/longjmp. - setUseUnderscoreSetJmp(true); - setUseUnderscoreLongJmp(true); - - // Set RTLIB libcall names as used by SPU: - setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); - - // Set up the SPU's register classes: - addRegisterClass(MVT::i8, &SPU::R8CRegClass); - addRegisterClass(MVT::i16, &SPU::R16CRegClass); - addRegisterClass(MVT::i32, &SPU::R32CRegClass); - addRegisterClass(MVT::i64, &SPU::R64CRegClass); - addRegisterClass(MVT::f32, &SPU::R32FPRegClass); - addRegisterClass(MVT::f64, &SPU::R64FPRegClass); - addRegisterClass(MVT::i128, &SPU::GPRCRegClass); - - // SPU has no sign or zero extended loads for i1, i8, i16: - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - - setTruncStoreAction(MVT::i128, MVT::i64, Expand); - setTruncStoreAction(MVT::i128, MVT::i32, Expand); - setTruncStoreAction(MVT::i128, MVT::i16, Expand); - setTruncStoreAction(MVT::i128, MVT::i8, Expand); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // SPU constant load actions are custom lowered: - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Custom); - - // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - // Expand the jumptable branches - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // Custom lower SELECT_CC for most cases, but expand by default - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - - // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - - // SPU has no division/remainder instructions - setOperationAction(ISD::SREM, MVT::i8, Expand); - setOperationAction(ISD::UREM, MVT::i8, Expand); - setOperationAction(ISD::SDIV, MVT::i8, Expand); - setOperationAction(ISD::UDIV, MVT::i8, Expand); - setOperationAction(ISD::SDIVREM, MVT::i8, Expand); - setOperationAction(ISD::UDIVREM, MVT::i8, Expand); - setOperationAction(ISD::SREM, MVT::i16, Expand); - setOperationAction(ISD::UREM, MVT::i16, Expand); - setOperationAction(ISD::SDIV, MVT::i16, Expand); - setOperationAction(ISD::UDIV, MVT::i16, Expand); - setOperationAction(ISD::SDIVREM, MVT::i16, Expand); - setOperationAction(ISD::UDIVREM, MVT::i16, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setOperationAction(ISD::UDIV, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i128, Expand); - setOperationAction(ISD::UREM, MVT::i128, Expand); - setOperationAction(ISD::SDIV, MVT::i128, Expand); - setOperationAction(ISD::UDIV, MVT::i128, Expand); - setOperationAction(ISD::SDIVREM, MVT::i128, Expand); - setOperationAction(ISD::UDIVREM, MVT::i128, Expand); - - // We don't support sin/cos/sqrt/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); - - // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt - // for f32!) - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); - - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - - // SPU can do rotate right and left, so legalize it... but customize for i8 - // because instructions don't exist. - - // FIXME: Change from "expand" to appropriate type once ROTR is supported in - // .td files. - setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); - - setOperationAction(ISD::ROTL, MVT::i32, Legal); - setOperationAction(ISD::ROTL, MVT::i16, Legal); - setOperationAction(ISD::ROTL, MVT::i8, Custom); - - // SPU has no native version of shift left/right for i8 - setOperationAction(ISD::SHL, MVT::i8, Custom); - setOperationAction(ISD::SRL, MVT::i8, Custom); - setOperationAction(ISD::SRA, MVT::i8, Custom); - - // Make these operations legal and handle them during instruction selection: - setOperationAction(ISD::SHL, MVT::i64, Legal); - setOperationAction(ISD::SRL, MVT::i64, Legal); - setOperationAction(ISD::SRA, MVT::i64, Legal); - - // Custom lower i8, i32 and i64 multiplications - setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Legal); - setOperationAction(ISD::MUL, MVT::i64, Legal); - - // Expand double-width multiplication - // FIXME: It would probably be reasonable to support some of these operations - setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::MULHU, MVT::i16, Expand); - setOperationAction(ISD::MULHS, MVT::i16, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - - // Need to custom handle (some) common i8, i64 math ops - setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i64, Legal); - setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Legal); - - // SPU does not have BSWAP. It does have i32 support CTLZ. - // CTPOP has to be custom lowered. - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i8, Custom); - setOperationAction(ISD::CTPOP, MVT::i16, Custom); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - setOperationAction(ISD::CTPOP, MVT::i128, Expand); - - setOperationAction(ISD::CTTZ , MVT::i8, Expand); - setOperationAction(ISD::CTTZ , MVT::i16, Expand); - setOperationAction(ISD::CTTZ , MVT::i32, Expand); - setOperationAction(ISD::CTTZ , MVT::i64, Expand); - setOperationAction(ISD::CTTZ , MVT::i128, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); - - setOperationAction(ISD::CTLZ , MVT::i8, Promote); - setOperationAction(ISD::CTLZ , MVT::i16, Promote); - setOperationAction(ISD::CTLZ , MVT::i32, Legal); - setOperationAction(ISD::CTLZ , MVT::i64, Expand); - setOperationAction(ISD::CTLZ , MVT::i128, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); - - // SPU has a version of select that implements (a&~c)|(b&c), just like - // select ought to work: - setOperationAction(ISD::SELECT, MVT::i8, Legal); - setOperationAction(ISD::SELECT, MVT::i16, Legal); - setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Legal); - - setOperationAction(ISD::SETCC, MVT::i8, Legal); - setOperationAction(ISD::SETCC, MVT::i16, Legal); - setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Legal); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - - // Custom lower i128 -> i64 truncates - setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - - // Custom lower i32/i64 -> i128 sign extend - setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); - // SPU has a legal FP -> signed INT instruction for f32, but for f64, need - // to expand to a libcall, hence the custom lowering: - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); - - // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - - // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - - setOperationAction(ISD::BITCAST, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::f32, Legal); - setOperationAction(ISD::BITCAST, MVT::i64, Legal); - setOperationAction(ISD::BITCAST, MVT::f64, Legal); - - // We cannot sextinreg(i1). Expand to shifts. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // We want to legalize GlobalAddress and ConstantPool nodes into the - // appropriate instructions to materialize the address. - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); - } - - // VASTART needs to be custom lowered to use the VarArgsFrameIndex - setOperationAction(ISD::VASTART , MVT::Other, Custom); - - // Use the default implementation. - setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); - setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); - - // Cell SPU has instructions for converting between i64 and fp. - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - - // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); - - // BUILD_PAIR can't be handled natively, and should be expanded to shl/or - setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); - - // First set operation action for all vector types to expand. Then we - // will selectively turn on ones that can be effectively codegen'd. - addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass); - addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass); - - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - - // Set operation actions to legal types only. - if (!isTypeLegal(VT)) continue; - - // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD, VT, Legal); - setOperationAction(ISD::SUB, VT, Legal); - // mul has to be custom lowered. - setOperationAction(ISD::MUL, VT, Legal); - - setOperationAction(ISD::AND, VT, Legal); - setOperationAction(ISD::OR, VT, Legal); - setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Custom); - - // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - - // Expand all trunc stores - for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { - MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j; - setTruncStoreAction(VT, TargetVT, Expand); - } - - // Custom lower build_vector, constant pool spills, insert and - // extract vector elements: - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - - setOperationAction(ISD::AND, MVT::v16i8, Custom); - setOperationAction(ISD::OR, MVT::v16i8, Custom); - setOperationAction(ISD::XOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - - setOperationAction(ISD::FDIV, MVT::v4f32, Legal); - - setBooleanContents(ZeroOrNegativeOneBooleanContent); - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? - - setStackPointerRegisterToSaveRestore(SPU::R1); - - // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - - setMinFunctionAlignment(3); - - computeRegisterProperties(); - - // Set pre-RA register scheduler default to BURR, which produces slightly - // better code than the default (could also be TDRR, but TargetLowering.h - // needs a mod to support that model): - setSchedulingPreference(Sched::RegPressure); -} - -const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; - case SPUISD::Hi: return "SPUISD::Hi"; - case SPUISD::Lo: return "SPUISD::Lo"; - case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; - case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; - case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; - case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; - case SPUISD::CALL: return "SPUISD::CALL"; - case SPUISD::SHUFB: return "SPUISD::SHUFB"; - case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; - case SPUISD::CNTB: return "SPUISD::CNTB"; - case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; - case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; - case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; - case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; - case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; - case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; - case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; - case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; - case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; - case SPUISD::SELB: return "SPUISD::SELB"; - case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER"; - case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; - case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; - } -} - -//===----------------------------------------------------------------------===// -// Return the Cell SPU's SETCC result type -//===----------------------------------------------------------------------===// - -EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { - // i8, i16 and i32 are valid SETCC result types - MVT::SimpleValueType retval; - - switch(VT.getSimpleVT().SimpleTy){ - case MVT::i1: - case MVT::i8: - retval = MVT::i8; break; - case MVT::i16: - retval = MVT::i16; break; - case MVT::i32: - default: - retval = MVT::i32; - } - return retval; -} - -//===----------------------------------------------------------------------===// -// Calling convention code: -//===----------------------------------------------------------------------===// - -#include "SPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// -// LowerOperation implementation -//===----------------------------------------------------------------------===// - -/// Custom lower loads for CellSPU -/*! - All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to rotate to extract the requested element. - - For extending loads, we also want to ensure that the following sequence is - emitted, e.g. for MVT::f32 extending load to MVT::f64: - -\verbatim -%1 v16i8,ch = load -%2 v16i8,ch = rotate %1 -%3 v4f8, ch = bitconvert %2 -%4 f32 = vec2perfslot %3 -%5 f64 = fp_extend %4 -\endverbatim -*/ -static SDValue -LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - LoadSDNode *LN = cast<LoadSDNode>(Op); - SDValue the_chain = LN->getChain(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - EVT InVT = LN->getMemoryVT(); - EVT OutVT = Op.getValueType(); - ISD::LoadExtType ExtType = LN->getExtensionType(); - unsigned alignment = LN->getAlignment(); - int pso = prefslotOffset(InVT); - DebugLoc dl = Op.getDebugLoc(); - EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT, - (128 / InVT.getSizeInBits())); - - // two sanity checks - assert( LN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = LN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); - - SDValue result; - SDValue basePtr = LN->getBasePtr(); - SDValue rotate; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - - // Special cases for a known aligned load to simplify the base pointer - // and the rotation amount: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - int64_t rotamt = int64_t((offset & 0xf) - pso); - - if (rotamt < 0) - rotamt += 16; - - rotate = DAG.getConstant(rotamt, MVT::i16); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) - || (basePtr.getOpcode() == SPUISD::IndirectAddr - && basePtr.getOperand(0).getOpcode() == SPUISD::Hi - && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { - // Plain aligned a-form address: rotate into preferred slot - // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getConstant(rotamt, MVT::i16); - } else { - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(rotamt, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa<ConstantSDNode>(Op1)) { - // Convert the (add <ptr>, <const>) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add <arg1>, <arg2>) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(-pso, PtrVT)); - } - - // Do the load as a i128 to allow possible shifting - SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, 16); - - // When the size is not greater than alignment we get all data with just - // one load - if (alignment >= InVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - // Rotate into the preferred slot: - result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, - low.getValue(0), rotate); - - // Convert the loaded v16i8 vector to the appropriate vector type - // specified by the operand: - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - InVT, (128 / InVT.getSizeInBits())); - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, - DAG.getNode(ISD::BITCAST, dl, vecVT, result)); - } - // When alignment is less than the size, we might need (known only at - // run-time) two loads - // TODO: if the memory address is composed only from constants, we have - // extra kowledge, and might avoid the second load - else { - // storage position offset from lower 16 byte aligned memory chunk - SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, - basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); - // get a registerfull of ones. (this implementation is a workaround: LLVM - // cannot handle 128 bit signed int constants) - SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); - ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); - - SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, - DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(16, PtrVT)), - highMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, - 16); - - the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), - high.getValue(1)); - - // Shift the (possible) high part right to compensate the misalignemnt. - // if there is no highpart (i.e. value is i64 and offset is 4), this - // will zero out the high value. - high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, - DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - offset - )); - - // Shift the low similarly - // TODO: add SPUISD::SHL_BYTES - low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); - - // Merge the two parts - result = DAG.getNode(ISD::BITCAST, dl, vecVT, - DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); - - if (!InVT.isVector()) { - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); - } - - } - // Handle extending loads by extending the scalar result: - if (ExtType == ISD::SEXTLOAD) { - result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::ZEXTLOAD) { - result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::EXTLOAD) { - unsigned NewOpc = ISD::ANY_EXTEND; - - if (OutVT.isFloatingPoint()) - NewOpc = ISD::FP_EXTEND; - - result = DAG.getNode(NewOpc, dl, OutVT, result); - } - - SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); - SDValue retops[2] = { - result, - the_chain - }; - - result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, - retops, sizeof(retops) / sizeof(retops[0])); - return result; -} - -/// Custom lower stores for CellSPU -/*! - All CellSPU stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to generate a shuffle to insert the - requested element into its place, then store the resulting block. - */ -static SDValue -LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - StoreSDNode *SN = cast<StoreSDNode>(Op); - SDValue Value = SN->getValue(); - EVT VT = Value.getValueType(); - EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); - unsigned alignment = SN->getAlignment(); - SDValue result; - EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, - (128 / StVT.getSizeInBits())); - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = SN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); - - - // two sanity checks - assert( SN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - SDValue alignLoadVec; - SDValue basePtr = SN->getBasePtr(); - SDValue the_chain = SN->getChain(); - SDValue insertEltOffs; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer - // and insertion byte: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & 0xf), PtrVT)); - - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else { - // Otherwise, assume it's at byte 0 of basePtr - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa<ConstantSDNode>(Op1)) { - // Convert the (add <ptr>, <const>) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add <arg1>, <arg2>) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Insertion point is solely determined by basePtr's contents - insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Load the lower part of the memory to which to store. - SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, - lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), - false, 16); - - // if we don't need to store over the 16 byte boundary, one store suffices - if (alignment >= StVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - LoadSDNode *LN = cast<LoadSDNode>(low); - SDValue theValue = SN->getValue(); - - if (StVT != VT - && (theValue.getOpcode() == ISD::AssertZext - || theValue.getOpcode() == ISD::AssertSext)) { - // Drill down and get the value for zero- and sign-extended - // quantities - theValue = theValue.getOperand(0); - } - - // If the base pointer is already a D-form address, then just create - // a new D-form address with a slot offset and the orignal base pointer. - // Otherwise generate a D-form address with the slot offset relative - // to the stack pointer, which is always aligned. -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "CellSPU LowerSTORE: basePtr = "; - basePtr.getNode()->dump(&DAG); - errs() << "\n"; - } -#endif - - SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, - insertEltOffs); - SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, - theValue); - - result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, - vectorizeOp, low, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, insertEltOp)); - - result = DAG.getStore(the_chain, dl, result, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), - 16); - - } - // do the store when it might cross the 16 byte memory access boundary. - else { - // TODO issue a warning if SN->isVolatile()== true? This is likely not |