aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/CellSPU/SPUISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/CellSPU/SPUISelLowering.cpp')
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp3267
1 files changed, 0 insertions, 3267 deletions
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
deleted file mode 100644
index 31b87331a9..0000000000
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ /dev/null
@@ -1,3267 +0,0 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUISelLowering.h"
-#include "SPUTargetMachine.h"
-#include "SPUFrameLowering.h"
-#include "SPUMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- // Byte offset of the preferred slot (counted from the MSB)
- int prefslotOffset(EVT VT) {
-   // i1 and i8 use byte offset 3, i16 uses byte offset 2; every other type
-   // uses offset 0.
-   if (VT == MVT::i1 || VT == MVT::i8)
-     return 3;
-   if (VT == MVT::i16)
-     return 2;
-   return 0;
- }
-
- //! Emit a call to a runtime library routine in place of \p Op
- /*!
-  Every operand of \p Op becomes one call argument, flagged as sign-extended
-  when \p isSigned is true and zero-extended otherwise. The call's return type
-  is the type of Op's first result, and the call is chained to the DAG entry
-  node.
-
-  \return the first element of the pair produced by LowerCallTo.
-
-  \note
-  This code is taken from SelectionDAGLegalize, since it is not exposed as
-  part of the LLVM SelectionDAG API.
-  \note
-  \p Hi is part of the signature but is never read or written here.
-  */
-
- SDValue
- ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
-               bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
-   // The input chain to this libcall is the entry node of the function.
-   // Legalizing the call will automatically add the previous call to the
-   // dependence.
-   SDValue EntryChain = DAG.getEntryNode();
-
-   // One argument entry per operand of Op, all sharing the same extension
-   // flags.
-   TargetLowering::ArgListTy CallArgs;
-   const unsigned NumOperands = Op.getNumOperands();
-   for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
-     SDValue Operand = Op.getOperand(Idx);
-     TargetLowering::ArgListEntry Arg;
-     Arg.Node = Operand;
-     Arg.Ty = Operand.getValueType().getTypeForEVT(*DAG.getContext());
-     Arg.isSExt = isSigned;
-     Arg.isZExt = !isSigned;
-     CallArgs.push_back(Arg);
-   }
-
-   // The callee is the external symbol registered for this libcall.
-   SDValue Target = DAG.getExternalSymbol(TLI.getLibcallName(LC),
-                                          TLI.getPointerTy());
-
-   // The call returns whatever type Op's first result has.
-   Type *ResultTy =
-       Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
-
-   TargetLowering::CallLoweringInfo CLI(EntryChain, ResultTy,
-                                        isSigned, !isSigned,
-                                        false, false,
-                                        0, TLI.getLibcallCallingConv(LC),
-                                        /*isTailCall=*/false,
-                                        /*doesNotRet=*/false,
-                                        /*isReturnValueUsed=*/true,
-                                        Target, CallArgs, DAG,
-                                        Op.getDebugLoc());
-
-   return TLI.LowerCallTo(CLI).first;
- }
-}
-
-SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()),
- SPUTM(TM) {
- /* Registers the SPU register classes and records, for each operation and
-    value type, the action (Legal, Custom, Promote or Expand) requested for it.
-    Several (operation, type) pairs are set more than once below; presumably
-    the last call wins, so the order of these calls matters. */
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
- // Set RTLIB libcall names as used by SPU:
- setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
-
- // Set up the SPU's register classes:
- addRegisterClass(MVT::i8, &SPU::R8CRegClass);
- addRegisterClass(MVT::i16, &SPU::R16CRegClass);
- addRegisterClass(MVT::i32, &SPU::R32CRegClass);
- addRegisterClass(MVT::i64, &SPU::R64CRegClass);
- addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
- addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
- addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
-
- // SPU has no sign or zero extended loads for i1, i8, i16. Extending loads
- // from i1 are promoted here; wider types are handled in the loop below.
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
- setTruncStoreAction(MVT::i128, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i16, Expand);
- setTruncStoreAction(MVT::i128, MVT::i8, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // FP constants: f32 is marked Legal, f64 is custom lowered:
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
- // SPU's loads and stores have to be custom lowered. This loop visits every
- // simple value type from MVT::i8 up to, but not including, MVT::i128:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { // every narrower type down to MVT::i8
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
- /* Same treatment for the FP types. NOTE(review): the upper bound is
-    exclusive, so MVT::f64 itself is not visited by this loop; confirm that
-    this is intended. */
- for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- // Expand jump-table branches and BR_CC
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- // SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-
- // SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, MVT::i8, Expand);
- setOperationAction(ISD::UREM, MVT::i8, Expand);
- setOperationAction(ISD::SDIV, MVT::i8, Expand);
- setOperationAction(ISD::UDIV, MVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
-
- // We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
-
- // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
- // for f32!)
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- // SPU can do rotate right and left, so legalize it... but customize for i8
- // because instructions don't exist.
-
- // FIXME: Change from "expand" to appropriate type once ROTR is supported in
- // .td files. The intended action is kept in the trailing /*...*/ markers.
- setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
-
- setOperationAction(ISD::ROTL, MVT::i32, Legal);
- setOperationAction(ISD::ROTL, MVT::i16, Legal);
- setOperationAction(ISD::ROTL, MVT::i8, Custom);
-
- // SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i8, Custom);
-
- // Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, MVT::i64, Legal);
- setOperationAction(ISD::SRL, MVT::i64, Legal);
- setOperationAction(ISD::SRA, MVT::i64, Legal);
-
- // Custom lower i8 multiplication; i32 and i64 multiplications are marked Legal
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
-
- // Expand double-width multiplication
- // FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
-
- // i8 add/sub are custom lowered; i64 add/sub are marked Legal
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Legal);
-
- // SPU does not have BSWAP. It does have i32 support CTLZ.
- // CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Custom);
- setOperationAction(ISD::CTPOP, MVT::i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , MVT::i8, Expand);
- setOperationAction(ISD::CTTZ , MVT::i16, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i128, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , MVT::i8, Promote);
- setOperationAction(ISD::CTLZ , MVT::i16, Promote);
- setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i128, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
-
- // SPU has a version of select that implements (a&~c)|(b&c), just like
- // select ought to work:
- setOperationAction(ISD::SELECT, MVT::i8, Legal);
- setOperationAction(ISD::SELECT, MVT::i16, Legal);
- setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Legal);
-
- setOperationAction(ISD::SETCC, MVT::i8, Legal);
- setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
- // Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
-
- // Custom lower i32/i64 -> i128 sign extend
- setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
- // to expand to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); // NOTE(review): set again (to Promote) further down
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); // NOTE(review): set again (to Custom) further down
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
-
- // f64 FDIV is expanded rather than custom lowered
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
-
- // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
- // NOTE(review): the arrows above read backwards for an int-to-FP conversion
- // (presumably i32->f32 etc. is meant); confirm.
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::BITCAST, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::f32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i64, Legal);
- setOperationAction(ISD::BITCAST, MVT::f64, Legal);
-
- // We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool nodes into the
- // appropriate instructions to materialize the address. JumpTable nodes get
- // the same treatment. The loop visits every simple value type from MVT::i8
- // up to, but not including, MVT::f128.
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
- }
-
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
-
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
-
- // Cell SPU has instructions for converting between i64 and fp.
- // NOTE(review): FP_TO_SINT/i64 was set to Expand earlier in this constructor
- // and SINT_TO_FP/i64 was already set to Custom; these calls repeat them.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- // (FP_TO_UINT/i32 was set to Custom earlier in this constructor).
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
-
- // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // Register the vector types (all in the VECREG register class), then set
- // the operation actions for every vector type that is legal.
- addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
-
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
- // Set operation actions to legal types only.
- if (!isTypeLegal(VT)) continue;
-
- // add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD, VT, Legal);
- setOperationAction(ISD::SUB, VT, Legal);
- // NOTE(review): this comment used to say mul has to be custom lowered, but the call below marks it Legal.
- setOperationAction(ISD::MUL, VT, Legal);
-
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Custom);
-
- // These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
-
- // Expand all trunc stores (from VT to every vector value type)
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
- setTruncStoreAction(VT, TargetVT, Expand);
- }
-
- // Custom lower build_vector, constant pool spills, insert and
- // extract vector elements, scalar_to_vector and vector shuffles:
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- }
-
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-
- setOperationAction(ISD::AND, MVT::v16i8, Custom); // per-type settings made after the loop above
- setOperationAction(ISD::OR, MVT::v16i8, Custom);
- setOperationAction(ISD::XOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
-
- setStackPointerRegisterToSaveRestore(SPU::R1);
-
- // We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
-
- setMinFunctionAlignment(3); // presumably log2 of the byte alignment (8 bytes); TODO confirm
-
- computeRegisterProperties();
-
- // Set pre-RA register scheduler default to BURR, which produces slightly
- // better code than the default (could also be TDRR, but TargetLowering.h
- // needs a mod to support that model). NOTE(review): the call below selects Sched::RegPressure; this comment may be stale.
- setSchedulingPreference(Sched::RegPressure);
-}
-
-/// Map an SPU-specific DAG opcode to its printable name.
-///
-/// \param Opcode an SPUISD opcode value.
-/// \return the literal "SPUISD::<name>" for every opcode listed below, or a
-///         null pointer for any other value.
-const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch (Opcode) {
-  default: return 0;
-  case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
-  case SPUISD::Hi: return "SPUISD::Hi";
-  case SPUISD::Lo: return "SPUISD::Lo";
-  case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
-  case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
-  case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
-  case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
-  case SPUISD::CALL: return "SPUISD::CALL";
-  case SPUISD::SHUFB: return "SPUISD::SHUFB";
-  case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
-  case SPUISD::CNTB: return "SPUISD::CNTB";
-  case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
-  case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
-  case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
-  case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
-  // SRL_BYTES nodes are created by the unaligned-load lowering in this file,
-  // so they need a name as well; otherwise DAG dumps show them as unknown.
-  case SPUISD::SRL_BYTES: return "SPUISD::SRL_BYTES";
-  case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
-  case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
-  case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
-  case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
-  case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
-  case SPUISD::SELB: return "SPUISD::SELB";
-  case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
-  case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
-  case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Return the Cell SPU's SETCC result type
-//===----------------------------------------------------------------------===//
-
-EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
-  // The SETCC result is i8 for i1/i8 operands, i16 for i16 operands and i32
-  // for every other operand type.
-  switch (VT.getSimpleVT().SimpleTy) {
-  case MVT::i1:
-  case MVT::i8:
-    return MVT::i8;
-  case MVT::i16:
-    return MVT::i16;
-  default:
-    return MVT::i32;
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Calling convention code:
-//===----------------------------------------------------------------------===//
-
-#include "SPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// LowerOperation implementation
-//===----------------------------------------------------------------------===//
-
-/// Custom lower loads for CellSPU
-/*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
-
- For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for MVT::f32 extending load to MVT::f64:
-
-\verbatim
-%1 v16i8,ch = load
-%2 v16i8,ch = rotate %1
-%3 v4f32, ch = bitconvert %2
-%4 f32 = vec2prefslot %3
-%5 f64 = fp_extend %4
-\endverbatim
-
- \param Op  the load to lower; it is cast to LoadSDNode and is expected to
-            use UNINDEXED addressing (asserted below).
- \param DAG the SelectionDAG in which the replacement nodes are created.
- \param ST  subtarget; not referenced in this function.
- \return an empty SDValue when the memory type is 128 bits wide and the
-         alignment is a multiple of 16 (the load is left as it is); otherwise
-         an SPUISD::LDRESULT node carrying the loaded (and, for extending
-         loads, extended) value together with the output chain.
-*/
-static SDValue
-LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDValue the_chain = LN->getChain();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- EVT InVT = LN->getMemoryVT(); // type as stored in memory
- EVT OutVT = Op.getValueType(); // type of the load's result (differs for extending loads)
- ISD::LoadExtType ExtType = LN->getExtensionType();
- unsigned alignment = LN->getAlignment();
- int pso = prefslotOffset(InVT);
- DebugLoc dl = Op.getDebugLoc();
- EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
- (128 / InVT.getSizeInBits())); // 128-bit vector of InVT elements
-
- // two sanity checks
- assert( LN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = LN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16; // round down to a multiple of 16
- MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
-
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate; // rotation amount (constant or computed) used for the single-load case
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - pso);
-
- if (rotamt < 0)
- rotamt += 16; // keep the rotation amount in the range 0..15
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) { // NOTE(review): a negative 16-byte-aligned part of the offset is not re-applied here; confirm intended
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(-pso, PtrVT));
- }
-
- // Do the load as a i128 to allow possible shifting
- SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false, 16);
-
- // When the size is not greater than alignment we get all data with just
- // one load
- if (alignment >= InVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- // Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
- low.getValue(0), rotate);
-
- // Bitcast the rotated i128 to the vector type built from the memory
- // type, then convert it to InVT with VEC2PREFSLOT:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(), // NOTE(review): shadows the vecVT declared at the top of the function
- InVT, (128 / InVT.getSizeInBits()));
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BITCAST, dl, vecVT, result));
- }
- // When alignment is less than the size, we might need (known only at
- // run-time) two loads
- // TODO: if the memory address is composed only from constants, we have
- // extra knowledge, and might avoid the second load
- else {
- // storage position offset from lower 16 byte aligned memory chunk
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
- // get a registerfull of ones. (this implementation is a workaround: LLVM
- // cannot handle 128 bit signed int constants)
- // NOTE(review): 'ones' is not referenced again in this function.
- SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(16, PtrVT)),
- highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false,
- 16);
-
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- high.getValue(1));
-
- // Shift the (possible) high part right to compensate the misalignment.
- // if there is no highpart (i.e. value is i64 and offset is 4), this
- // will zero out the high value.
- high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
- DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset
- ));
-
- // Shift the low similarly
- // TODO: add SPUISD::SHL_BYTES (NOTE(review): SHL_BYTES is already used on the next line, so this TODO may be stale)
- low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
-
- // Merge the two parts
- result = DAG.getNode(ISD::BITCAST, dl, vecVT,
- DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
-
- if (!InVT.isVector()) {
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
- }
-
- }
- // Handle extending loads by extending the scalar result:
- if (ExtType == ISD::SEXTLOAD) {
- result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::ZEXTLOAD) {
- result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::EXTLOAD) {
- unsigned NewOpc = ISD::ANY_EXTEND;
-
- if (OutVT.isFloatingPoint())
- NewOpc = ISD::FP_EXTEND; // floating-point results use FP_EXTEND instead of ANY_EXTEND
-
- result = DAG.getNode(NewOpc, dl, OutVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); // result value plus chain
- SDValue retops[2] = {
- result,
- the_chain
- };
-
- result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
- retops, sizeof(retops) / sizeof(retops[0]));
- return result;
-}
-
-/// Custom lower stores for CellSPU
-/*!
- All CellSPU stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to generate a shuffle to insert the
- requested element into its place, then store the resulting block.
- */
-static SDValue
-LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- StoreSDNode *SN = cast<StoreSDNode>(Op);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
- EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- unsigned alignment = SN->getAlignment();
- SDValue result;
- EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
- (128 / StVT.getSizeInBits()));
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = SN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
-
-
- // two sanity checks
- assert( SN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Load the lower part of the memory to which to store.
- SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
-
- // if we don't need to store over the 16 byte boundary, one store suffices
- if (alignment >= StVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- LoadSDNode *LN = cast<LoadSDNode>(low);
- SDValue theValue = SN->getValue();
-
- if (StVT != VT
- && (theValue.getOpcode() == ISD::AssertZext
- || theValue.getOpcode() == ISD::AssertSext)) {
- // Drill down and get the value for zero- and sign-extended
- // quantities
- theValue = theValue.getOperand(0);
- }
-
- // If the base pointer is already a D-form address, then just create
- // a new D-form address with a slot offset and the orignal base pointer.
- // Otherwise generate a D-form address with the slot offset relative
- // to the stack pointer, which is always aligned.
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "CellSPU LowerSTORE: basePtr = ";
- basePtr.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
- insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
- theValue);
-
- result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, low,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, insertEltOp));
-
- result = DAG.getStore(the_chain, dl, result, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(),
- 16);
-
- }
- // do the store when it might cross the 16 byte memory access boundary.
- else {
- // TODO issue a warning if SN->isVolatile()== true? This is likely not