diff options
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 411 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 378 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.cpp | 5 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td | 105 | ||||
-rw-r--r-- | test/CodeGen/CellSPU/2009-01-01-BrCond.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/CellSPU/fneg-fabs.ll | 6 |
7 files changed, 424 insertions, 497 deletions
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 73607bf251..d148622009 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -200,182 +200,212 @@ namespace { return retval; } -} -namespace { + //! Generate the carry-generate shuffle mask. + SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector<SDValue, 16 > ShufBytes; -//===--------------------------------------------------------------------===// -/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine -/// instructions for SelectionDAG operations. -/// -class SPUDAGToDAGISel : - public SelectionDAGISel -{ - SPUTargetMachine &TM; - SPUTargetLowering &SPUtli; - unsigned GlobalBaseReg; - -public: - explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), - SPUtli(*tm.getTargetLowering()) - { } - - virtual bool runOnFunction(Function &Fn) { - // Make sure we re-emit a set of the global base reg if necessary - GlobalBaseReg = 0; - SelectionDAGISel::runOnFunction(Fn); - return true; - } + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. - inline SDValue getI32Imm(uint32_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } - /// getI64Imm - Return a target constant with the specified value, of type - /// i64. - inline SDValue getI64Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i64); + //! Generate the borrow-generate shuffle mask + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector<SDValue, 16 > ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } - /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); + //===------------------------------------------------------------------===// + /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine + /// instructions for SelectionDAG operations. + /// + class SPUDAGToDAGISel : + public SelectionDAGISel + { + SPUTargetMachine &TM; + SPUTargetLowering &SPUtli; + unsigned GlobalBaseReg; + + public: + explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), + SPUtli(*tm.getTargetLowering()) + { } + + virtual bool runOnFunction(Function &Fn) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnFunction(Fn); + return true; } - SDNode *emitBuildVector(SDValue build_vec) { - MVT vecVT = build_vec.getValueType(); - SDNode *bvNode = build_vec.getNode(); - DebugLoc dl = bvNode->getDebugLoc(); - - // Check to see if this vector can be represented as a CellSPU immediate - // constant by invoking all of the instruction selection predicates: - if (((vecVT == MVT::v8i16) && - (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || - ((vecVT == MVT::v4i32) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || - ((vecVT == MVT::v2i64) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(build_vec); - - // No, need to emit a constant pool spill: - std::vector<Constant*> CV; - - for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i)); - CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(uint32_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); } - Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, - SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); - } + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } - /// Select - Convert the specified operand from a target-independent to a - /// target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); - - //! Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); - - //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); - - //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); - - //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDValue &Op, MVT OpVT); - - //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - //! D-form address predicate - bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, - SDValue &Base); - - /// D-form address selection workhorse - bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, - SDValue &Base, int minOffset, int maxOffset); - - //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { - SDValue Op0, Op1; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) - SelectXFormAddr(Op, Op, Op0, Op1); - break; - case 'o': // offsetable - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) { - Op0 = Op; - Op1 = getSmallIPtrImm(0); + /// getSmallIPtrImm - Return a target constant of pointer type. + inline SDValue getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); } - break; - case 'v': // not offsetable + + SDNode *emitBuildVector(SDValue build_vec) { + MVT vecVT = build_vec.getValueType(); + MVT eltVT = vecVT.getVectorElementType(); + SDNode *bvNode = build_vec.getNode(); + DebugLoc dl = bvNode->getDebugLoc(); + + // Check to see if this vector can be represented as a CellSPU immediate + // constant by invoking all of the instruction selection predicates: + if (((vecVT == MVT::v8i16) && + (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || + ((vecVT == MVT::v4i32) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || + ((vecVT == MVT::v2i64) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) + return Select(build_vec); + + // No, need to emit a constant pool spill: + std::vector<Constant*> CV; + + for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { + ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i)); + CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); + } + + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue CGPoolOffset = + SPU::LowerConstantPool(CPIdx, *CurDAG, + SPUtli.getSPUTargetMachine()); + return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment)); + } + + /// Select - Convert the specified operand from a target-independent to a + /// target-specific node if it hasn't already been changed. + SDNode *Select(SDValue Op); + + //! Emit the instruction sequence for i64 shl + SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); + + //! Emit the instruction sequence for i64 srl + SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); + + //! Emit the instruction sequence for i64 sra + SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); + + //! Emit the necessary sequence for loading i64 constants: + SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl); + + //! Alternate instruction emit sequence for loading i64 constants + SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl); + + //! Returns true if the address N is an A-form (local store) address + bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + //! D-form address predicate + bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// Alternate D-form address using i7 offset predicate + bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base); + + /// D-form address selection workhorse + bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, + SDValue &Base, int minOffset, int maxOffset); + + //! Address predicate if N can be expressed as an indexed [r+r] operation. + bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) + SelectXFormAddr(Op, Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectDFormAddr(Op, Op, Op0, Op1) + && !SelectAFormAddr(Op, Op, Op0, Op1)) { + Op0 = Op; + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable #if 1 - assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); + assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); #else - SelectAddrIdxOnly(Op, Op, Op0, Op1); + SelectAddrIdxOnly(Op, Op, Op0, Op1); #endif - break; - } + break; + } - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; - } + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); + /// InstructionSelect - This callback is invoked by + /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. + virtual void InstructionSelect(); - virtual const char *getPassName() const { - return "Cell SPU DAG->DAG Pattern Instruction Selection"; - } + virtual const char *getPassName() const { + return "Cell SPU DAG->DAG Pattern Instruction Selection"; + } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new SPUHazardRecognizer(*II); - } + /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for + /// this target when scheduling the DAG. + virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { + const TargetInstrInfo *II = TM.getInstrInfo(); + assert(II && "No InstrInfo?"); + return new SPUHazardRecognizer(*II); + } - // Include the pieces autogenerated from the target description. + // Include the pieces autogenerated from the target description. #include "SPUGenDAGISel.inc" -}; - + }; } /// InstructionSelect - This callback is invoked by @@ -689,7 +719,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { // Catch the i64 constants that end up here. Note: The backend doesn't // attempt to legalize the constant (it's useless because DAGCombiner // will insert 64-bit constants and we can't stop it). - return SelectI64Constant(Op, OpVT); + return SelectI64Constant(Op, OpVT, Op.getDebugLoc()); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = Op.getOperand(0); @@ -747,21 +777,21 @@ SPUDAGToDAGISel::Select(SDValue Op) { zextShuffle)); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, Op.getOperand(0), Op.getOperand(1), @@ -813,6 +843,54 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (OpVT == MVT::i64) { return SelectSRAi64(Op, OpVT); } + } else if (Opc == ISD::FNEG + && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { + DebugLoc dl = Op.getDebugLoc(); + // Check if the pattern is a special form of DFNMS: + // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) + SDValue Op0 = Op.getOperand(0); + if (Op0.getOpcode() == ISD::FSUB) { + SDValue Op00 = Op0.getOperand(0); + if (Op00.getOpcode() == ISD::FMUL) { + unsigned Opc = SPU::DFNMSf64; + if (OpVT == MVT::v2f64) + Opc = SPU::DFNMSv2f64; + + return CurDAG->getTargetNode(Opc, dl, OpVT, + Op00.getOperand(0), + Op00.getOperand(1), + Op0.getOperand(1)); + } + } + + SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); + SDNode *signMask = 0; + unsigned Opc = SPU::ORfneg64; + + if (OpVT == MVT::f64) { + signMask = SelectI64Constant(negConst, MVT::i64, dl); + } else if (OpVT == MVT::v2f64) { + Opc = SPU::ORfnegvec; + signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, + MVT::v2i64, + negConst, negConst)); + } + + return CurDAG->getTargetNode(Opc, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } else if (Opc == ISD::FABS) { + if (OpVT == MVT::f64) { + SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); + return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } else if (OpVT == MVT::v2f64) { + SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); + SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + absConst, absConst); + SDNode *signMask = emitBuildVector(absVec); + return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); + } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT MVT VT = N->getValueType(0); @@ -1087,13 +1165,17 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { /*! Do the necessary magic necessary to load a i64 constant */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT, + DebugLoc dl) { ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); - // Currently there's no DL on the input, but won't hurt to pretend. - DebugLoc dl = Op.getDebugLoc(); + return SelectI64Constant(CN->getZExtValue(), OpVT, dl); +} + +SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT, + DebugLoc dl) { MVT OpVecVT = MVT::getVectorVT(OpVT, 2); SDValue i64vec = - SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue(), dl); + SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); // Here's where it gets interesting, because we have to parse out the // subtree handed back in i64vec: @@ -1143,8 +1225,11 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { SDValue(lhsNode, 0), SDValue(rhsNode, 0), SDValue(shufMaskNode, 0))); - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, + return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(shufNode, 0)); + } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { + return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, + SDValue(emitBuildVector(i64vec), 0)); } else { cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n"; abort(); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e840ee0747..43248dd7be 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,5 +1,5 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -1353,7 +1353,7 @@ getVecImm(SDNode *N) { } } - return 0; // All UNDEF: use implicit def.; not Constant node + return 0; } /// get_vec_i18imm - Test if this vector is a vector filled with the same value @@ -1480,131 +1480,30 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. - VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { - EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { - const APFloat &apf = CN->getValueAPF(); - EltBits = (CN->getValueType(0) == MVT::f32 - ? FloatToBits(apf.convertToFloat()) - : DoubleToBits(apf.convertToDouble())); - } else { - // Nonconstant element. - return true; - } - - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } - - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} - -/// If this is a splat (repetition) of a value across the whole vector, return -/// the smallest size that splats it. For example, "0x01010101010101..." is a -/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -/// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - int MinSplatBits, - uint64_t &SplatBits, uint64_t &SplatUndef, - int &SplatSize) { - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. - uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { - if (MinSplatBits < 64) { - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { - if (MinSplatBits < 32) { - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { - if (MinSplatBits < 16) { - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) - == ((Bits16 >> 8) & ~Undef16)) { - // Otherwise, we have an 8-bit splat. - SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; - } - } else { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - } - } else { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - } - } else { - SplatBits = Bits128[0]; - SplatUndef = Undef128[0]; - SplatSize = 8; - return true; - } - } - - return false; // Can't be a splat if two pieces don't match. -} - //! Lower a BUILD_VECTOR instruction creatively: SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); - // If this is a vector of constants or undefs, get the bits. A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits) - || !isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) - return SDValue(); // Not a constant vector, not a splat. + BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); + + if (minSplatBits < 16) + minSplatBits = 16; + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min + + uint64_t SplatBits = APSplatBits.getZExtValue(); + unsigned SplatSize = SplatBitSize / 8; switch (VT.getSimpleVT()) { default: @@ -1620,8 +1519,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(Value32, MVT::i32); return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v4i32, T, T, T, T)); + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T)); break; } case MVT::v2f64: { @@ -1636,45 +1534,42 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits | (SplatBits << 8); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) - Ops[i] = DAG.getConstant(Value16, MVT::i16); + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector<SDValue, 8> Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8)); + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { - unsigned short Value16; - if (SplatSize == 2) - Value16 = (unsigned short) (SplatBits & 0xffff); - else - Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); - SDValue T = DAG.getConstant(Value16, VT.getVectorElementType()); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) Ops[i] = T; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8); + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector<SDValue, 8> Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); } case MVT::v4i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } case MVT::v2i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); } case MVT::v2i64: { - return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl); + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } } return SDValue(); } +/*! + */ SDValue -SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, - DebugLoc dl) { +SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { uint32_t upper = uint32_t(SplatVal >> 32); uint32_t lower = uint32_t(SplatVal); @@ -1685,10 +1580,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Val, Val, Val, Val)); } else { - SDValue LO32; - SDValue HI32; - SmallVector<SDValue, 16> ShufBytes; - SDValue Result; bool upper_special, lower_special; // NOTE: This code creates common-case shuffle masks that can be easily @@ -1699,6 +1590,18 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + SplatValCN, SplatValCN); + } + + SDValue LO32; + SDValue HI32; + SmallVector<SDValue, 16> ShufBytes; + SDValue Result; + // Create lower vector if not a special pattern if (!lower_special) { SDValue LO32C = DAG.getConstant(lower, MVT::i32); @@ -1721,13 +1624,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, LO32 = HI32; if (upper_special) HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero, - Zero, Zero); - } for (int i = 0; i < 4; ++i) { uint64_t val = 0; @@ -2022,9 +1918,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { ShufMask[i] = DAG.getConstant(bits, MVT::i32); } - SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufMask[0], - sizeof(ShufMask) / sizeof(ShufMask[0])); + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), @@ -2067,28 +1963,28 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { /*NOTREACHED*/ case MVT::i8: { SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i16: { SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i32: case MVT::f32: { SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i64: case MVT::f64: { SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, loFactor, hiFactor, loFactor, hiFactor); break; } @@ -2164,71 +2060,65 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, case ISD::ROTR: case ISD::ROTL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0) - : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, dl, ShiftVT, N1) - : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - TLI.getShiftAmountTy())); + MVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: SDValue ExpandArg = DAG.getNode(ISD::OR, dl, MVT::i16, N0, DAG.getNode(ISD::SHL, dl, MVT::i16, N0, DAG.getConstant(8, MVT::i32))); + + // Truncate back down to i8 |