diff options
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 89 | ||||
-rw-r--r-- | lib/Target/PowerPC/README_ALTIVEC.txt | 9 |
2 files changed, 91 insertions, 7 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 80f9cc053b..137b3f9554 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -544,6 +544,48 @@ SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { return SDOperand(); } +// If this is a vector of constants or undefs, get the bits. A bit in +// UndefBits is set if the corresponding element of the vector is an +// ISD::UNDEF value. For undefs, the corresponding VectorBits values are +// zero. Return true if this is not an array of constants, false if it is. +// +// Note that VectorBits/UndefBits are returned in 'little endian' form, so +// elements 0,1 go in VectorBits[0] and 2,3 go in VectorBits[1] for a v4i32. +static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], + uint64_t UndefBits[2]) { + // Start with zero'd results. + VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; + + unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType()); + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDOperand OpVal = BV->getOperand(i); + + unsigned PartNo = i >= e/2; // In the upper 128 bits? + unsigned SlotNo = i & (e/2-1); // Which subpiece of the uint64_t it is. + + uint64_t EltBits = 0; + if (OpVal.getOpcode() == ISD::UNDEF) { + uint64_t EltUndefBits = ~0U >> (32-EltBitSize); + UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); + continue; + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { + EltBits = CN->getValue() & (~0U >> (32-EltBitSize)); + } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { + assert(CN->getValueType(0) == MVT::f32 && + "Only one legal FP vector type!"); + EltBits = FloatToBits(CN->getValue()); + } else { + // Nonconstant element. + return true; + } + + VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); + } + + //printf("%llx %llx %llx %llx\n", + // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); + return false; +} /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -922,12 +964,20 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { } case ISD::BUILD_VECTOR: { // If this is a case we can't handle, return null and let the default - // expansion code take care of it. If we CAN select this case, return Op. - - // FIXME: We should handle splat(-0.0), and other cases here. + // expansion code take care of it. If we CAN select this case, return Op + // or something simpler. + + // If this is a vector of constants or undefs, get the bits. A bit in + // UndefBits is set if the corresponding element of the vector is an + // ISD::UNDEF value. For undefs, the corresponding VectorBits values are + // zero. + uint64_t VectorBits[2]; + uint64_t UndefBits[2]; + if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)) + return SDOperand(); // Not a constant vector. // See if this is all zeros. - if (ISD::isBuildVectorAllZeros(Op.Val)) { + if ((VectorBits[0] | VectorBits[1]) == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32) { SDOperand Z = DAG.getConstant(0, MVT::i32); @@ -962,6 +1012,37 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { } return Op; } + + // If this is some other splat of 4-byte elements, see if we can handle it + // in another way. + // FIXME: Make this more undef happy and work with other widths (1,2 bytes). + if (VectorBits[0] == VectorBits[1] && + unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) { + unsigned Bits = unsigned(VectorBits[0]); + + // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is + // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). These are important + // for fneg/fabs. + if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) { + // Make -1 and vspltisw -1: + SDOperand OnesI = DAG.getConstant(~0U, MVT::i32); + SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + OnesI, OnesI, OnesI, OnesI); + + // Make the VSLW intrinsic, computing 0x8000_0000. + SDOperand Res + = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, MVT::v4i32, + DAG.getConstant(Intrinsic::ppc_altivec_vslw, MVT::i32), + OnesV, OnesV); + + // If this is 0x7FFF_FFFF, xor by OnesV to invert it. + if (Bits == 0x7FFFFFFF) + Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV); + + return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); + } + } + return SDOperand(); } diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 7e92f0b788..78ea2cd879 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -43,7 +43,8 @@ There are a wide range of vector constants we can generate with combinations of altivec instructions. Examples GCC does: "t=vsplti*, r = t+t" for constants it can't generate with one vsplti - -0.0 (sign bit): vspltisw v0,-1 / vslw v0,v0,v0 +This should be added to the ISD::BUILD_VECTOR case in +PPCTargetLowering::LowerOperation. //===----------------------------------------------------------------------===// @@ -110,8 +111,10 @@ e.g. x86 cmov (not supported on bytes). This would fix two problems: 1. Writing patterns multiple times. -2. Identical operations in different types are not getting CSE'd (e.g. - { 0U, 0U, 0U, 0U } and {0.0, 0.0, 0.0, 0.0}. +2. Identical operations in different types are not getting CSE'd. + +We already do this for shuffle and build_vector. We need load,undef,and,or,xor, +etc. //===----------------------------------------------------------------------===// |