diff options
author | Evan Cheng <evan.cheng@apple.com> | 2006-03-22 02:53:00 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2006-03-22 02:53:00 +0000 |
commit | b9df0ca67b9a59c75685a72ee50b1b471aa9d1bf (patch) | |
tree | b80b3ba6f28f8a10984cd2b1a09529d0d0a5d2db | |
parent | a9f2a717e9112a808d3d0907b19d52deb7bdf0ae (diff) |
Some splat and shuffle support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26940 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 85 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 18 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 62 |
3 files changed, 157 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1d4221414f..68189e7d29 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -278,6 +278,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
     setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
   }
 
   if (TM.getSubtarget<X86Subtarget>().hasSSE2()) {
@@ -299,6 +300,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Expand);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
   }
 
   computeRegisterProperties();
@@ -1366,6 +1368,66 @@ static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
           (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
 }
 
+/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFD.
+bool X86::isPSHUFDMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  if (N->getNumOperands() != 4)
+    return false;
+
+  // PSHUFD permutes the elements of a single source vector, so the mask is
+  // usable only if no element (element 0 included) refers to the second
+  // vector, i.e. every index is less than 4.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4)
+      return false;
+  }
+
+  return true;
+}
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
+/// a splat of a single element.
+bool X86::isSplatMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  // We can only splat 64-bit, and 32-bit quantities.
+  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
+    return false;
+
+  // This is a splat operation if each element of the permute is the same, and
+  // if the value doesn't reference the second vector.
+  SDOperand Elt = N->getOperand(0);
+  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
+  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (N->getOperand(i) != Elt) return false;
+  }
+
+  // Make sure it is a splat of the first vector operand.
+  return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
+}
+
+/// getShuffleImmediate - Return the immediate that encodes the specified
+/// VECTOR_SHUFFLE mask (one accepted by isSplatMask or isPSHUFDMask).
+unsigned X86::getShuffleImmediate(SDNode *N) {
+  unsigned NumOperands = N->getNumOperands();
+  unsigned Shift = (NumOperands == 4) ? 2 : 1;
+  unsigned Mask = 0;
+  // Element i is encoded in bits [i*Shift, (i+1)*Shift).  Fold from the
+  // highest element down to element 0 so that element 0 ends in the low bits.
+  for (unsigned i = NumOperands; i != 0; --i) {
+    Mask <<= Shift;
+    Mask |= cast<ConstantSDNode>(N->getOperand(i - 1))->getValue();
+  }
+
+  return Mask;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -2141,6 +2203,28 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
     return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt);
   }
+  case ISD::VECTOR_SHUFFLE: {
+    SDOperand V1 = Op.getOperand(0);
+    SDOperand V2 = Op.getOperand(1);
+    SDOperand PermMask = Op.getOperand(2);
+    MVT::ValueType VT = Op.getValueType();
+
+    if (V2.getOpcode() == ISD::UNDEF) {
+      // Handle splat cases.
+      if (X86::isSplatMask(PermMask.Val)) {
+        if (VT == MVT::v2f64 || VT == MVT::v2i64)
+          // Use unpcklpd
+          return DAG.getNode(X86ISD::UNPCKLP, VT, V1, V1);
+        // Leave the VECTOR_SHUFFLE alone. It matches SHUFP*.
+        break;
+      } else if (VT == MVT::v4f32 && X86::isPSHUFDMask(PermMask.Val))
+        // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
+        break;
+    }
+
+    // TODO.
+    assert(0);
+  }
   }
 }
 
@@ -2175,6 +2259,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper: return "X86ISD::Wrapper";
   case X86ISD::SCALAR_TO_VECTOR: return "X86ISD::SCALAR_TO_VECTOR";
+  case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP";
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index bdbe46d2b0..ecfc8a237d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -149,6 +149,9 @@ namespace llvm {
       /// SCALAR_TO_VECTOR - X86 version of SCALAR_TO_VECTOR. The destination base
       /// type does not have to match the operand type.
       SCALAR_TO_VECTOR,
+
+      /// UNPCKLP - X86 unpack and interleave low instructions.
+      UNPCKLP,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
@@ -174,6 +177,21 @@ namespace llvm {
     };
   }
 
+  /// Define some predicates that are used for node matching.
+  namespace X86 {
+    /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+    bool isPSHUFDMask(SDNode *N);
+
+    /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a splat of a single element.
+    bool isSplatMask(SDNode *N);
+
+    /// getShuffleImmediate - Return the immediate that encodes the specified
+    /// VECTOR_SHUFFLE mask (one accepted by isSplatMask or isPSHUFDMask).
+    unsigned getShuffleImmediate(SDNode *N);
+  }
+
   //===----------------------------------------------------------------------===//
   // X86TargetLowering - X86 Implementation of the TargetLowering interface
   class X86TargetLowering : public TargetLowering {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 866203846b..29519e9e5c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -17,14 +17,19 @@
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
-                      [SDNPHasChain]>;
-def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
-def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
-def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
-                      SDTypeProfile<1, 1, []>, []>;
+def SDTX86Unpcklp : SDTypeProfile<1, 2,
+                        [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
+
+def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
+                        [SDNPHasChain]>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
+                        SDTypeProfile<1, 1, []>, []>;
+def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
+                        SDTX86Unpcklp, []>;
 
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
@@ -36,6 +41,20 @@ def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
 
+// SHUFFLE_get_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUF* etc. imm.
+def SHUFFLE_get_imm : SDNodeXForm<build_vector, [{
+  return getI8Imm(X86::getShuffleImmediate(N));
+}]>;
+
+def SHUFFLE_splat_mask : PatLeaf<(build_vector), [{
+  return X86::isSplatMask(N);
+}], SHUFFLE_get_imm>;
+
+def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isPSHUFDMask(N);
+}], SHUFFLE_get_imm>;
+
 //===----------------------------------------------------------------------===//
 // SSE scalar FP Instructions
 //===----------------------------------------------------------------------===//
@@ -652,6 +671,21 @@ def CMPPDrm : PDI<0xC2, MRMSrcMem,
 }
 
 // Shuffle and unpack instructions
+def PSHUFWrr : PSI<0x70, MRMSrcReg,
+                   (ops VR64:$dst, VR64:$src1, i8imm:$src2),
+                   "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFWrm : PSI<0x70, MRMSrcMem,
+                   (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
+                   "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFDrr : PDI<0x70, MRMSrcReg,
+                   (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+                   "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                   [(set VR128:$dst, (vector_shuffle (v4f32 VR128:$src1), (undef),
+                                      PSHUFD_shuffle_mask:$src2))]>;
+def PSHUFDrm : PDI<0x70, MRMSrcMem,
+                   (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+                   "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
 def SHUFPSrr : PSI<0xC6, MRMSrcReg,
                    (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                    "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
@@ -755,3 +789,15 @@ def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
       Requires<[HasSSE2]>;
 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
       Requires<[HasSSE2]>;
+
+// Splat v4f32 / v4i32
+def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFFLE_splat_mask:$sm),
+          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFFLE_splat_mask:$sm))>;
+def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFFLE_splat_mask:$sm),
+          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFFLE_splat_mask:$sm))>;
+
+// Splat v2f64 / v2i64
+def : Pat<(X86unpcklp (v2f64 VR128:$src1), VR128:$src2),
+          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>;
+def : Pat<(X86unpcklp (v2i64 VR128:$src1), VR128:$src2),
+          (v2i64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>;