Some splat and shuffle support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26940 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2006-03-22 02:53:00 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2006-03-22 02:53:00 +0000
commit: b9df0ca67b9a59c75685a72ee50b1b471aa9d1bf (patch)
tree: b80b3ba6f28f8a10984cd2b1a09529d0d0a5d2db
parent: a9f2a717e9112a808d3d0907b19d52deb7bdf0ae (diff)
3 files changed, 157 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1d4221414f..68189e7d29 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -278,6 +278,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::MUL,              MVT::v4f32, Legal);
     setOperationAction(ISD::LOAD,             MVT::v4f32, Legal);
     setOperationAction(ISD::BUILD_VECTOR,     MVT::v4f32, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v4f32, Custom);
   }
 
   if (TM.getSubtarget<X86Subtarget>().hasSSE2()) {
@@ -299,6 +300,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i64, Expand);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,   MVT::v2f64, Custom);
   }
 
   computeRegisterProperties();
@@ -1366,6 +1368,66 @@ static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
           (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
 }
 
+/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to PSHUFD:
+/// a 4-element mask in which every index selects from the first vector.
+bool X86::isPSHUFDMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  // Only 4-element masks are accepted.
+  if (N->getNumOperands() != 4)
+    return false;
+
+  // Every index, including element 0, must be below 4; a value of 4 or more
+  // would reference the second vector.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() >= 4) return false;
+  }
+
+  return true;
+}
+
+/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand is a
+/// mask that repeats one element of the first vector in every position.
+bool X86::isSplatMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  // Only 2-element (64-bit) and 4-element (32-bit) splats are supported.
+  const unsigned NumElts = N->getNumOperands();
+  if (NumElts != 2 && NumElts != 4)
+    return false;
+
+  // Every index has to be the same node as the first one.
+  SDOperand First = N->getOperand(0);
+  assert(isa<ConstantSDNode>(First) && "Invalid VECTOR_SHUFFLE mask!");
+  for (unsigned Idx = 1; Idx < NumElts; ++Idx) {
+    SDOperand Cur = N->getOperand(Idx);
+    assert(isa<ConstantSDNode>(Cur) && "Invalid VECTOR_SHUFFLE mask!");
+    if (Cur != First) return false;
+  }
+
+  // The shared index must select from the first vector operand.
+  return cast<ConstantSDNode>(First)->getValue() < NumElts;
+}
+
+/// getShuffleImmediate - Return the immediate that encodes the VECTOR_SHUFFLE
+/// mask N. Element i of the mask occupies bits [i*Shift, (i+1)*Shift), where
+/// Shift is 2 for 4-element masks and 1 otherwise.
+unsigned X86::getShuffleImmediate(SDNode *N) {
+  unsigned NumOperands = N->getNumOperands();
+  unsigned Shift = (NumOperands == 4) ? 2 : 1;
+  unsigned Mask = 0;
+  // Walk from the last element down so that element 0 ends in the low bits.
+  for (unsigned i = NumOperands; i != 0; --i) {
+    // Shift before OR-ing: operand 0 is included and is never shifted out.
+    Mask <<= Shift;
+    Mask |= cast<ConstantSDNode>(N->getOperand(i - 1))->getValue();
+  }
+  return Mask;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -2141,6 +2203,28 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
     return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt);
   }
+  case ISD::VECTOR_SHUFFLE: {
+    SDOperand V1 = Op.getOperand(0);
+    SDOperand V2 = Op.getOperand(1);
+    SDOperand PermMask = Op.getOperand(2);
+    MVT::ValueType VT = Op.getValueType();
+    // Only shuffles whose second input is undef are handled so far.
+    if (V2.getOpcode() == ISD::UNDEF) {
+      // Handle splat cases.
+      if (X86::isSplatMask(PermMask.Val)) {
+        if (VT == MVT::v2f64 || VT == MVT::v2i64)
+          // Use unpcklpd. NOTE(review): taken for any splat index, even 1.
+          return DAG.getNode(X86ISD::UNPCKLP, VT, V1, V1);
+        // Leave the VECTOR_SHUFFLE alone. It matches SHUFP*.
+        break;
+      } else if (VT == MVT::v4f32 && X86::isPSHUFDMask(PermMask.Val))
+        // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
+        break;
+    }
+    // NOTE(review): break leaves the switch; confirm a value is returned.
+    // TODO: all remaining shuffles are unimplemented and hit this assert.
+    assert(0);
+  }
   }
 }
 
@@ -2175,6 +2259,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
   case X86ISD::SCALAR_TO_VECTOR:   return "X86ISD::SCALAR_TO_VECTOR";
+  case X86ISD::UNPCKLP:            return "X86ISD::UNPCKLP";
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index bdbe46d2b0..ecfc8a237d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -149,6 +149,9 @@ namespace llvm {
       /// SCALAR_TO_VECTOR - X86 version of SCALAR_TO_VECTOR. The destination base
       /// type does not have to match the operand type.
       SCALAR_TO_VECTOR,
+
+      /// UNPCKLP - X86 unpack and interleave low instructions.
+      UNPCKLP,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
@@ -174,6 +177,21 @@ namespace llvm {
     };
   }
 
+ /// Predicates and helpers for matching VECTOR_SHUFFLE mask operands.
+ namespace X86 {
+   /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+   bool isPSHUFDMask(SDNode *N);
+
+   /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a splat of a single element.
+   bool isSplatMask(SDNode *N);
+
+   /// getShuffleImmediate - Return the immediate that encodes the specified
+   /// VECTOR_SHUFFLE mask (one accepted by isSplatMask or isPSHUFDMask).
+   unsigned getShuffleImmediate(SDNode *N);
+ }
+
   //===----------------------------------------------------------------------===//
   //  X86TargetLowering - X86 Implementation of the TargetLowering interface
   class X86TargetLowering : public TargetLowering {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 866203846b..29519e9e5c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -17,14 +17,19 @@
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, 
-                      [SDNPHasChain]>;
-def X86fand  : SDNode<"X86ISD::FAND",     SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
-def X86fxor  : SDNode<"X86ISD::FXOR",     SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
-def X86s2vec : SDNode<"X86ISD::SCALAR_TO_VECTOR",
-                      SDTypeProfile<1, 1, []>, []>;
+def SDTX86Unpcklp : SDTypeProfile<1, 2,
+                                  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
+
+def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad, 
+                        [SDNPHasChain]>;
+def X86fand    : SDNode<"X86ISD::FAND",     SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86fxor    : SDNode<"X86ISD::FXOR",     SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86s2vec   : SDNode<"X86ISD::SCALAR_TO_VECTOR",
+                        SDTypeProfile<1, 1, []>, []>;
+def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
+                        SDTX86Unpcklp, []>;
 
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
@@ -36,6 +41,20 @@ def X86loadpf64  : PatFrag<(ops node:$ptr), (f64   (X86loadp node:$ptr))>;
 def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
 def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
 
+// SHUFFLE_get_imm xform function: convert a vector_shuffle mask (build_vector)
+// into the i8 immediate used by PSHUF*, SHUFP* etc.
+def SHUFFLE_get_imm : SDNodeXForm<build_vector, [{
+  return getI8Imm(X86::getShuffleImmediate(N));
+}]>;
+
+def SHUFFLE_splat_mask : PatLeaf<(build_vector), [{
+  return X86::isSplatMask(N);
+}], SHUFFLE_get_imm>;
+
+def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isPSHUFDMask(N);
+}], SHUFFLE_get_imm>;
+
 //===----------------------------------------------------------------------===//
 // SSE scalar FP Instructions
 //===----------------------------------------------------------------------===//
@@ -652,6 +671,21 @@ def CMPPDrm : PDI<0xC2, MRMSrcMem,
 }
 
 // Shuffle and unpack instructions
+def PSHUFWrr : PSI<0x70, MRMSrcReg,  // reg form of PSHUFWrm's MRMSrcMem
+                   (ops VR64:$dst, VR64:$src1, i8imm:$src2),
+                   "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFWrm : PSI<0x70, MRMSrcMem,
+                   (ops VR64:$dst, i64mem:$src1, i8imm:$src2),
+                   "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+def PSHUFDrr : PDI<0x70, MRMSrcReg,  // reg form of PSHUFDrm's MRMSrcMem
+                   (ops VR128:$dst, VR128:$src1, i8imm:$src2),
+                   "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                 [(set VR128:$dst, (vector_shuffle (v4f32 VR128:$src1), (undef),
+                                    PSHUFD_shuffle_mask:$src2))]>;
+def PSHUFDrm : PDI<0x70, MRMSrcMem,
+                   (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
+                   "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
 def SHUFPSrr : PSI<0xC6, MRMSrcReg, 
                    (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                    "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
@@ -755,3 +789,15 @@ def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
 def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
   Requires<[HasSSE2]>;
 
+
+// Splat v4f32 / v4i32: SHUFPS with $src as both inputs and the mask as imm.
+def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFFLE_splat_mask:$sm),
+          (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFFLE_splat_mask:$sm))>;
+def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), SHUFFLE_splat_mask:$sm),
+          (v4i32 (SHUFPSrr VR128:$src, VR128:$src, SHUFFLE_splat_mask:$sm))>;
+
+// Splat v2f64 / v2i64: the lowering emits X86unpcklp, selected as UNPCKLPD.
+def : Pat<(X86unpcklp (v2f64 VR128:$src1), VR128:$src2),
+          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>;
+def : Pat<(X86unpcklp (v2i64 VR128:$src1), VR128:$src2),
+          (v2i64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>;
author	Evan Cheng <evan.cheng@apple.com>	2006-03-22 02:53:00 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2006-03-22 02:53:00 +0000
commit	b9df0ca67b9a59c75685a72ee50b1b471aa9d1bf (patch)
tree	b80b3ba6f28f8a10984cd2b1a09529d0d0a5d2db
parent	a9f2a717e9112a808d3d0907b19d52deb7bdf0ae (diff)