aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2006-03-25 09:37:23 +0000
committerEvan Cheng <evan.cheng@apple.com>2006-03-25 09:37:23 +0000
commitc60bd97b94261366800c2eb57e95ddd44092e6f8 (patch)
tree153420c667a09aa339a932af2d7e9091339f12c0
parent984f38bf4fbf7c8ccf6d207966393e79201f25ef (diff)
Build arbitrary vector with more than 2 distinct scalar elements with a
series of unpack and interleave ops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27119 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--lib/Target/X86/X86ISelLowering.h4
-rw-r--r--lib/Target/X86/X86InstrSSE.td76
3 files changed, 104 insertions, 5 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 01951e636a..823d0709d3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2376,7 +2376,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
abort();
}
case ISD::BUILD_VECTOR: {
+ std::set<SDOperand> Values;
SDOperand Elt0 = Op.getOperand(0);
+ Values.insert(Elt0);
bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt0) &&
@@ -2384,15 +2386,16 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
bool RestAreZero = true;
unsigned NumElems = Op.getNumOperands();
for (unsigned i = 1; i < NumElems; ++i) {
- SDOperand V = Op.getOperand(i);
- if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) {
+ SDOperand Elt = Op.getOperand(i);
+ if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
if (!FPC->isExactlyValue(+0.0))
RestAreZero = false;
- } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) {
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
if (!C->isNullValue())
RestAreZero = false;
} else
RestAreZero = false;
+ Values.insert(Elt);
}
if (RestAreZero) {
@@ -2402,6 +2405,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
}
+ if (Values.size() > 2) {
+ // Expand into a number of unpckl*.
+ // e.g. for v4f32
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ MVT::ValueType VT = Op.getValueType();
+ std::vector<SDOperand> V(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
+ NumElems >>= 1;
+ while (NumElems != 0) {
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]);
+ NumElems >>= 1;
+ }
+ return V[0];
+ }
+
return SDOperand();
}
}
@@ -2439,6 +2461,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::S2VEC: return "X86ISD::S2VEC";
case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
}
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1dc90e536e..71d7751e48 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -153,6 +153,10 @@ namespace llvm {
/// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
/// does not have to match the operand type.
ZEXT_S2VEC,
+
+ /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS,
+ /// X86::PUNPCKL*.
+ UNPCKL,
};
// X86 specific condition code. These correspond to X86_*_COND in
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index bdd43fbc39..a1946aae27 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -28,6 +28,11 @@ def X86s2vec : SDNode<"X86ISD::S2VEC",
def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
SDTypeProfile<1, 1, []>, []>;
+// SDTUnpckl - Type profile with one result and two operands; the constraints
+// require the result and both operands to share a single type.
+def SDTUnpckl : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+// X86unpckl - DAG node bound to X86ISD::UNPCKL, declared with an empty
+// property list.
+def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
+ []>;
+
//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
@@ -787,10 +792,14 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
"unpckhpd {$src2, $dst|$dst, $src2}", []>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
- "unpcklps {$src2, $dst|$dst, $src2}", []>;
+ "unpcklps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+ VR128:$src2)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "unpcklps {$src2, $dst|$dst, $src2}", []>;
+ "unpcklps {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+ (load addr:$src2))))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"unpcklpd {$src2, $dst|$dst, $src2}", []>;
@@ -885,6 +894,69 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"psubd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (v4i32 (sub VR128:$src1,
(load addr:$src2))))]>;
+
+// Unpack and interleave
+// Low-half forms. The byte, word and dword variants carry selection patterns
+// for X86unpckl at v16i8, v8i16 and v4i32 respectively (register and load
+// operands); the PUNPCKLQDQ variants are declared with an empty pattern list.
+def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+ VR128:$src2)))]>;
+def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklbw {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+ (load addr:$src2))))]>;
+def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+ VR128:$src2)))]>;
+def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklwd {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+ (load addr:$src2))))]>;
+def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+ VR128:$src2)))]>;
+def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckldq {$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+ (load addr:$src2))))]>;
+def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+
+// High-half unpack forms (byte, word, dword), register and memory variants.
+// All are declared with an empty pattern list, so nothing here selects them.
+def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+// PUNPCKHQDQ - the register and memory forms share opcode 0x6D, so both must
+// print the "punpckhqdq" mnemonic ("punpckhdq" belongs to opcode 0x6A).
+def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+ "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
}
//===----------------------------------------------------------------------===//