aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp40
-rw-r--r--lib/Target/X86/X86InstrFormats.td3
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td1
-rw-r--r--lib/Target/X86/X86InstrSSE.td22
4 files changed, 65 insertions, 1 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0b5ac6a251..2f74c0fdd4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3955,6 +3955,34 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
+/// PromoteVectorToScalarSplat - Since there's no native support for
+/// scalar_to_vector for 256-bit AVX, a 128-bit scalar_to_vector +
+/// INSERT_SUBVECTOR is generated. Recognize this idiom and do the
+/// shuffle before the insertion, this yields less instructions in the end.
+static SDValue PromoteVectorToScalarSplat(ShuffleVectorSDNode *SV,
+ SelectionDAG &DAG) {
+ EVT SrcVT = SV->getValueType(0);
+ SDValue V1 = SV->getOperand(0);
+ DebugLoc dl = SV->getDebugLoc();
+ int NumElems = SrcVT.getVectorNumElements();
+
+ assert(SrcVT.is256BitVector() && "unknown howto handle vector type");
+
+ SmallVector<int, 4> Mask;
+ for (int i = 0; i < NumElems/2; ++i)
+ Mask.push_back(SV->getMaskElt(i));
+
+ EVT SVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
+ NumElems/2);
+ SDValue SV1 = DAG.getVectorShuffle(SVT, dl, V1.getOperand(1),
+ DAG.getUNDEF(SVT), &Mask[0]);
+ SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), SV1,
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+
+ return Insert128BitVector(InsV, SV1,
+ DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+}
+
/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32 and
/// v8i32, v16i16 or v32i8 to v8f32.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
@@ -5742,7 +5770,17 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
- // Handle splats by matching through known masks
+ // Since there's no native support for scalar_to_vector for 256-bit AVX, a
+ // 128-bit scalar_to_vector + INSERT_SUBVECTOR is generated. Recognize this
+ // idiom and do the shuffle before the insertion, this yields less
+ // instructions in the end.
+ if (VT.is256BitVector() &&
+ V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V1.getOperand(0).getOpcode() == ISD::UNDEF &&
+ V1.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return PromoteVectorToScalarSplat(SVOp, DAG);
+
+ // Handle splats by matching through known shuffle masks
if ((VT.is128BitVector() && NumElem <= 4) ||
(VT.is256BitVector() && NumElem <= 8))
return SDValue();
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 6d89bcc29e..34d26761b7 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -501,6 +501,9 @@ class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: PDI<o, F, outs, ins, asm, pattern>, REX_W;
+class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
// MMX Instruction templates
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index f792f53b7b..4dd2d41446 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -467,3 +467,4 @@ def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index), [{
return X86::isVINSERTF128Index(N);
}], INSERT_get_vinsertf128_imm>;
+
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 24c734ab96..82e0d2a2ff 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2858,6 +2858,14 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
+def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -5358,6 +5366,20 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+// Special COPY patterns
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//