author     Craig Topper <craig.topper@gmail.com>    2012-01-13 08:12:35 +0000
committer  Craig Topper <craig.topper@gmail.com>    2012-01-13 08:12:35 +0000
commit     12216172c04fe76a90e9de34fc4161e92d097278 (patch)
tree       6015625f4477bedeb225a8509b1726a279102b80 /lib/Target
parent     db080e814fcc2b67e2790351539053c8b9d13639 (diff)
Make X86 instruction selection use 256-bit VPXOR for build_vector of all zeros if AVX2 is enabled. This gives the ExeDepsFix pass a chance to choose FP vs int as appropriate. Also use v8i32 as the type for getZeroVector if AVX2 is enabled. This is consistent with SSE2 preferring v4i32.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148108 91177308-0d34-0410-b5e6-96231b3b80d8
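The core of the change is the type getZeroVector picks for the zero idiom, since that type decides which XOR instruction can materialize it. Below is a minimal standalone sketch of that dispatch, for illustration only: the function name and the returned strings are invented here, while the real code (first hunk below) builds ISD::BUILD_VECTOR nodes and bitcasts them to the requested type.

```cpp
#include <cstdio>

// Hedged model of the zero-vector type selection this patch installs in
// getZeroVector(); names are illustrative, not LLVM's API.
// 128-bit: v4i32 with SSE2 (PXOR), else v4f32 (SSE1 only has XORPS).
// 256-bit: v8i32 with AVX2 (enables VPXOR), else v8f32 (plain AVX has
//          only floating-point 256-bit logic ops, so VXORPS is forced).
static const char *zeroVectorType(unsigned SizeInBits, bool HasSSE2,
                                  bool HasAVX2) {
  if (SizeInBits == 128)
    return HasSSE2 ? "v4i32" : "v4f32";
  if (SizeInBits == 256)
    return HasAVX2 ? "v8i32" : "v8f32";
  return "unsupported";
}

int main() {
  std::printf("SSE2, 128-bit: %s\n", zeroVectorType(128, true, false)); // v4i32
  std::printf("AVX,  256-bit: %s\n", zeroVectorType(256, true, false)); // v8f32
  std::printf("AVX2, 256-bit: %s\n", zeroVectorType(256, true, true));  // v8i32
  return 0;
}
```

With AVX but not AVX2 there is no 256-bit integer XOR at all, which is why the pre-patch code had to fall back to v8f32 and VXORPS for 256-bit zeros.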
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  81
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp      4
-rw-r--r--  lib/Target/X86/X86InstrSSE.td       13
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp    1
4 files changed, 62 insertions, 37 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6f22fafee..cdeee0ba87 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4234,8 +4234,8 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
 
 /// getZeroVector - Returns a vector of specified type with all zero elements.
 ///
-static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
-                             DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, bool HasSSE2, bool HasAVX2,
+                             SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
 
   // Always build SSE zero vectors as <4 x i32> bitcasted
@@ -4250,12 +4250,17 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
     }
   } else if (VT.getSizeInBits() == 256) { // AVX
-    // 256-bit logic and arithmetic instructions in AVX are
-    // all floating-point, no support for integer ops. Default
-    // to emitting fp zeroed vectors then.
-    SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
-    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+    if (HasAVX2) { // AVX2
+      SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+    } else {
+      // 256-bit logic and arithmetic instructions in AVX are all
+      // floating-point, no support for integer ops. Emit fp zeroed vectors.
+      SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+    }
   }
   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
@@ -4445,11 +4450,13 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
 /// element of V2 is swizzled into the zero/undef vector, landing at element
 /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
-                                           bool isZero, bool HasSSE2,
+                                           bool IsZero,
+                                           const X86Subtarget *Subtarget,
                                            SelectionDAG &DAG) {
   EVT VT = V2.getValueType();
-  SDValue V1 = isZero
-    ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+  SDValue V1 = IsZero
+    ? getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(), DAG,
+                    V2.getDebugLoc()) : DAG.getUNDEF(VT);
   unsigned NumElems = VT.getVectorNumElements();
   SmallVector<int, 16> MaskVec;
   for (unsigned i = 0; i != NumElems; ++i)
@@ -4729,7 +4736,8 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
     if (ThisIsNonZero && First) {
       if (NumZero)
-        V = getZeroVector(MVT::v8i16, true, DAG, dl);
+        V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
+                          DAG, dl);
       else
         V = DAG.getUNDEF(MVT::v8i16);
       First = false;
@@ -4777,7 +4785,8 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
     if (isNonZero) {
       if (First) {
         if (NumZero)
-          V = getZeroVector(MVT::v8i16, true, DAG, dl);
+          V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
+                            DAG, dl);
         else
           V = DAG.getUNDEF(MVT::v8i16);
         First = false;
@@ -5065,7 +5074,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
         Op.getValueType() == MVT::v8i32)
       return Op;
 
-    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(),
+                         Subtarget->hasAVX2(), DAG, dl);
   }
 
   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
@@ -5132,8 +5142,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       // convert it to a vector with movd (S2V+shuffle to zero extend).
       Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
-      Item = getShuffleVectorZeroOrUndef(Item, 0, true,
-                                         Subtarget->hasSSE2(), DAG);
+      Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
 
       // Now we have our 32-bit value zero extended in the low element of
      // a vector. If Idx != 0, swizzle it into place.
@@ -5161,28 +5170,28 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
         (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
       if (VT.getSizeInBits() == 256) {
-        SDValue ZeroVec = getZeroVector(VT, true, DAG, dl);
+        SDValue ZeroVec = getZeroVector(VT, Subtarget->hasSSE2(),
+                                        Subtarget->hasAVX2(), DAG, dl);
         return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item,
                            DAG.getIntPtrConstant(0));
       }
       assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
       // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, 0, true,
-                                         Subtarget->hasSSE2(), DAG);
+      return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
     }
     if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
       Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
       if (VT.getSizeInBits() == 256) {
-        SDValue ZeroVec = getZeroVector(MVT::v8i32, true, DAG, dl);
+        SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget->hasSSE2(),
+                                        Subtarget->hasAVX2(), DAG, dl);
         Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
                                   DAG, dl);
       } else {
         assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
-        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
-                                           Subtarget->hasSSE2(), DAG);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
       }
       return DAG.getNode(ISD::BITCAST, dl, VT, Item);
     }
@@ -5211,8 +5220,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
 
     // Turn it into a shuffle of zero and zero-extended scalar to vector.
-    Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
-                                       Subtarget->hasSSE2(), DAG);
+    Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
     SmallVector<int, 8> MaskVec;
     for (unsigned i = 0; i < NumElems; i++)
       MaskVec.push_back(i == Idx ? 0 : 1);
@@ -5268,8 +5276,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
                              Op.getOperand(Idx));
-    return getShuffleVectorZeroOrUndef(V2, Idx, true,
-                                       Subtarget->hasSSE2(), DAG);
+    return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
   }
   return SDValue();
 }
@@ -5294,7 +5301,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     for (unsigned i = 0; i < 4; ++i) {
       bool isZero = !(NonZeros & (1 << i));
       if (isZero)
-        V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+        V[i] = getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
+                             DAG, dl);
      else
         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
     }
@@ -6402,7 +6410,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
   SDValue V2 = Op.getOperand(1);
 
   if (isZeroShuffle(SVOp))
-    return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+    return getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
+                         DAG, dl);
 
   // Handle splat operations
   if (SVOp->isSplat()) {
@@ -7663,8 +7672,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
                                  Op.getOperand(0));
 
   // Zero out the upper parts of the register.
-  Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasSSE2(),
-                                     DAG);
+  Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
 
   Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                      DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -10104,7 +10112,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
     if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
       if (ShiftAmt == 7) {
         // R s>> 7 === R s< 0
-        SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+        SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
+                                      /* HasAVX2 */false, DAG, dl);
         return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
       }
 
@@ -10146,7 +10155,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
       if (Op.getOpcode() == ISD::SRA) {
        if (ShiftAmt == 7) {
          // R s>> 7 === R s< 0
-          SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+          SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */,
+                                        true /* HasAVX2 */, DAG, dl);
          return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
        }
 
@@ -12685,7 +12695,8 @@ static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
 
 /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
 static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
-                                        TargetLowering::DAGCombinerInfo &DCI) {
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        bool HasAVX2) {
   DebugLoc dl = N->getDebugLoc();
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   SDValue V1 = SVOp->getOperand(0);
@@ -12737,7 +12748,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
 
       // Emit a zeroed vector and insert the desired subvector on its
      // first half.
-      SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+      SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, HasAVX2, DAG, dl);
       SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
                                         DAG.getConstant(0, MVT::i32), DAG, dl);
       return DCI.CombineTo(N, InsV);
@@ -12782,7 +12793,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
   if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
       N->getOpcode() == ISD::VECTOR_SHUFFLE)
-    return PerformShuffleCombine256(N, DAG, DCI);
+    return PerformShuffleCombine256(N, DAG, DCI, Subtarget->hasAVX2());
 
   // Only handle 128 wide vector from here on.
   if (VT.getSizeInBits() != 128)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f5ca7c2b4a..cd74a25593 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2908,6 +2908,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   case X86::AVX_SET0PSY:
   case X86::AVX_SET0PDY:
   case X86::AVX2_SETALLONES:
+  case X86::AVX2_SET0:
     Alignment = 32;
     break;
   case X86::V_SET0:
@@ -2952,6 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   case X86::AVX_SET0PDY:
   case X86::AVX_SETALLONES:
   case X86::AVX2_SETALLONES:
+  case X86::AVX2_SET0:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
     // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -2985,7 +2987,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       Ty = Type::getDoubleTy(MF.getFunction()->getContext());
     else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
       Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
-    else if (Opc == X86::AVX2_SETALLONES)
+    else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d3cd6a82fd..27f51282a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -279,13 +279,24 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
 // JIT implementatioan, it does not expand the instructions below like
 // X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1, Predicates = [HasAVX] in {
+    isCodeGenOnly = 1 in {
+let Predicates = [HasAVX] in {
 def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                    [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
 def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                    [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
 }
+let Predicates = [HasAVX2], neverHasSideEffects = 1 in
+def AVX2_SET0   : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
+                   []>, VEX_4V;
+}
+let Predicates = [HasAVX2], AddedComplexity = 5 in {
+  def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
+  def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
+  def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
+  def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+}
 
 // AVX has no support for 256-bit integer instructions, but since the 128-bit
 // VPXOR instruction writes zero to its upper part, it's safe build zeros.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 9232196139..1f7d42240c 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -373,6 +373,7 @@ ReSimplify:
   case X86::AVX_SET0PDY:     LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
   case X86::AVX_SETALLONES:  LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
   case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
+  case X86::AVX2_SET0:       LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
   case X86::MOV16r0:
     LowerSubReg32_Op0(OutMI, X86::MOV32r0);   // MOV16r0 -> MOV32r0
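As background on the commit message's ExeDepsFix remark: X86MCInstLower expands AVX2_SET0 into VPXORYrr, a zeroing idiom with no true input dependency, so the execution-domain fixup pass is free to re-encode it in whichever domain its consumers occupy. The sketch below is a toy model of that post-pass choice with invented names; it is not the LLVM pass itself.

```cpp
#include <cstdio>

enum class Domain { Integer, FloatingPoint };

// Hedged toy model (invented names, not LLVM's API): pick the concrete
// 256-bit zeroing instruction matching the consumers' execution domain
// to avoid a domain-crossing bypass penalty. Without AVX2 there is no
// 256-bit integer XOR, so the FP form is the only option.
static const char *resolveZeroIdiom(Domain Consumers, bool HasAVX2) {
  if (Consumers == Domain::Integer && HasAVX2)
    return "vpxor  %ymm0, %ymm0, %ymm0";   // integer-domain zero (AVX2)
  return "vxorps %ymm0, %ymm0, %ymm0";     // FP-domain zero (any AVX)
}

int main() {
  std::puts(resolveZeroIdiom(Domain::Integer, true));        // vpxor
  std::puts(resolveZeroIdiom(Domain::FloatingPoint, true));  // vxorps
  std::puts(resolveZeroIdiom(Domain::Integer, false));       // vxorps (forced)
  return 0;
}
```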