aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-08-08 21:31:08 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-08-08 21:31:08 +0000
commit328a9d4a0fda149302a8c60bb80cbabe86e922d6 (patch)
treeb93f1b4ae957231c0304093dc94e717a024f04ac
parent09176e10dbe575b0f4c68803695c47ccb4b81f81 (diff)
Add support for several vector shifts operations while in AVX mode. Fix PR10581
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137067 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp62
-rw-r--r--test/CodeGen/X86/avx-shift.ll64
2 files changed, 116 insertions, 10 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 792d68c2c8..b2329470ab 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -883,7 +883,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41()) {
+ if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -922,10 +922,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE2()) {
+ if (Subtarget->hasSSE2() || Subtarget->hasAVX()) {
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
@@ -935,7 +936,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
}
- if (Subtarget->hasSSE42())
+ if (Subtarget->hasSSE42() || Subtarget->hasAVX())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
if (!UseSoftFloat && Subtarget->hasAVX()) {
@@ -975,6 +976,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -9195,11 +9209,42 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
LLVMContext *Context = DAG.getContext();
- // Must have SSE2.
- if (!Subtarget->hasSSE2()) return SDValue();
+ if (!(Subtarget->hasSSE2() || Subtarget->hasAVX()))
+ return SDValue();
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.getSizeInBits() == 256) {
+ int NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+
+ // Recreate the shift amount vectors
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (int i = 0; i < NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (int i = NumElems/2; i < NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ SDValue Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ SDValue Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
@@ -9250,9 +9295,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE2 or later.
- if (!Subtarget->hasSSE2()) return SDValue();
-
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
@@ -12099,7 +12141,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
// all elements are shifted by the same amount. We can't do this in legalize
// because the a constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
+ if (!(Subtarget->hasSSE2() || Subtarget->hasAVX()))
return SDValue();
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
new file mode 100644
index 0000000000..791194fc1c
--- /dev/null
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;;; Shift left
+; CHECK: vpslld
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+ %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+ %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+ %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+ %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+ %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+ %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+ %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+ %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+