aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDale Johannesen <dalej@apple.com>2010-10-19 20:00:17 +0000
committerDale Johannesen <dalej@apple.com>2010-10-19 20:00:17 +0000
commit575cd148cedec66af0adc266c5d7ecdbe1641d7e (patch)
treec4557a11d9becf6956f1e88521c1201d2b7e30e2
parent879b65b396e9dff5f0192f85932473123b0d799e (diff)
Enable using vdup for vector constants which are splat of
integers by default, and remove the controlling flag, now that LICM will hoist such vdup's. 8003375. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116852 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp49
-rw-r--r--test/CodeGen/Thumb2/machine-licm-vdup.ll37
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll8
3 files changed, 21 insertions, 73 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d9a7ec3245..3626e84462 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -59,13 +59,6 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
cl::init(false));
-// This option should go away when Machine LICM is smart enough to hoist a
-// reg-to-reg VDUP.
-static cl::opt<bool>
-EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
- cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
- cl::init(false));
-
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
@@ -3442,26 +3435,24 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats. For f32 constant splats, reduce to
- // i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- if (VT.getVectorElementType().isFloatingPoint()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(i)));
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
- NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerBUILD_VECTOR(Val, DAG, ST));
- }
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
- if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
}
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
// If all elements are constants and the case above didn't get hit, fall back
@@ -3470,12 +3461,6 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isConstant)
return SDValue();
- if (!EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats.
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- }
-
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
deleted file mode 100644
index 2f17e27ab4..0000000000
--- a/test/CodeGen/Thumb2/machine-licm-vdup.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
-; Eventually this should become the default and be moved into machine-licm.ll.
-
-define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
-entry:
-; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
-; CHECK: vdup.32 q{{.*}}, r3
- br i1 undef, label %bb1, label %bb2
-
-bb1:
-; CHECK-NEXT: %bb1
- %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
- %tmp1 = shl i32 %indvar, 2
- %gep1 = getelementptr i8* %ptr1, i32 %tmp1
- %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
- %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
- %gep2 = getelementptr i8* %ptr2, i32 %tmp1
- call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
- %indvar.next = add i32 %indvar, 1
- %cond = icmp eq i32 %indvar.next, 10
- br i1 %cond, label %bb2, label %bb1
-
-bb2:
- ret void
-}
-
-; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 3fac512add..14d04a4d8f 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -55,8 +55,8 @@ return: ; preds = %bb, %entry
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
entry:
; CHECK: t2:
-; CHECK: adr r{{.}}, #LCPI1_0
-; CHECK: vldmia r3, {d16, d17}
+; CHECK: mov.w r3, #1065353216
+; CHECK: vdup.32 q{{.*}}, r3
br i1 undef, label %bb1, label %bb2
bb1:
@@ -76,8 +76,8 @@ bb2:
ret void
}
-; CHECK: LCPI1_0:
-; CHECK: .section
+; CHECK-NOT: LCPI1_0:
+; CHECK: .subsections_via_symbols
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly