diff options
author | Bob Wilson <bob.wilson@apple.com> | 2011-01-07 04:59:04 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2011-01-07 04:59:04 +0000 |
commit | 5e8b833707e6d59576d91b23a2c24e596eace60e (patch) | |
tree | 93a69003f2b97be2b2b36c809e06d0fb3e933653 | |
parent | 67b067d2f91ede565a3cde857442a44314d72537 (diff) |
Add ARM patterns to match EXTRACT_SUBVECTOR nodes.
Also fix an off-by-one in SelectionDAGBuilder that was preventing shuffle
vectors from being translated to EXTRACT_SUBVECTOR.
Patch by Tim Northover.
The test changes are needed to keep those spill-q tests from testing aligned
spills and restores. If the only aligned stack objects are spill slots, we
no longer realign the stack frame. Prior to this patch, an EXTRACT_SUBVECTOR
was legalized by loading from the stack, which created an aligned frame index.
Now, however, there is nothing except the spill slot in the stack frame, so
I added an aligned alloca.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122995 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Target/TargetSelectionDAG.td | 3 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 17 | ||||
-rw-r--r-- | test/CodeGen/ARM/spill-q.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-spill-q.ll | 5 |
6 files changed, 30 insertions, 4 deletions
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 92ceee5bfb..37ebc12e5b 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -421,6 +421,9 @@ def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; +def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", + SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, + []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 88490b3120..2167523f23 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2745,7 +2745,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } else { StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts < SrcNumElts) + StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f50eac523f..b4432d5236 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -94,7 +94,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e307105a5c..20437963f6 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4530,6 +4530,23 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; // Other Vector Shuffles. +// Aligned extractions: really just dropping registers + +class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT> + : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))), + (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>; + +def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>; + +def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>; + +def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>; + +def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>; + +def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; + + // VEXT : Vector Extract class VEXTd<string OpcodeStr, string Dt, ValueType Ty> diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index dc83e25905..bf4e55cb06 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -15,7 +15,10 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %aligned_vec = alloca <4 x float>, align 16 + %"alloca point" = bitcast i32 0 to i32 + %vecptr = bitcast <4 x float>* %aligned_vec to i8* + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 0d73fba93e..d9a0617f5a 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -15,7 +15,10 @@ define void @aaa(%quuz* %this, i8* %block) { ; CHECK: vst1.64 {{.*}}[{{.*}}, :128] ; CHECK: vld1.64 {{.*}}[{{.*}}, :128] entry: - %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %aligned_vec = alloca <4 x float>, align 16 + %"alloca point" = bitcast i32 0 to i32 + %vecptr = bitcast <4 x float>* %aligned_vec to i8* + %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind store float 6.300000e+01, float* undef, align 4 %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 |