author    | Bob Wilson <bob.wilson@apple.com> | 2010-08-17 05:54:34 +0000
committer | Bob Wilson <bob.wilson@apple.com> | 2010-08-17 05:54:34 +0000
commit    | 7aaf5bf3db44c94bd630e07d63c3a4a1d92e44f4 (patch)
tree      | d47e06c81bb4c764050efcf537e1225383c95bc4
parent    | e1bcb440dc0ca3c41fda1c0c581abfc4f38ca170 (diff)
Allow more cases of undef shuffle indices and add tests for them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@111226 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 34
-rw-r--r-- | test/CodeGen/ARM/vext.ll | 20
-rw-r--r-- | test/CodeGen/ARM/vrev.ll | 18
-rw-r--r-- | test/CodeGen/ARM/vtrn.ll | 27
-rw-r--r-- | test/CodeGen/ARM/vuzp.ll | 27
-rw-r--r-- | test/CodeGen/ARM/vzip.ll | 27
6 files changed, 141 insertions, 12 deletions
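
Every matcher touched by this patch follows the same pattern: an undef shuffle index, which appears as a negative value in the mask, is skipped (or allowed to match anything) instead of causing the comparison to fail. The following standalone sketch illustrates that pattern outside of LLVM; it is not code from ARMISelLowering.cpp, and the function name, the std::vector interface, and the simplified block-size handling are assumptions made purely for illustration.

#include <cassert>
#include <vector>

// Sketch of a VREV-style shuffle-mask check that treats undef indices
// (negative mask values) as wildcards, mirroring the "ignore UNDEF
// indices" checks added in this commit.
static bool isVREVMaskIgnoringUndef(const std::vector<int> &M,
                                    unsigned EltSz, unsigned NumElts,
                                    unsigned BlockSize = 64) {
  if (EltSz == 0 || BlockSize <= EltSz || BlockSize % EltSz != 0)
    return false;
  unsigned BlockElts = BlockSize / EltSz;   // elements per reversed block
  if (NumElts % BlockElts != 0)
    return false;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0)
      continue; // undef index: any value is acceptable in this lane
    if ((unsigned) M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  // Mirrors test_vrev64D8_undef from test/CodeGen/ARM/vrev.ll: 8-bit
  // elements reversed within a 64-bit block, with two undef lanes.
  std::vector<int> Mask = {7, -1, -1, 4, 3, 2, 1, 0};
  assert(isVREVMaskIgnoringUndef(Mask, /*EltSz=*/8, /*NumElts=*/8));
  return 0;
}

Skipping undef lanes is safe because an undef element may take any value, so whatever VREV (or VEXT/VTRN/VUZP/VZIP) produces for that lane is acceptable.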
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b05c5dd259..c2bd471faa 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3148,6 +3148,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
                        bool &ReverseVEXT, unsigned &Imm) {
   unsigned NumElts = VT.getVectorNumElements();
   ReverseVEXT = false;
+
+  // Assume that the first shuffle index is not UNDEF. Fail if it is.
+  if (M[0] < 0)
+    return false;
+
   Imm = M[0];
 
   // If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3163,6 +3168,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
       ReverseVEXT = true;
     }
 
+    if (M[i] < 0) continue; // ignore UNDEF indices
     if (ExpectedElt != static_cast<unsigned>(M[i]))
       return false;
   }
@@ -3188,13 +3194,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   unsigned BlockElts = M[0] + 1;
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
 
   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
     return false;
 
   for (unsigned i = 0; i < NumElts; ++i) {
-    if ((unsigned) M[i] !=
-        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
       return false;
   }
@@ -3210,8 +3219,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + NumElts + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
       return false;
   }
   return true;
@@ -3229,9 +3238,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
-    if (M[i] < 0) continue;
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
       return false;
   }
   return true;
@@ -3246,6 +3254,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
   unsigned NumElts = VT.getVectorNumElements();
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != 2 * i + WhichResult)
       return false;
   }
@@ -3271,7 +3280,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   for (unsigned j = 0; j != 2; ++j) {
     unsigned Idx = WhichResult;
     for (unsigned i = 0; i != Half; ++i) {
-      if ((unsigned) M[i + j * Half] != Idx)
+      int MIdx = M[i + j * Half];
+      if (MIdx >= 0 && (unsigned) MIdx != Idx)
         return false;
       Idx += 2;
     }
@@ -3294,8 +3304,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx + NumElts)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
       return false;
     Idx += 1;
   }
@@ -3320,8 +3330,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
       return false;
     Idx += 1;
   }
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index c11a67c6c4..e460a84f62 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   ret <4 x i32> %tmp3
 }
 
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+  ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index deed554d84..e1fe64b02d 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
   %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
   ret <16 x i8> %tmp2
 }
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+  ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 10bb10ac24..b1c2f93b47 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
   %tmp5 = fadd <4 x float> %tmp3, %tmp4
   ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+  %tmp5 = add <8 x i8> %tmp3, %tmp4
+  ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+  %tmp5 = add <8 x i16> %tmp3, %tmp4
+  ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 6cef188d76..9130f62891 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
   %tmp5 = fadd <4 x float> %tmp3, %tmp4
   ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+  %tmp5 = add <8 x i8> %tmp3, %tmp4
+  ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+  %tmp5 = add <8 x i16> %tmp3, %tmp4
+  ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index a9ecdcab42..926970aeb2 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
   %tmp5 = fadd <4 x float> %tmp3, %tmp4
   ret <4 x float> %tmp5
 }
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+  %tmp5 = add <8 x i8> %tmp3, %tmp4
+  ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+  %tmp1 = load <16 x i8>* %A
+  %tmp2 = load <16 x i8>* %B
+  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+  %tmp5 = add <16 x i8> %tmp3, %tmp4
+  ret <16 x i8> %tmp5
+}
+