diff options
author | Michael Liao <michael.liao@intel.com> | 2012-08-31 20:12:31 +0000 |
---|---|---|
committer | Michael Liao <michael.liao@intel.com> | 2012-08-31 20:12:31 +0000 |
commit | 265bcb1e5b106a7c5db2bfcfb13cceffe0c413be (patch) | |
tree | 4b71ac71eae05fb78c0c2225f0dc2e843c84bd53 | |
parent | 3185f9a2ea80afec30064b7cd095f82c31dc154e (diff) |
Fix PR12359
- In addition to the case where V2 is undefined, if V2 is a zero vector, skip
the 2nd PSHUFB and the POR as well, since PSHUFB zeroes elements with negative indices.
Patch by Sriram Murali <sriram.murali@intel.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163018 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/pr12359.ll | 10 |
2 files changed, 15 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f9184f693d..f4329d3bf9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5881,8 +5881,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef<int> MaskVals = SVOp->getMask(); - bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; - // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. @@ -5906,7 +5904,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (V2IsUndef) + + // As PSHUFB will zero elements with negative indices, it's safe to ignore + // the 2nd operand if it's undefined or zero. + if (V2.getOpcode() == ISD::UNDEF || + ISD::isBuildVectorAllZeros(V2.getNode())) return V1; // Calculate the shuffle mask for the second input, shuffle it, and diff --git a/test/CodeGen/X86/pr12359.ll b/test/CodeGen/X86/pr12359.ll new file mode 100644 index 0000000000..024b163fa7 --- /dev/null +++ b/test/CodeGen/X86/pr12359.ll @@ -0,0 +1,10 @@ +; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s +define <16 x i8> @shuf(<16 x i8> %inval1) { +entry: + %0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4, i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4> + ret <16 x i8> %0 +; CHECK: shuf +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret +} |