diff options
author | Stuart Hastings <stuart@apple.com> | 2011-05-17 22:13:31 +0000 |
---|---|---|
committer | Stuart Hastings <stuart@apple.com> | 2011-05-17 22:13:31 +0000 |
commit | ca1ef485854d668f794bf389154aa371aa2ed535 (patch) | |
tree | ac9f34221b8adf81cfcd29f34c91a0e4b0b772a2 | |
parent | c81c9709ef219809b0d04c55a80a8d18c7194f6a (diff) |
X86 pmovsx/pmovzx ignore the upper half of their inputs.
rdar://problem/6945110
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131493 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 22 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/2011-05-17-pmovzxwd.ll | 15 |
3 files changed, 38 insertions, 1 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 726105f75d..83653fd6d5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -588,6 +588,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + + case Intrinsic::x86_sse41_pmovsxbw: + case Intrinsic::x86_sse41_pmovsxwd: + case Intrinsic::x86_sse41_pmovsxdq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxdq: { + unsigned VWidth = + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); + unsigned LowHalfElts = VWidth / 2; + APInt InputDemandedElts(VWidth, 0); + InputDemandedElts = InputDemandedElts.getBitsSet(VWidth, 0, LowHalfElts); + APInt UndefElts(VWidth, 0); + if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), + InputDemandedElts, + UndefElts)) { + II->setArgOperand(0, TmpV); + return II; + } + break; + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 6e727ce6e3..4c9fed3abc 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -867,7 +867,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (Depth == 10) return 0; - // If multiple users are using the root value, procede with + // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements // are needed. if (!V->hasOneUse()) { diff --git a/test/CodeGen/X86/2011-05-17-pmovzxwd.ll b/test/CodeGen/X86/2011-05-17-pmovzxwd.ll new file mode 100644 index 0000000000..9ef67fcb4e --- /dev/null +++ b/test/CodeGen/X86/2011-05-17-pmovzxwd.ll @@ -0,0 +1,15 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s +; <rdar://problem/6945110> + +define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind { +entry: + %tmp = load <4 x i16>* %src + %tmp1 = load <8 x i16>* %foo +; CHECK: shufflevector + %tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NOT: shufflevector + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> + %0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone |