aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2007-04-14 22:29:23 +0000
committerChris Lattner <sabre@nondot.org>2007-04-14 22:29:23 +0000
commit69878336519f09aadf976cf1b5c73dafbb54ce6f (patch)
treeb77f325fd9a00d9d718773bba9404186bbbae8af
parent2caf0ec7ad9899a068d1e232e8ddac5da97dbf0e (diff)
Implement InstCombine/vec_demanded_elts.ll:test2. This allows us to turn
unsigned test(float f) { return _mm_cvtsi128_si32( (__m128i) _mm_set_ss( f*f )); } into: _test: movss 4(%esp), %xmm0 mulss %xmm0, %xmm0 movd %xmm0, %eax ret instead of: _test: movss 4(%esp), %xmm0 mulss %xmm0, %xmm0 xorps %xmm1, %xmm1 movss %xmm0, %xmm1 movd %xmm1, %eax ret GCC gets: _test: subl $28, %esp movss 32(%esp), %xmm0 mulss %xmm0, %xmm0 xorps %xmm1, %xmm1 movss %xmm0, %xmm1 movaps %xmm1, %xmm0 movd %xmm0, 12(%esp) movl 12(%esp), %eax addl $28, %esp ret git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36020 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp66
1 files changed, 66 insertions, 0 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 17d2f93ca6..912ab2db60 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -1489,7 +1489,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
UndefElts |= 1ULL << IdxNo;
break;
}
+ case Instruction::BitCast: {
+ // Packed->packed casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ uint64_t InputDemandedElts = 0;
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts & (1ULL << OutIdx))
+ InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts & (1ULL << InIdx/Ratio))
+ InputDemandedElts |= 1ULL << InIdx;
+ }
+ // div/rem demand all inputs, because they don't want divide by zero.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ assert(0 && "Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
+ UndefElts |= 1ULL << OutIdx;
+ } else if (VWidth < InVWidth) {
+ assert(0 && "Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if ((UndefElts2 & (1ULL << InIdx)) == 0) // Not undef?
+ UndefElts &= ~(1ULL << (InIdx/Ratio)); // Clear undef bit.
+ }
+ break;
+ }
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: