diff options
-rw-r--r-- | lib/Transforms/Scalar/InstructionCombining.cpp | 16 | ||||
-rw-r--r-- | test/Transforms/InstCombine/mul.ll | 17 |
2 files changed, 27 insertions, 6 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index eebb7cf82f..bdea6e40d2 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -2480,10 +2480,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) - // We need a better interface for long double here. - if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa<VectorType>(Op1->getType())) { + if (isa<ConstantAggregateZero>(Op1)) + return ReplaceInstUsesWith(I, Op1); + + // As above, vector X*splat(1.0) -> X in all defined cases. + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) + if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue())) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); } if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) @@ -11636,3 +11643,4 @@ FunctionPass *llvm::createInstructionCombiningPass() { return new InstCombiner(); } + diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll index 8e9ab8d421..1a74025434 100644 --- a/test/Transforms/InstCombine/mul.ll +++ b/test/Transforms/InstCombine/mul.ll @@ -1,7 +1,5 @@ ; This test makes sure that mul instructions are properly eliminated. -; ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul -; END. define i32 @test1(i32 %A) { %B = mul i32 %A, 1 ; <i32> [#uses=1] @@ -78,3 +76,18 @@ define i32 @test12(i8 %a, i32 %b) { ret i32 %e } +; PR2642 +define internal void @test13(<4 x float>*) { + load <4 x float>* %0, align 1 + mul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 > + store <4 x float> %3, <4 x float>* %0, align 1 + ret void +} + +define internal void @test14(<4 x float>*) { + load <4 x float>* %0, align 1 + mul <4 x float> %2, zeroinitializer + store <4 x float> %3, <4 x float>* %0, align 1 + ret void +} + |