aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp16
-rw-r--r--test/Transforms/InstCombine/mul.ll17
2 files changed, 27 insertions, 6 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index eebb7cf82f..bdea6e40d2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2480,10 +2480,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
- // We need a better interface for long double here.
- if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1->getType())) {
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1))
+ if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
@@ -11636,3 +11643,4 @@ FunctionPass *llvm::createInstructionCombiningPass() {
return new InstCombiner();
}
+
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 8e9ab8d421..1a74025434 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,7 +1,5 @@
; This test makes sure that mul instructions are properly eliminated.
-;
; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
-; END.
define i32 @test1(i32 %A) {
%B = mul i32 %A, 1 ; <i32> [#uses=1]
@@ -78,3 +76,18 @@ define i32 @test12(i8 %a, i32 %b) {
ret i32 %e
}
+; PR2642
+define internal void @test13(<4 x float>*) {
+ load <4 x float>* %0, align 1
+ mul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
+ store <4 x float> %3, <4 x float>* %0, align 1
+ ret void
+}
+
+define internal void @test14(<4 x float>*) {
+ load <4 x float>* %0, align 1
+ mul <4 x float> %2, zeroinitializer
+ store <4 x float> %3, <4 x float>* %0, align 1
+ ret void
+}
+