2 files changed, 27 insertions, 6 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index eebb7cf82f..bdea6e40d2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2480,10 +2480,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
       // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
       // ANSI says we can drop signals, so we can do this anyway." (from GCC)
-      // We need a better interface for long double here.
-      if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
-        if (Op1F->isExactlyValue(1.0))
-          return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
+      if (Op1F->isExactlyValue(1.0))
+        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
+    } else if (isa<VectorType>(Op1->getType())) {
+      if (isa<ConstantAggregateZero>(Op1))
+        return ReplaceInstUsesWith(I, Op1);
+      
+      // As above, vector X*splat(1.0) -> X in all defined cases.
+      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1))
+        if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+          if (F->isExactlyValue(1.0))
+            return ReplaceInstUsesWith(I, Op0);
     }
     
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
@@ -11636,3 +11643,4 @@ FunctionPass *llvm::createInstructionCombiningPass() {
   return new InstCombiner();
 }
 
+
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 8e9ab8d421..1a74025434 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,7 +1,5 @@
 ; This test makes sure that mul instructions are properly eliminated.
-;
 ; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep mul
-; END.
 
 define i32 @test1(i32 %A) {
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
@@ -78,3 +76,18 @@ define i32 @test12(i8 %a, i32 %b) {
         ret i32 %e
 }
 
+; PR2642
+define internal void @test13(<4 x float>*) {
+	load <4 x float>* %0, align 1
+	mul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
+	store <4 x float> %3, <4 x float>* %0, align 1
+	ret void
+}
+
+define internal void @test14(<4 x float>*) {
+	load <4 x float>* %0, align 1
+	mul <4 x float> %2, zeroinitializer
+	store <4 x float> %3, <4 x float>* %0, align 1
+	ret void
+}
+