2 files changed, 60 insertions, 13 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ef5bbc4798..82c359194f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1337,31 +1337,53 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
 
 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
 /// bitcast.  The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) {
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
   Value *Src = CI.getOperand(0);
+  const Type *DestTy = CI.getType();
 
   // If this is a bitcast from int to float, check to see if the int is an
   // extraction from a vector.
   Value *VecInput = 0;
+  // bitcast(trunc(bitcast(somevector)))
   if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
       isa<VectorType>(VecInput->getType())) {
     const VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    const Type *DestTy = CI.getType();
-    
-    // If the element type of the vector doesn't match the result type, but the
-    // vector type's size is a multiple of the result type, bitcast it to be a
-    // vector type we can extract from.
-    if (VecTy->getElementType() != DestTy &&
-        VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) {
-      VecTy = VectorType::get(DestTy,
-            VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits());
-      VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-    }
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
     
-    if (VecTy->getElementType() == DestTy)
       return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+    }
   }
   
+  // bitcast(trunc(lshr(bitcast(somevector), cst))
+  ConstantInt *ShAmt = 0;
+  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+                                m_ConstantInt(ShAmt)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+        ShAmt->getZExtValue() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+      
+      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+    }
+  }
   return 0;
 }
 
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index c248b5e4bc..10898397b9 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -35,3 +35,28 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 ; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
 ; CHECK-NEXT:  ret float %add
 }
+
+; Optimize bitcasts that are extracting other elements of a vector.  This
+; happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+  
+; CHECK: @test3
+; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 2
+; CHECK-NEXT:  %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT:  ret float %add
+}