about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCasts.cpp 48
-rw-r--r-- test/Transforms/InstCombine/bitcast.ll 25
2 files changed, 60 insertions, 13 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ef5bbc4798..82c359194f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1337,31 +1337,53 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) {
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
// If this is a bitcast from int to float, check to see if the int is an
// extraction from a vector.
Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
isa<VectorType>(VecInput->getType())) {
const VectorType *VecTy = cast<VectorType>(VecInput->getType());
- const Type *DestTy = CI.getType();
-
- // If the element type of the vector doesn't match the result type, but the
- // vector type's size is a multiple of the result type, bitcast it to be a
- // vector type we can extract from.
- if (VecTy->getElementType() != DestTy &&
- VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) {
- VecTy = VectorType::get(DestTy,
- VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits());
- VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
- }
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
- if (VecTy->getElementType() == DestTy)
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
}
+ // bitcast(trunc(lshr(bitcast(somevector), cst)))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
return 0;
}
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index c248b5e4bc..10898397b9 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -35,3 +35,28 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
; CHECK-NEXT: ret float %add
}
+
+; Optimize bitcasts that are extracting other elements of a vector. This
+; happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp29 = lshr i64 %tmp28, 32
+ %tmp23 = trunc i64 %tmp29 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i64> %B to i128
+ %tmp1 = lshr i128 %tmp, 64
+ %tmp2 = trunc i128 %tmp1 to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+
+; CHECK: @test3
+; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2
+; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT: ret float %add
+}