aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2013-03-06 05:44:53 +0000
committerJim Grosbach <grosbach@apple.com>2013-03-06 05:44:53 +0000
commit186d8a3d67ccd2b2401c5d7d4e2cc15c4d1fdeae (patch)
tree6576d682ded82491fbd57f9bfee66256ee1a8b64
parent508d11b19cfd0f2231037d28f4ce6ab428929d3b (diff)
InstCombine: Don't shrink allocas when combining with a bitcast.
When considering folding a bitcast of an alloca into the alloca itself, make sure we don't shrink the amount of memory being allocated, or things rapidly go sideways. rdar://13324424 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176547 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp6
-rw-r--r--test/Transforms/Inline/devirtualize.ll2
-rw-r--r--test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll45
3 files changed, 52 insertions, 1 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a960ab2499..d162223a6f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index 18bbf7a6f6..d46154ef6a 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce < %s | FileCheck %s
+; RUN: opt -S -Os < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
new file mode 100644
index 0000000000..b20c3a07c0
--- /dev/null
+++ b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._my_struct = type <{ [12 x i8], [4 x i8] }>
+
+@initval = common global %struct._my_struct zeroinitializer, align 1
+
+; InstCombine will try to change the %struct._my_struct alloca into an
+; allocation of an i96 because of the bitcast to create %2. That's not valid,
+; as the other 32 bits of the structure still feed into the return value
+define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK: @function
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
+; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
+entry:
+ %retval = alloca %struct._my_struct, align 8
+ %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+ %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+ %0 = zext i32 %x to i96
+ %bf.value = shl nuw nsw i96 %0, 6
+ %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
+ %1 = zext i32 %y to i96
+ %bf.value2 = shl nuw nsw i96 %1, 32
+ %bf.shl3 = and i96 %bf.value2, 288230371856744448
+ %bf.value.masked = and i96 %bf.value, 4294967232
+ %2 = zext i32 %z to i96
+ %bf.value8 = and i96 %2, 63
+ %bf.clear4 = or i96 %bf.shl3, %bf.value.masked
+ %bf.set5 = or i96 %bf.clear4, %bf.value8
+ %bf.set10 = or i96 %bf.set5, %bf.clear
+ %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96*
+ store i96 %bf.set10, i96* %retval.0.cast7, align 8
+ %retval.12.idx8 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 1
+ %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32*
+ store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4
+ %trunc = trunc i96 %bf.set10 to i64
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
+ %retval.8.idx12 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 0, i64 8
+ %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
+ %retval.8.load14 = load i64* %retval.8.cast13, align 8
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
+ ret { i64, i64 } %.fca.1.insert
+}