Fix a stunning oversight in the inline cost analysis.

It was never propagating one of the values it simplified to a constant across a myriad of instructions. Notably, ptrtoint instructions when we had a constant pointer (say, 0) didn't propagate that, blocking a massive number of downstream optimizations.

This was uncovered when investigating why we fail to inline and delete the boilerplate in:

  void f() {
    std::vector<int> v;
    v.push_back(1);
  }

It turns out most of the efforts I've made thus far to improve the analysis weren't making it far purely because of this. After this is fixed, the store-to-load forwarding patch enables LLVM to optimize the above to an empty function. We still can't nuke a second push_back, but for different reasons.

There is a very real chance this will cause somewhat noticeable changes in inlining behavior, so please let me know if you see regressions (or improvements!) because of this patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171196 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chandler Carruth <chandlerc@gmail.com> 2012-12-28 14:43:42 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2012-12-28 14:43:42 +0000
commit: 73527d30cddd9b542a01a33c333bc707504fd05f (patch)
tree: 6515b9226b2ee54fd18d1b872d46abc429b899ee
parent: ba94204e94ba88f7c897a5a59d1c770b7dc3d04e (diff)
2 files changed, 54 insertions, 4 deletions
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 685050765a..835b8343b2 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -358,7 +358,10 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
 
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
-  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+  if (!COp)
+    COp = SimplifiedValues.lookup(I.getOperand(0));
+  if (COp)
     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
       SimplifiedValues[&I] = C;
       return true;
@@ -383,7 +386,10 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Propagate constants through ptrtoint.
-  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+  if (!COp)
+    COp = SimplifiedValues.lookup(I.getOperand(0));
+  if (COp)
     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
       SimplifiedValues[&I] = C;
       return true;
@@ -416,7 +422,10 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // Propagate constants through ptrtoint.
-  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+  if (!COp)
+    COp = SimplifiedValues.lookup(I.getOperand(0));
+  if (COp)
     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
       SimplifiedValues[&I] = C;
       return true;
@@ -443,7 +452,10 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Propagate constants through ptrtoint.
-  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+  if (!COp)
+    COp = SimplifiedValues.lookup(I.getOperand(0));
+  if (COp)
     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
       SimplifiedValues[&I] = C;
       return true;
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index b2a14fe0b7..77bc3784ac 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -149,6 +149,44 @@ bb.false:
   ret i8 %z8
 }
 
+define i64 @caller5(i64 %y) {
+; Check that we can round trip constants through various kinds of casts etc w/o
+; losing track of the constant prop in the inline cost analysis.
+;
+; CHECK: @caller5
+; CHECK-NOT: call
+; CHECK: ret i64 -1
+
+entry:
+  %x = call i64 @callee5(i64 42, i64 %y)
+  ret i64 %x
+}
+
+define i64 @callee5(i64 %x, i64 %y) {
+  %inttoptr = inttoptr i64 %x to i8*
+  %bitcast = bitcast i8* %inttoptr to i32*
+  %ptrtoint = ptrtoint i32* %bitcast to i64
+  %trunc = trunc i64 %ptrtoint to i32
+  %zext = zext i32 %trunc to i64
+  %cmp = icmp eq i64 %zext, 42
+  br i1 %cmp, label %bb.true, label %bb.false
+
+bb.true:
+  ret i64 -1
+
+bb.false:
+  ; This block musn't be counted in the inline cost.
+  %y1 = add i64 %y, 1
+  %y2 = add i64 %y1, 1
+  %y3 = add i64 %y2, 1
+  %y4 = add i64 %y3, 1
+  %y5 = add i64 %y4, 1
+  %y6 = add i64 %y5, 1
+  %y7 = add i64 %y6, 1
+  %y8 = add i64 %y7, 1
+  ret i64 %y8
+}
+
 
 define i32 @PR13412.main() {
 ; This is a somewhat complicated three layer subprogram that was reported to
author	Chandler Carruth <chandlerc@gmail.com>	2012-12-28 14:43:42 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2012-12-28 14:43:42 +0000
commit	73527d30cddd9b542a01a33c333bc707504fd05f (patch)
tree	6515b9226b2ee54fd18d1b872d46abc429b899ee
parent	ba94204e94ba88f7c897a5a59d1c770b7dc3d04e (diff)