author    Karl Schimpf <kschimpf@google.com>  2013-08-26 09:29:51 -0700
committer Karl Schimpf <kschimpf@google.com>  2013-08-26 09:29:51 -0700
commit    685d11b718cf8a017665f241f45fc144e8d622f2 (patch)
tree      0312b7fa4ea846b899029c500f45052568d8ecab
parent    bbdf86f69eebaad59f7338f645916ed984a88861 (diff)
Elide pointer to int casts on phi nodes.
Handles the eliding of pointer-to-integer cast operands of phi nodes. Also
caches unelided casts generated in the reader (removing duplicates within the
same block). This reduces the size of the thawed pnacl-llc.pexe by about 2%.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3544
R=mseaborn@chromium.org

Review URL: https://codereview.chromium.org/22909016
-rw-r--r--  lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp    114
-rw-r--r--  lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h       96
-rw-r--r--  lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp     2
-rw-r--r--  test/NaCl/Bitcode/bitcast-elide.ll               145
-rw-r--r--  test/NaCl/Bitcode/inttoptr-elide.ll               96
-rw-r--r--  test/NaCl/Bitcode/ptrtoint-elide.ll              587
6 files changed, 846 insertions, 194 deletions
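
The reader-side strategy, in rough form: each basic block of the function being
read keeps a cache of the casts already reconstructed in it, keyed by
(cast opcode, destination type, operand), so a second request for the same
conversion reuses the existing instruction; casts created for phi operands are
not inserted where they are created, but are placed just before the terminator
of the incoming block once the whole function body has been read. Below is a
minimal sketch of that idea (illustrative names only, with a plain std::map
standing in for the DenseMap used by the actual NaClBitcodeReader::CreateCast
in the diff that follows):

#include <map>
#include <utility>
#include <vector>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch only: per-block bookkeeping analogous to the reader's BasicBlockInfo.
struct BlockCasts {
  BasicBlock *BB;
  // Key: (cast opcode, destination type, operand).  Value: the cast already
  // materialized for this block, so duplicate requests reuse one instruction.
  std::map<std::pair<unsigned, std::pair<Type*, Value*> >, CastInst*> Cache;
  // Casts created for phi operands; their insertion point (just before the
  // terminator of BB) is only known once the whole function has been read.
  std::vector<CastInst*> DeferredPhiCasts;
};

static CastInst *getOrCreateCast(BlockCasts &Info, Instruction::CastOps Op,
                                 Type *DestTy, Value *V, bool DeferInsertion) {
  CastInst *&Slot =
      Info.Cache[std::make_pair(unsigned(Op), std::make_pair(DestTy, V))];
  if (Slot == 0) {
    Slot = CastInst::Create(Op, V, DestTy); // created detached, no parent yet
    if (DeferInsertion)
      Info.DeferredPhiCasts.push_back(Slot);
  }
  if (!DeferInsertion && Slot->getParent() == 0)
    Info.BB->getInstList().push_back(Slot); // install at the current point
  return Slot;
}

// Called after the last record of the function body: deferred phi casts are
// placed just before the terminator of their incoming block, then the caches
// are cleared for the next function.
static void flushDeferredCasts(BlockCasts &Info) {
  for (std::vector<CastInst*>::iterator I = Info.DeferredPhiCasts.begin(),
                                        E = Info.DeferredPhiCasts.end();
       I != E; ++I)
    if ((*I)->getParent() == 0)
      Info.BB->getInstList().insert(Info.BB->getTerminator(), *I);
  Info.DeferredPhiCasts.clear();
  Info.Cache.clear();
}

The diff below implements this with a DenseMap keyed by a small
NaClBitcodeReaderCast struct (hence the DenseMapInfo specialization added to
NaClBitcodeReader.h) and flushes the deferred phi casts at OutOfRecordLoop in
ParseFunctionBody.
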
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
index a38b18afa1..fbe1fc0165 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
@@ -36,7 +36,6 @@ void NaClBitcodeReader::FreeState() {
std::vector<Type*>().swap(TypeList);
ValueList.clear();
- std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
}
@@ -1284,40 +1283,56 @@ bool NaClBitcodeReader::InstallInstruction(
return false;
}
-Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, BasicBlock *BB) {
+CastInst *
+NaClBitcodeReader::CreateCast(unsigned BBIndex, Instruction::CastOps Op,
+ Type *CT, Value *V, bool DeferInsertion) {
+ if (BBIndex >= FunctionBBs.size())
+ report_fatal_error("CreateCast on unknown basic block");
+ BasicBlockInfo &BBInfo = FunctionBBs[BBIndex];
+ NaClBitcodeReaderCast ModeledCast(Op, CT, V);
+ CastInst *Cast = BBInfo.CastMap[ModeledCast];
+ if (Cast == NULL) {
+ Cast = CastInst::Create(Op, V, CT);
+ BBInfo.CastMap[ModeledCast] = Cast;
+ if (DeferInsertion) {
+ BBInfo.PhiCasts.push_back(Cast);
+ }
+ }
+ if (!DeferInsertion && Cast->getParent() == 0) {
+ InstallInstruction(BBInfo.BB, Cast);
+ }
+ return Cast;
+}
+
+Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, unsigned BBIndex,
+ bool DeferInsertion) {
if (Op->getType()->isPointerTy()) {
- Instruction *Conversion = new PtrToIntInst(Op, IntPtrType);
- InstallInstruction(BB, Conversion);
- return Conversion;
+ return CreateCast(BBIndex, Instruction::PtrToInt, IntPtrType, Op,
+ DeferInsertion);
}
return Op;
}
-Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
+Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T,
+ unsigned BBIndex) {
// Note: Currently only knows how to add inttoptr and bitcast type
// conversions for non-phi nodes, since these are the only elided
// instructions in the bitcode writer.
//
// TODO(kschimpf): Generalize this as we expand elided conversions.
- Instruction *Conversion = 0;
Type *OpTy = Op->getType();
if (OpTy == T) return Op;
if (OpTy->isPointerTy()) {
- Conversion = new BitCastInst(Op, T);
+ return CreateCast(BBIndex, Instruction::BitCast, T, Op);
} else if (OpTy == IntPtrType) {
- Conversion = new IntToPtrInst(Op, T);
+ return CreateCast(BBIndex, Instruction::IntToPtr, T, Op);
}
- if (Conversion == 0) {
- std::string Message;
- raw_string_ostream StrM(Message);
- StrM << "Can't convert " << *Op << " to type " << *T << "\n";
- Error(StrM.str());
- } else {
- InstallInstruction(BB, Conversion);
- }
- return Conversion;
+ std::string Message;
+ raw_string_ostream StrM(Message);
+ StrM << "Can't convert " << *Op << " to type " << *T << "\n";
+ report_fatal_error(StrM.str());
}
Type *NaClBitcodeReader::ConvertTypeToScalarType(Type *T) {
@@ -1396,9 +1411,11 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid DECLAREBLOCKS record");
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
- for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create(Context, "", F);
- CurBB = FunctionBBs[0];
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) {
+ BasicBlockInfo &BBInfo = FunctionBBs[i];
+ BBInfo.BB = BasicBlock::Create(Context, "", F);
+ }
+ CurBB = FunctionBBs.at(0).BB;
continue;
case naclbitc::FUNC_CODE_INST_BINOP: {
@@ -1410,8 +1427,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OpNum+1 > Record.size())
return Error("Invalid BINOP record");
- LHS = ConvertOpToScalar(LHS, CurBB);
- RHS = ConvertOpToScalar(RHS, CurBB);
+ LHS = ConvertOpToScalar(LHS, CurBBNo);
+ RHS = ConvertOpToScalar(RHS, CurBBNo);
int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
if (Opc == -1) return Error("Invalid BINOP record");
@@ -1472,7 +1489,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
case Instruction::SExt:
case Instruction::UIToFP:
case Instruction::SIToFP:
- Op = ConvertOpToScalar(Op, CurBB);
+ Op = ConvertOpToScalar(Op, CurBBNo);
break;
default:
break;
@@ -1493,8 +1510,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, &OpNum, NextValueNo, &Cond))
return Error("Invalid SELECT record");
- TrueVal = ConvertOpToScalar(TrueVal, CurBB);
- FalseVal = ConvertOpToScalar(FalseVal, CurBB);
+ TrueVal = ConvertOpToScalar(TrueVal, CurBBNo);
+ FalseVal = ConvertOpToScalar(FalseVal, CurBBNo);
// expect i1
if (Cond->getType() != Type::getInt1Ty(Context))
@@ -1514,8 +1531,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OpNum+1 != Record.size())
return Error("Invalid CMP record");
- LHS = ConvertOpToScalar(LHS, CurBB);
- RHS = ConvertOpToScalar(RHS, CurBB);
+ LHS = ConvertOpToScalar(LHS, CurBBNo);
+ RHS = ConvertOpToScalar(RHS, CurBBNo);
if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -1622,9 +1639,6 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid PHI record");
- // TODO(kschimpf): Fix handling of converting types for values,
- // to handle elided casts, once the bitcode writer knows how.
-
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
@@ -1636,8 +1650,16 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
V = getValueSigned(Record, 1+i, NextValueNo);
else
V = getValue(Record, 1+i, NextValueNo);
- BasicBlock *BB = getBasicBlock(Record[2+i]);
+ unsigned BBIndex = Record[2+i];
+ BasicBlock *BB = getBasicBlock(BBIndex);
if (!V || !BB) return Error("Invalid PHI record");
+ if (GetPNaClVersion() == 2 && Ty == IntPtrType) {
+ // Delay installing scalar casts until all instructions of
+ // the function are rendered. This guarantees that we insert
+ // the conversion just before the incoming edge (or use an
+ // existing conversion if already installed).
+ V = ConvertOpToScalar(V, BBIndex, /* DeferInsertion = */ true);
+ }
PN->addIncoming(V, BB);
}
I = PN;
@@ -1672,7 +1694,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
Type *T = getTypeByID(Record[2]);
if (T == 0)
return Error("Invalid type for load instruction");
- Op = ConvertOpToType(Op, T->getPointerTo(), CurBB);
+ Op = ConvertOpToType(Op, T->getPointerTo(), CurBBNo);
if (Op == 0) return true;
I = new LoadInst(Op, "", false, (1 << Record[OpNum]) >> 1);
break;
@@ -1697,8 +1719,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
case 2:
if (OpNum+1 != Record.size())
return Error("Invalid STORE record");
- Val = ConvertOpToScalar(Val, CurBB);
- Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBB);
+ Val = ConvertOpToScalar(Val, CurBBNo);
+ Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBBNo);
I = new StoreInst(Val, Ptr, false, (1 << Record[OpNum]) >> 1);
break;
}
@@ -1767,7 +1789,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
// If this was a terminator instruction, move to the next block.
if (isa<TerminatorInst>(I)) {
++CurBBNo;
- CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ CurBB = getBasicBlock(CurBBNo);
}
// Non-void values get registered in the value table for future use.
@@ -1777,6 +1799,24 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OutOfRecordLoop:
+ // Add PHI conversions to corresponding incoming block, if not
+ // already in the block. Also clear all conversions after fixing
+ // PHI conversions.
+ for (unsigned I = 0, NumBBs = FunctionBBs.size(); I < NumBBs; ++I) {
+ BasicBlockInfo &BBInfo = FunctionBBs[I];
+ std::vector<CastInst*> &PhiCasts = BBInfo.PhiCasts;
+ for (std::vector<CastInst*>::iterator Iter = PhiCasts.begin(),
+ IterEnd = PhiCasts.end(); Iter != IterEnd; ++Iter) {
+ CastInst *Cast = *Iter;
+ if (Cast->getParent() == 0) {
+ BasicBlock *BB = BBInfo.BB;
+ BB->getInstList().insert(BB->getTerminator(), Cast);
+ }
+ }
+ PhiCasts.clear();
+ BBInfo.CastMap.clear();
+ }
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (A->getParent() == 0) {
@@ -1793,7 +1833,7 @@ OutOfRecordLoop:
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
- std::vector<BasicBlock*>().swap(FunctionBBs);
+ FunctionBBs.clear();
DEBUG(dbgs() << "-> ParseFunctionBody\n");
return false;
}
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
index 814ef44efb..762088887f 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
@@ -21,6 +21,7 @@
#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
#include "llvm/GVMaterializer.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ValueHandle.h"
@@ -29,6 +30,46 @@
namespace llvm {
class MemoryBuffer;
class LLVMContext;
+ class CastInst;
+
+// Models a Cast. Used to cache casts created in a basic block by the
+// PNaCl bitcode reader.
+struct NaClBitcodeReaderCast {
+ // Fields of the conversion.
+ Instruction::CastOps Op;
+ Type *Ty;
+ Value *Val;
+
+ NaClBitcodeReaderCast(Instruction::CastOps Op, Type *Ty, Value *Val)
+ : Op(Op), Ty(Ty), Val(Val) {}
+};
+
+// Models the data structure used to hash/compare Casts in a DenseMap.
+template<>
+struct DenseMapInfo<NaClBitcodeReaderCast> {
+public:
+ static NaClBitcodeReaderCast getEmptyKey() {
+ return NaClBitcodeReaderCast(Instruction::CastOpsEnd,
+ DenseMapInfo<Type*>::getEmptyKey(),
+ DenseMapInfo<Value*>::getEmptyKey());
+ }
+ static NaClBitcodeReaderCast getTombstoneKey() {
+ return NaClBitcodeReaderCast(Instruction::CastOpsEnd,
+ DenseMapInfo<Type*>::getTombstoneKey(),
+ DenseMapInfo<Value*>::getTombstoneKey());
+ }
+ static unsigned getHashValue(const NaClBitcodeReaderCast &C) {
+ std::pair<int, std::pair<Type*, Value*> > Tuple;
+ Tuple.first = C.Op;
+ Tuple.second.first = C.Ty;
+ Tuple.second.second = C.Val;
+ return DenseMapInfo<std::pair<int, std::pair<Type*, Value*> > >::getHashValue(Tuple);
+ }
+ static bool isEqual(const NaClBitcodeReaderCast &LHS,
+ const NaClBitcodeReaderCast &RHS) {
+ return LHS.Op == RHS.Op && LHS.Ty == RHS.Ty && LHS.Val == RHS.Val;
+ }
+};
//===----------------------------------------------------------------------===//
// NaClBitcodeReaderValueList Class
@@ -83,8 +124,8 @@ public:
// already been declared.
bool createValueFwdRef(unsigned Idx, Type *Ty);
- // Declares the type of the forward-referenced constant Idx. Returns
- // 0 if an error occurred.
+ // Declares the type of the forward-referenced constant Idx.
+ // Returns 0 if an error occurred.
// TODO(kschimpf) Convert these to be like createValueFwdRef and
// getValueFwdRef.
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
@@ -103,7 +144,7 @@ public:
// was forward referenced).
void AssignValue(Value *V, unsigned Idx);
- // Assigns Idx to the given global variable. If the Idx currently has
+ // Assigns Idx to the given global variable. If the Idx currently has
// a forward reference (built by createGlobalVarFwdRef(unsigned Idx)),
// replaces uses of the global variable forward reference with the
// value GV.
@@ -133,9 +174,20 @@ class NaClBitcodeReader : public GVMaterializer {
NaClBitcodeReaderValueList ValueList;
SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
+ // Holds information about each BasicBlock in the function being read.
+ struct BasicBlockInfo {
+ // A basic block within the function being modeled.
+ BasicBlock *BB;
+ // The set of generated conversions.
+ DenseMap<NaClBitcodeReaderCast, CastInst*> CastMap;
+ // The set of generated conversions that were added for phi nodes,
+    // and may need their parent basic block defined.
+ std::vector<CastInst*> PhiCasts;
+ };
+
/// FunctionBBs - While parsing a function body, this is a list of the basic
/// blocks for the function.
- std::vector<BasicBlock*> FunctionBBs;
+ std::vector<BasicBlockInfo> FunctionBBs;
// When reading the module header, this list is populated with functions that
// have bodies later in the file.
@@ -147,7 +199,7 @@ class NaClBitcodeReader : public GVMaterializer {
UpgradedIntrinsicMap UpgradedIntrinsics;
// Several operations happen after the module header has been read, but
- // before function bodies are processed. This keeps track of whether
+ // before function bodies are processed. This keeps track of whether
// we've done this yet.
bool SeenFirstFunctionBody;
@@ -226,14 +278,14 @@ private:
return Header.GetPNaClVersion();
}
Type *getTypeByID(unsigned ID);
- // Returns the value associated with ID. The value must already exist,
+ // Returns the value associated with ID. The value must already exist,
// or a forward referenced value created by getOrCreateFnVaueByID.
Value *getFnValueByID(unsigned ID) {
return ValueList.getValueFwdRef(ID);
}
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return 0; // Invalid ID
- return FunctionBBs[ID];
+ return FunctionBBs[ID].BB;
}
/// \brief Read a value out of the specified record from slot '*Slot'.
@@ -273,18 +325,30 @@ private:
return getFnValueByID(ValNo);
}
- /// \brief Add instructions to cast Op to the given type T into block BB.
- /// Follows rules for pointer conversion as defined in
- /// llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
+ /// \brief Create an (elided) cast instruction for basic block
+ /// BBIndex. Op is the type of cast. V is the value to cast. CT
+ /// is the type to convert V to. DeferInsertion defines whether the
+ /// generated conversion should also be installed into basic block
+ /// BBIndex. Note: For PHI nodes, we don't insert when created
+ /// (i.e. DeferInsertion=true), since they must be inserted at the end
+ /// of the corresponding incoming basic block.
+ CastInst *CreateCast(unsigned BBIndex, Instruction::CastOps Op,
+ Type *CT, Value *V, bool DeferInsertion = false);
+
+ /// \brief Add instructions to cast Op to the given type T into
+ /// block BBIndex. Follows rules for pointer conversion as defined
+ /// in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
///
/// Returns 0 if unable to generate conversion value (also generates
/// an appropriate error message and calls Error).
- Value *ConvertOpToType(Value *Op, Type *T, BasicBlock *BB);
-
- /// \brief If Op is a scalar value, this is a nop. If Op is a
- /// pointer value, a PtrToInt instruction is inserted (in BB) to
- /// convert Op to an integer.
- Value *ConvertOpToScalar(Value *Op, BasicBlock *BB);
+ Value *ConvertOpToType(Value *Op, Type *T, unsigned BBIndex);
+
+ /// \brief If Op is a scalar value, this is a nop. If Op is a
+ /// pointer value, a PtrToInt instruction is inserted (in BBIndex)
+ /// to convert Op to an integer. For defaults on DeferInsertion,
+ /// see comments for method CreateCast.
+ Value *ConvertOpToScalar(Value *Op, unsigned BBIndex,
+ bool DeferInsertion = false);
/// \brief Returns the corresponding PNaCl non-pointer equivalent
/// for the given type.
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index bee36e2631..060a6d63f4 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -479,6 +479,7 @@ static bool ExpectsScalarValue(const Value *V, const Instruction *Arg) {
switch (I->getOpcode()) {
default:
return false;
+ case Instruction::PHI:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -497,7 +498,6 @@ static bool ExpectsScalarValue(const Value *V, const Instruction *Arg) {
// instructions:
// case Instruction::IntToPtr:
// case Instruction::BitCast:
- // case Instruction::PHI:
// case Instruction::Call:
}
}
diff --git a/test/NaCl/Bitcode/bitcast-elide.ll b/test/NaCl/Bitcode/bitcast-elide.ll
index eeee69ffef..383673d684 100644
--- a/test/NaCl/Bitcode/bitcast-elide.ll
+++ b/test/NaCl/Bitcode/bitcast-elide.ll
@@ -17,17 +17,19 @@
; ------------------------------------------------------
-@bytes = internal global [7 x i8] c"abcdefg"
+@bytes = internal global [4 x i8] c"abcd"
+
+; ------------------------------------------------------
; Test that we elide the simple case of global.
define void @SimpleLoad() {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
ret void
}
; TD1: define void @SimpleLoad() {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: ret void
; TD1-NEXT: }
@@ -40,7 +42,7 @@ define void @SimpleLoad() {
; PF1-NEXT: </FUNCTION_BLOCK>
; TD2: define void @SimpleLoad() {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -51,6 +53,8 @@ define void @SimpleLoad() {
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we elide the simple case of an alloca.
define void @SimpleLoadAlloca() {
%1 = alloca i8, i32 4, align 4
@@ -67,8 +71,6 @@ define void @SimpleLoadAlloca() {
; TD1-NEXT: }
; PF1: <FUNCTION_BLOCK>
-; PF1-NEXT: <DECLAREBLOCKS op0=1/>
-; PF1-NEXT: <CONSTANTS_BLOCK
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=1 op1=3/>
; PF1-NEXT: <INST_CAST op0=1 op1=1 op2=11/>
@@ -84,23 +86,23 @@ define void @SimpleLoadAlloca() {
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
-; PF2-NEXT: <DECLAREBLOCKS op0=1/>
-; PF2-NEXT: <CONSTANTS_BLOCK
; PF2: </CONSTANTS_BLOCK>
; PF2-NEXT: <INST_ALLOCA op0=1 op1=3/>
; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we don't elide a bitcast if one of its uses is not a load.
define i32* @NonsimpleLoad(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
ret i32* %1
}
; TD1: define i32* @NonsimpleLoad(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: ret i32* %1
; TD1-NEXT: }
@@ -113,7 +115,7 @@ define i32* @NonsimpleLoad(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define i32* @NonsimpleLoad(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
; TD2-NEXT: ret i32* %1
; TD2-NEXT: }
@@ -125,20 +127,22 @@ define i32* @NonsimpleLoad(i32 %i) {
; PF2-NEXT: <INST_RET op0=2/>
; PF2: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we can handle multiple bitcasts.
define i32 @TwoLoads(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
- %3 = bitcast [7 x i8]* @bytes to i32*
+ %3 = bitcast [4 x i8]* @bytes to i32*
%4 = load i32* %3, align 4
%5 = add i32 %2, %4
ret i32 %5
}
; TD1: define i32 @TwoLoads(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
-; TD1-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %3 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %4 = load i32* %3, align 4
; TD1-NEXT: %5 = add i32 %2, %4
; TD1-NEXT: ret i32 %5
@@ -155,12 +159,11 @@ define i32 @TwoLoads(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define i32 @TwoLoads(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -171,17 +174,20 @@ define i32 @TwoLoads(i32 %i) {
; PF2-NEXT: <INST_RET op0=1/>
; PF2: </FUNCTION_BLOCK>
-; Test how we duplicate bitcasts, even if optimized in the input file.
-define i32 @TwoLoadOpt(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+; ------------------------------------------------------
+
+; Test how we handle bitcasts if optimized in the input file. This
+; case tests within a single block.
+define i32 @TwoLoadOptOneBlock(i32 %i) {
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
%3 = load i32* %1, align 4
%4 = add i32 %2, %3
ret i32 %4
}
-; TD1: define i32 @TwoLoadOpt(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1: define i32 @TwoLoadOptOneBlock(i32 %i) {
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: %3 = load i32* %1, align 4
; TD1-NEXT: %4 = add i32 %2, %3
@@ -197,13 +203,12 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF1-NEXT: <INST_RET op0=1/>
; PF1: </FUNCTION_BLOCK>
-; TD2: define i32 @TwoLoadOpt(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2: define i32 @TwoLoadOptOneBlock(i32 %i) {
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -214,15 +219,87 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF2-NEXT: <INST_RET op0=1/>
; PF2: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
+; Test how we handle bitcasts if optimized in the input file. This
+; case tests across blocks.
+define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+ %1 = bitcast [4 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ %3 = load i32* %1, align 4
+ %4 = add i32 %2, %3
+ br label %BB
+
+BB:
+ %5 = load i32* %1, align 4
+ %6 = load i32* %1, align 4
+ %7 = add i32 %5, %6
+ ret i32 %4
+}
+
+; TD1: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = load i32* %1, align 4
+; TD1-NEXT: %4 = add i32 %2, %3
+; TD1-NEXT: br label %BB
+; TD1: BB:
+; TD1-NEXT: %5 = load i32* %1, align 4
+; TD1-NEXT: %6 = load i32* %1, align 4
+; TD1-NEXT: %7 = add i32 %5, %6
+; TD1-NEXT: ret i32 %4
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1-NEXT: <DECLAREBLOCKS op0=2/>
+; PF1-NEXT: <INST_CAST op0=2 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET op0=4/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: br label %BB
+; TD2: BB:
+; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: %6 = load i32* %5, align 4
+; TD2-NEXT: %7 = load i32* %5, align 4
+; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: ret i32 %4
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2-NEXT: <DECLAREBLOCKS op0=2/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=3 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=6 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET op0=4/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
; Test that we elide the simple case of bitcast for a store.
define void @SimpleStore(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
store i32 %i, i32* %1, align 4
ret void
}
; TD1: define void @SimpleStore(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: store i32 %i, i32* %1, align 4
; TD1-NEXT: ret void
; TD1-NEXT: }
@@ -235,7 +312,7 @@ define void @SimpleStore(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define void @SimpleStore(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: store i32 %i, i32* %1, align 4
; TD2-NEXT: ret void
; TD2-NEXT: }
diff --git a/test/NaCl/Bitcode/inttoptr-elide.ll b/test/NaCl/Bitcode/inttoptr-elide.ll
index 029f67adef..679f5f1d47 100644
--- a/test/NaCl/Bitcode/inttoptr-elide.ll
+++ b/test/NaCl/Bitcode/inttoptr-elide.ll
@@ -118,13 +118,11 @@ define i32 @TwoLoads(i32 %i) {
; TD2: define i32 @TwoLoads(i32 %i) {
; TD2-NEXT: %1 = inttoptr i32 %i to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = inttoptr i32 %i to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
-
; PF2: <FUNCTION_BLOCK>
; PF2-NEXT: <DECLAREBLOCKS op0=1/>
; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
@@ -135,8 +133,9 @@ define i32 @TwoLoads(i32 %i) {
; ------------------------------------------------------
-; Test how we duplicate inttoptrs, even if optimized in the input file.
-define i32 @TwoLoadOpt(i32 %i) {
+; Test how we handle inttoptrs if optimized in the input file. This
+; case tests within a single block.
+define i32 @TwoLoadOptOneBlock(i32 %i) {
%1 = inttoptr i32 %i to i32*
%2 = load i32* %1, align 4
%3 = load i32* %1, align 4
@@ -144,7 +143,7 @@ define i32 @TwoLoadOpt(i32 %i) {
ret i32 %4
}
-; TD1: define i32 @TwoLoadOpt(i32 %i) {
+; TD1: define i32 @TwoLoadOptOneBlock(i32 %i) {
; TD1-NEXT: %1 = inttoptr i32 %i to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: %3 = load i32* %1, align 4
@@ -161,13 +160,12 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF1-NEXT: <INST_RET op0=1/>
; PF1: </FUNCTION_BLOCK>
-; TD2: define i32 @TwoLoadOpt(i32 %i) {
+; TD2: define i32 @TwoLoadOptOneBlock(i32 %i) {
; TD2-NEXT: %1 = inttoptr i32 %i to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = inttoptr i32 %i to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -180,6 +178,76 @@ define i32 @TwoLoadOpt(i32 %i) {
; ------------------------------------------------------
+; Test how we handle inttoptrs if optimized in the input file. This
+; case tests across blocks.
+define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+ %1 = inttoptr i32 %i to i32*
+ %2 = load i32* %1, align 4
+ %3 = load i32* %1, align 4
+ %4 = add i32 %2, %3
+ br label %BB
+
+BB:
+ %5 = load i32* %1, align 4
+ %6 = load i32* %1, align 4
+ %7 = add i32 %5, %6
+ ret i32 %7
+}
+
+; TD1: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD1-NEXT: %1 = inttoptr i32 %i to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = load i32* %1, align 4
+; TD1-NEXT: %4 = add i32 %2, %3
+; TD1-NEXT: br label %BB
+; TD1: BB:
+; TD1-NEXT: %5 = load i32* %1, align 4
+; TD1-NEXT: %6 = load i32* %1, align 4
+; TD1-NEXT: %7 = add i32 %5, %6
+; TD1-NEXT: ret i32 %7
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1-NEXT: <DECLAREBLOCKS op0=2/>
+; PF1-NEXT: <INST_CAST op0=1 op1=1 op2=10/>
+; PF1-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET op0=1/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD2-NEXT: %1 = inttoptr i32 %i to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: br label %BB
+; TD2: BB:
+; TD2-NEXT: %5 = inttoptr i32 %i to i32*
+; TD2-NEXT: %6 = load i32* %5, align 4
+; TD2-NEXT: %7 = load i32* %5, align 4
+; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: ret i32 %8
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2-NEXT: <DECLAREBLOCKS op0=2/>
+; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET op0=1/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
; Test that we elide the simple case of inttoptr for a store.
define void @SimpleStore(i32 %i) {
%1 = inttoptr i32 %i to i32*
@@ -210,4 +278,4 @@ define void @SimpleStore(i32 %i) {
; PF2-NEXT: <DECLAREBLOCKS op0=1/>
; PF2-NEXT: <INST_STORE op0=1 op1=1 op2=3/>
; PF2-NEXT: <INST_RET/>
-; PF2T: </FUNCTION_BLOCK>
+; PF2: </FUNCTION_BLOCK>
diff --git a/test/NaCl/Bitcode/ptrtoint-elide.ll b/test/NaCl/Bitcode/ptrtoint-elide.ll
index 10504a8577..43a82a0802 100644
--- a/test/NaCl/Bitcode/ptrtoint-elide.ll
+++ b/test/NaCl/Bitcode/ptrtoint-elide.ll
@@ -153,8 +153,8 @@ define void @AllocCastDelete() {
; ------------------------------------------------------
; Show case where we have optimized the ptrtoint (and bitcast) into a
-; single instruction, but will get duplicated after reading back the
-; bitcode file, since we insert elided casts immediately before each use.
+; single instruction, and it will only be inserted before the first use
+; in the block.
define void @AllocCastOpt() {
%1 = alloca i8, i32 4, align 8
%2 = bitcast [4 x i8]* @bytes to i32*
@@ -177,7 +177,7 @@ define void @AllocCastOpt() {
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
; PF1-NEXT: <INST_CAST op0=3 op1=4 op2=11/>
-; PF1-NEXT: <INST_CAST op0=2 op1=0 op2=9/>
+; PF1-NEXT: <INST_CAST op0=2 op1=0 op2=9/>
; PF1-NEXT: <INST_STORE op0=2 op1=1 op2=1 op3=0/>
; PF1-NEXT: <INST_STORE op0=2 op1=1 op2=1 op3=0/>
; PF1-NEXT: <INST_RET/>
@@ -188,9 +188,7 @@ define void @AllocCastOpt() {
; TD2-NEXT: %2 = ptrtoint i8* %1 to i32
; TD2-NEXT: %3 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: store i32 %2, i32* %3, align 1
-; TD2-NEXT: %4 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32*
-; TD2-NEXT: store i32 %4, i32* %5, align 1
+; TD2-NEXT: store i32 %2, i32* %3, align 1
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -366,7 +364,6 @@ define i32 @StoreGlobalMovePtr2Int() {
; PF1-NEXT: <INST_RET op0=4/>
; PF1-NEXT: </FUNCTION_BLOCK>
-
; TD2: define i32 @StoreGlobalMovePtr2Int() {
; TD2-NEXT: %1 = alloca i8, i32 4, align 8
; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32
@@ -430,11 +427,8 @@ define void @CastAddAlloca() {
; TD2-NEXT: %2 = add i32 1, 2
; TD2-NEXT: %3 = ptrtoint i8* %1 to i32
; TD2-NEXT: %4 = add i32 %3, 2
-; TD2-NEXT: %5 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %6 = add i32 1, %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %9 = add i32 %7, %8
+; TD2-NEXT: %5 = add i32 1, %3
+; TD2-NEXT: %6 = add i32 %3, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -491,11 +485,8 @@ define void @CastAddGlobal() {
; TD2-NEXT: %1 = add i32 1, 2
; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %3 = add i32 %2, 2
-; TD2-NEXT: %4 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %5 = add i32 1, %4
-; TD2-NEXT: %6 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %7 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: %4 = add i32 1, %2
+; TD2-NEXT: %5 = add i32 %2, %2
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -571,36 +562,16 @@ define void @CastBinop() {
; TD2-NEXT: %2 = ptrtoint i8* %1 to i32
; TD2-NEXT: %3 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %4 = sub i32 %2, %3
-; TD2-NEXT: %5 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %6 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %7 = mul i32 %5, %6
-; TD2-NEXT: %8 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %9 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %10 = udiv i32 %8, %9
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %13 = urem i32 %11, %12
-; TD2-NEXT: %14 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %15 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %16 = srem i32 %14, %15
-; TD2-NEXT: %17 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %18 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %19 = shl i32 %17, %18
-; TD2-NEXT: %20 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %21 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %22 = lshr i32 %20, %21
-; TD2-NEXT: %23 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %24 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %25 = ashr i32 %23, %24
-; TD2-NEXT: %26 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %27 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %28 = and i32 %26, %27
-; TD2-NEXT: %29 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %30 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %31 = or i32 %29, %30
-; TD2-NEXT: %32 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %33 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %34 = xor i32 %32, %33
+; TD2-NEXT: %5 = mul i32 %2, %3
+; TD2-NEXT: %6 = udiv i32 %2, %3
+; TD2-NEXT: %7 = urem i32 %2, %3
+; TD2-NEXT: %8 = srem i32 %2, %3
+; TD2-NEXT: %9 = shl i32 %2, %3
+; TD2-NEXT: %10 = lshr i32 %2, %3
+; TD2-NEXT: %11 = ashr i32 %2, %3
+; TD2-NEXT: %12 = and i32 %2, %3
+; TD2-NEXT: %13 = or i32 %2, %3
+; TD2-NEXT: %14 = xor i32 %2, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -666,16 +637,16 @@ define void @TestCasts() {
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
-; PF1-NEXT: <INST_CAST op0=6 op1=1 op2=0/>
-; PF1-NEXT: <INST_CAST op0=2 op1=1 op2=0/>
-; PF1-NEXT: <INST_CAST op0=8 op1=10 op2=1/>
-; PF1-NEXT: <INST_CAST op0=4 op1=10 op2=1/>
-; PF1-NEXT: <INST_CAST op0=9 op1=10 op2=2/>
-; PF1-NEXT: <INST_CAST op0=6 op1=10 op2=2/>
-; PF1-NEXT: <INST_CAST op0=9 op1=11 op2=5/>
-; PF1-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF1-NEXT: <INST_CAST op0=13 op1=11 op2=6/>
-; PF1-NEXT: <INST_CAST op0=10 op1=11 op2=6/>
+; PF1-NEXT: <INST_CAST op0=6 op1=2 op2=0/>
+; PF1-NEXT: <INST_CAST op0=2 op1=2 op2=0/>
+; PF1-NEXT: <INST_CAST op0=8 op1=13 op2=1/>
+; PF1-NEXT: <INST_CAST op0=4 op1=13 op2=1/>
+; PF1-NEXT: <INST_CAST op0=9 op1=13 op2=2/>
+; PF1-NEXT: <INST_CAST op0=6 op1=13 op2=2/>
+; PF1-NEXT: <INST_CAST op0=9 op1=14 op2=5/>
+; PF1-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF1-NEXT: <INST_CAST op0=13 op1=14 op2=6/>
+; PF1-NEXT: <INST_CAST op0=10 op1=14 op2=6/>
; PF1-NEXT: <INST_RET/>
; PF1-NEXT: </FUNCTION_BLOCK>
@@ -685,33 +656,29 @@ define void @TestCasts() {
; TD2-NEXT: %3 = ptrtoint i8* %1 to i32
; TD2-NEXT: %4 = trunc i32 %3 to i8
; TD2-NEXT: %5 = zext i32 257 to i64
-; TD2-NEXT: %6 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %7 = zext i32 %6 to i64
-; TD2-NEXT: %8 = sext i32 -1 to i64
-; TD2-NEXT: %9 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %10 = sext i32 %9 to i64
-; TD2-NEXT: %11 = uitofp i32 1 to float
-; TD2-NEXT: %12 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %13 = uitofp i32 %12 to float
-; TD2-NEXT: %14 = sitofp i32 -1 to float
-; TD2-NEXT: %15 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %16 = sitofp i32 %15 to float
+; TD2-NEXT: %6 = zext i32 %3 to i64
+; TD2-NEXT: %7 = sext i32 -1 to i64
+; TD2-NEXT: %8 = sext i32 %3 to i64
+; TD2-NEXT: %9 = uitofp i32 1 to float
+; TD2-NEXT: %10 = uitofp i32 %3 to float
+; TD2-NEXT: %11 = sitofp i32 -1 to float
+; TD2-NEXT: %12 = sitofp i32 %3 to float
; TD2-NEXT: ret void
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
; PF2: </CONSTANTS_BLOCK>
; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
-; PF2-NEXT: <INST_CAST op0=5 op1=1 op2=0/>
-; PF2-NEXT: <INST_CAST op0=2 op1=1 op2=0/>
-; PF2-NEXT: <INST_CAST op0=7 op1=10 op2=1/>
-; PF2-NEXT: <INST_CAST op0=4 op1=10 op2=1/>
-; PF2-NEXT: <INST_CAST op0=8 op1=10 op2=2/>
-; PF2-NEXT: <INST_CAST op0=6 op1=10 op2=2/>
-; PF2-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF2-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF2-NEXT: <INST_CAST op0=12 op1=11 op2=6/>
-; PF2-NEXT: <INST_CAST op0=10 op1=11 op2=6/>
+; PF2-NEXT: <INST_CAST op0=5 op1=2 op2=0/>
+; PF2-NEXT: <INST_CAST op0=2 op1=2 op2=0/>
+; PF2-NEXT: <INST_CAST op0=7 op1=13 op2=1/>
+; PF2-NEXT: <INST_CAST op0=4 op1=13 op2=1/>
+; PF2-NEXT: <INST_CAST op0=8 op1=13 op2=2/>
+; PF2-NEXT: <INST_CAST op0=6 op1=13 op2=2/>
+; PF2-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF2-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF2-NEXT: <INST_CAST op0=12 op1=14 op2=6/>
+; PF2-NEXT: <INST_CAST op0=10 op1=14 op2=6/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
@@ -741,7 +708,7 @@ define void @TestSavedPtrToInt() {
; PF1-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
; PF1-NEXT: <INST_BINOP op0=1 op1=3 op2=0/>
-; PF1-NEXT: <INST_CALL op0=0 op1=22 op2=2/>
+; PF1-NEXT: <INST_CALL op0=0 op1=26 op2=2/>
; PF1-NEXT: <INST_RET/>
; PF1-NEXT: </FUNCTION_BLOCK>
@@ -758,7 +725,7 @@ define void @TestSavedPtrToInt() {
; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF2-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
; PF2-NEXT: <INST_BINOP op0=1 op1=3 op2=0/>
-; PF2-NEXT: <INST_CALL op0=0 op1=22 op2=2/>
+; PF2-NEXT: <INST_CALL op0=0 op1=26 op2=2/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
@@ -809,12 +776,8 @@ define void @CastIcmp() {
; TD2-NEXT: %4 = icmp eq i32 %3, 2
; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %6 = icmp eq i32 1, %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %9 = icmp eq i32 %7, %8
-; TD2-NEXT: %10 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = icmp eq i32 %10, %11
+; TD2-NEXT: %7 = icmp eq i32 %3, %5
+; TD2-NEXT: %8 = icmp eq i32 %5, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -876,12 +839,8 @@ define void @CastSelect() {
; TD2-NEXT: %4 = select i1 true, i32 %3, i32 2
; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %6 = select i1 true, i32 1, i32 %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %9 = select i1 true, i32 %7, i32 %8
-; TD2-NEXT: %10 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = select i1 true, i32 %10, i32 %11
+; TD2-NEXT: %7 = select i1 true, i32 %3, i32 %5
+; TD2-NEXT: %8 = select i1 true, i32 %5, i32 %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -895,3 +854,447 @@ define void @CastSelect() {
; PF2-NEXT: <INST_VSELECT op0=10 op1=5 op2=6/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that if a phi node refers to a pointer cast, we add the
+; cast at the end of each incoming block.
+define void @PhiBackwardRefs(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = bitcast i8* %2 to i32*
+ %4 = alloca i8, i32 4, align 8
+ %5 = ptrtoint i8* %4 to i32
+ br i1 %0, label %true, label %false
+
+true:
+ %6 = load i32* %3
+ br label %merge
+
+false:
+ %7 = load i32* %3
+ br label %merge
+
+merge:
+ %8 = phi i32 [%5, %true], [%5, %false]
+ %9 = phi i32 [%6, %true], [%7, %false]
+ ret void
+}
+
+; TD1: define void @PhiBackwardRefs(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = bitcast i8* %2 to i32*
+; TD1-NEXT: %4 = alloca i8, i32 4, align 8
+; TD1-NEXT: %5 = ptrtoint i8* %4 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1: true:
+; TD1-NEXT: %6 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %7 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: merge:
+; TD1-NEXT: %8 = phi i32 [ %5, %true ], [ %5, %false ]
+; TD1-NEXT: %9 = phi i32 [ %6, %true ], [ %7, %false ]
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=6/>
+; PF1-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=6 op4=2/>
+; PF1-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=4 op4=2/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiBackwardRefs(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: %3 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %2 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %7 = bitcast i8* %2 to i32*
+; TD2-NEXT: %8 = load i32* %7
+; TD2-NEXT: %9 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: merge:
+; TD2-NEXT: %10 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD2-NEXT: %11 = phi i32 [ %5, %true ], [ %8, %false ]
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=4/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=6 op4=2/>
+; PF2-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=4 op4=2/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Like PhiBackwardRefs, except that the phi nodes forward-reference
+; instructions instead of referencing them backwards.
+define void @PhiForwardRefs(i1) {
+ br label %start
+
+merge:
+ %2 = phi i32 [%9, %true], [%9, %false]
+ %3 = phi i32 [%4, %true], [%5, %false]
+ ret void
+
+true:
+ %4 = load i32* %7
+ br label %merge
+
+false:
+ %5 = load i32* %7
+ br label %merge
+
+start:
+ %6 = alloca i8, i32 4, align 8
+ %7 = bitcast i8* %6 to i32*
+ %8 = alloca i8, i32 4, align 8
+ %9 = ptrtoint i8* %8 to i32
+ br i1 %0, label %true, label %false
+}
+
+; TD1: define void @PhiForwardRefs(i1) {
+; TD1-NEXT: br label %start
+; TD1: merge:
+; TD1-NEXT: %2 = phi i32 [ %9, %true ], [ %9, %false ]
+; TD1-NEXT: %3 = phi i32 [ %4, %true ], [ %5, %false ]
+; TD1-NEXT: ret void
+; TD1: true:
+; TD1-NEXT: %4 = load i32* %7
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %5 = load i32* %7
+; TD1-NEXT: br label %merge
+; TD1: start:
+; TD1-NEXT: %6 = alloca i8, i32 4, align 8
+; TD1-NEXT: %7 = bitcast i8* %6 to i32*
+; TD1-NEXT: %8 = alloca i8, i32 4, align 8
+; TD1-NEXT: %9 = ptrtoint i8* %8 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_BR op0=4/>
+; PF1-NEXT: <FORWARDTYPEREF op0=30 op1=0/>
+; PF1-NEXT: <INST_PHI op0=0 op1=15 op2=2 op3=15 op4=3/>
+; PF1-NEXT: <FORWARDTYPEREF op0=25 op1=0/>
+; PF1-NEXT: <FORWARDTYPEREF op0=26 op1=0/>
+; PF1-NEXT: <INST_PHI op0=0 op1=3 op2=2 op3=5 op4=3/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <FORWARDTYPEREF op0=28 op1=4/>
+; PF1-NEXT: <INST_LOAD op0=4294967293 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4294967294 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_ALLOCA op0=5 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=7 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=2 op1=3 op2=10/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiForwardRefs(i1) {
+; TD2-NEXT: br label %start
+; TD2: merge
+; TD2-NEXT: %2 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD2-NEXT: %3 = phi i32 [ %5, %true ], [ %8, %false ]
+; TD2-NEXT: ret void
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %10 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %11 to i32
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %7 = bitcast i8* %10 to i32*
+; TD2-NEXT: %8 = load i32* %7
+; TD2-NEXT: %9 = ptrtoint i8* %11 to i32
+; TD2-NEXT: br label %merge
+; TD2: start:
+; TD2-NEXT: %10 = alloca i8, i32 4, align 8
+; TD2-NEXT: %11 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_BR op0=4/>
+; PF2-NEXT: <FORWARDTYPEREF op0=28 op1=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=11 op2=2 op3=11 op4=3/>
+; PF2-NEXT: <FORWARDTYPEREF op0=25 op1=0/>
+; PF2-NEXT: <FORWARDTYPEREF op0=26 op1=0/>
+; PF2-NEXT: <INST_PHI op0=0 op1=3 op2=2 op3=5 op4=3/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <FORWARDTYPEREF op0=27 op1=3/>
+; PF2-NEXT: <INST_LOAD op0=4294967294 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=4294967295 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_ALLOCA op0=5 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=6 op1=4/>
+; PF2-NEXT: <INST_BR op0=2 op1=3 op2=8/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that if a phi node's incoming block already has a pointer cast,
+; we use it instead of adding one at the end of the block. In this
+; example, we reuse instruction %7 in block true for phi node %10.
+define void @PhiMergeCast(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = bitcast i8* %2 to i32*
+ %4 = alloca i8, i32 4, align 8
+ %5 = ptrtoint i8* %4 to i32
+ br i1 %0, label %true, label %false
+
+true:
+ %6 = load i32* %3
+ %7 = ptrtoint i8* %4 to i32
+ %8 = add i32 %6, %7
+ br label %merge
+
+false:
+ %9 = load i32* %3
+ br label %merge
+
+merge:
+ %10 = phi i32 [%5, %true], [%5, %false]
+ %11 = phi i32 [%6, %true], [%9, %false]
+ ret void
+}
+
+; TD1: define void @PhiMergeCast(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = bitcast i8* %2 to i32*
+; TD1-NEXT: %4 = alloca i8, i32 4, align 8
+; TD1-NEXT: %5 = ptrtoint i8* %4 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1: true:
+; TD1-NEXT: %6 = load i32* %3
+; TD1-NEXT: %7 = ptrtoint i8* %4 to i32
+; TD1-NEXT: %8 = add i32 %6, %7
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %9 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: merge:
+; TD1-NEXT: %10 = phi i32 [ %5, %true ], [ %5, %false ]
+; TD1-NEXT: %11 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=6/>
+; PF1-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF1-NEXT: <INST_CAST op0=3 op1=0 op2=9/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_LOAD op0=6 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_PHI op0=0 op1=10 op2=1 op3=10 op4=2/>
+; PF1-NEXT: <INST_PHI op0=0 op1=10 op2=1 op3=4 op4=2/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiMergeCast(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: %3 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %2 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %3 to i32
+; TD2-NEXT: %7 = add i32 %5, %6
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %8 = bitcast i8* %2 to i32*
+; TD2-NEXT: %9 = load i32* %8
+; TD2-NEXT: %10 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: merge:
+; TD2-NEXT: %11 = phi i32 [ %6, %true ], [ %10, %false ]
+; TD2-NEXT: %12 = phi i32 [ %5, %true ], [ %9, %false ]
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=4/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=0 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=1 op1=2 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_LOAD op0=4 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=8 op2=1 op3=8 op4=2/>
+; PF2-NEXT: <INST_PHI op0=0 op1=8 op2=1 op3=4 op4=2/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we must introduce a cast reference in each reachable
+; block that uses it, but one per block is sufficient.
+define void @LongReachingCasts(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = ptrtoint i8* %2 to i32
+ %4 = bitcast [4 x i8]* @bytes to i32*
+ br i1 %0, label %Split1, label %Split2
+
+Split1:
+ br i1 %0, label %b1, label %b2
+
+Split2:
+ br i1 %0, label %b3, label %b4
+
+b1:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b2:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b3:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b4:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+}
+
+; TD1: define void @LongReachingCasts(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = ptrtoint i8* %2 to i32
+; TD1-NEXT: %4 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT: br i1 %0, label %Split1, label %Split2
+; TD1: Split1:
+; TD1-NEXT: br i1 %0, label %b1, label %b2
+; TD1: Split2:
+; TD1-NEXT: br i1 %0, label %b3, label %b4
+; TD1: b1:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b2:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b3:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b4:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_CAST op0=5 op1=4 op2=11/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=5/>
+; PF1-NEXT: <INST_BR op0=3 op1=4 op2=5/>
+; PF1-NEXT: <INST_BR op0=5 op1=6 op2=5/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @LongReachingCasts(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %Split1, label %Split2
+; TD2: Split1:
+; TD2-NEXT: br i1 %0, label %b1, label %b2
+; TD2: Split2:
+; TD2-NEXT: br i1 %0, label %b3, label %b4
+; TD2: b1:
+; TD2-NEXT: %3 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %4 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %3, i32* %4, align 1
+; TD2-NEXT: store i32 %3, i32* %4, align 1
+; TD2-NEXT: ret void
+; TD2: b2:
+; TD2-NEXT: %5 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %6 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %5, i32* %6, align 1
+; TD2-NEXT: store i32 %5, i32* %6, align 1
+; TD2-NEXT: ret void
+; TD2: b3:
+; TD2-NEXT: %7 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %8 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %7, i32* %8, align 1
+; TD2-NEXT: store i32 %7, i32* %8, align 1
+; TD2-NEXT: ret void
+; TD2: b4:
+; TD2-NEXT: %9 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %10 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %9, i32* %10, align 1
+; TD2-NEXT: store i32 %9, i32* %10, align 1
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=3/>
+; PF2-NEXT: <INST_BR op0=3 op1=4 op2=3/>
+; PF2-NEXT: <INST_BR op0=5 op1=6 op2=3/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>