7 files changed, 1046 insertions, 17 deletions
diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
index 1eb188a83e..d415b85de8 100644
--- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
+++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h
@@ -25,6 +25,9 @@ namespace llvm {
   class Module;
   class raw_ostream;
 
+  /// \brief Defines the integer bit size used to model pointers in PNaCl.
+  static const unsigned PNaClIntPtrTypeBitSize = 32;
+
   /// getNaClLazyBitcodeModule - Read the header of the specified bitcode buffer
   /// and prepare for lazy deserialization of function bodies.  If successful,
   /// this takes ownership of 'buffer' and returns a non-null pointer.  On
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
index f9d479767b..1bbbf4516a 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
@@ -1314,6 +1314,15 @@ bool NaClBitcodeReader::InstallInstruction(
   return false;
 }
 
+Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, BasicBlock *BB) {
+  // Non-pointer operands pass through; pointers get a ptrtoint added in BB.
+  if (!Op->getType()->isPointerTy())
+    return Op;
+  Instruction *PtrAsInt = new PtrToIntInst(Op, IntPtrType);
+  InstallInstruction(BB, PtrAsInt);
+  return PtrAsInt;
+}
+
 Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
   // Note: Currently only knows how to add inttoptr and bitcast type
   // conversions for non-phi nodes, since these are the only elided
@@ -1326,7 +1335,7 @@ Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
 
   if (OpTy->isPointerTy()) {
     Conversion = new BitCastInst(Op, T);
-  } else if (OpTy->isIntegerTy()) {
+  } else if (OpTy == IntPtrType) {
     Conversion = new IntToPtrInst(Op, T);
   }
 
@@ -1341,6 +1350,10 @@ Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
   return Conversion;
 }
 
+Type *NaClBitcodeReader::ConvertTypeToScalarType(Type *T) {
+  return T->isPointerTy() ? IntPtrType : T;  // Pointers map to IntPtrType.
+}
+
 /// ParseFunctionBody - Lazily parse the specified function body block.
 bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
   DEBUG(dbgs() << "-> ParseFunctionBody\n");
@@ -1427,6 +1440,9 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
           OpNum+1 > Record.size())
         return Error("Invalid BINOP record");
 
+      LHS = ConvertOpToScalar(LHS, CurBB);
+      RHS = ConvertOpToScalar(RHS, CurBB);
+
       int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
       if (Opc == -1) return Error("Invalid BINOP record");
       I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
@@ -1475,6 +1491,24 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
       int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
       if (Opc == -1 || ResTy == 0)
         return Error("Invalid CAST record");
+
+      if (GetPNaClVersion() == 2) {
+        // If a ptrtoint cast was elided on the argument of the cast,
+        // add it back. Note: The casts allowed here should match the
+        // casts in NaClValueEnumerator::ExpectsScalarValue.
+        switch (Opc) {
+        case Instruction::Trunc:
+        case Instruction::ZExt:
+        case Instruction::SExt:
+        case Instruction::UIToFP:
+        case Instruction::SIToFP:
+          Op = ConvertOpToScalar(Op, CurBB);
+          break;
+        default:
+          break;
+        }
+      }
+
       I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
       break;
     }
@@ -1489,6 +1523,9 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
           popValue(Record, &OpNum, NextValueNo, &Cond))
         return Error("Invalid SELECT record");
 
+      TrueVal = ConvertOpToScalar(TrueVal, CurBB);
+      FalseVal = ConvertOpToScalar(FalseVal, CurBB);
+
       // expect i1
       if (Cond->getType() != Type::getInt1Ty(Context))
         return Error("Invalid SELECT condition type");
@@ -1507,6 +1544,9 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
           OpNum+1 != Record.size())
         return Error("Invalid CMP record");
 
+      LHS = ConvertOpToScalar(LHS, CurBB);
+      RHS = ConvertOpToScalar(RHS, CurBB);
+
       if (LHS->getType()->isFPOrFPVectorTy())
         I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
       else
@@ -1612,6 +1652,9 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
       Type *Ty = getTypeByID(Record[0]);
       if (!Ty) return Error("Invalid PHI record");
 
+      // TODO(kschimpf): Fix handling of converting types for values,
+      // to handle elided casts, once the bitcode writer knows how.
+
       PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
 
       for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
@@ -1684,6 +1727,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
       case 2:
 	if (OpNum+1 != Record.size())
 	  return Error("Invalid STORE record");
+	Val = ConvertOpToScalar(Val, CurBB);
 	Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBB);
 	I = new StoreInst(Val, Ptr, false, (1 << Record[OpNum]) >> 1);
 	break;
@@ -1695,6 +1739,9 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
       if (Record.size() < 2)
         return Error("Invalid CALL record");
 
+      // TODO(kschimpf): Fix handling of type conversion to arguments for PNaCl,
+      // to handle elided casts, once the bitcode writer knows how.
+
       unsigned CCInfo = Record[0];
 
       unsigned OpNum = 1;
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
index 805bddf0b2..814ef44efb 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
@@ -20,6 +20,7 @@
 #include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
 #include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
 #include "llvm/GVMaterializer.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/ValueHandle.h"
@@ -167,6 +168,9 @@ class NaClBitcodeReader : public GVMaterializer {
   /// \brief True if we should only accept supported bitcode format.
   bool AcceptSupportedBitcodeOnly;
 
+  /// \brief Integer type used for PNaCl conversion of pointers.
+  Type *IntPtrType;
+
 public:
   explicit NaClBitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
                              bool AcceptSupportedOnly = true)
@@ -174,7 +178,8 @@ public:
       LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
       ValueList(C),
       SeenFirstFunctionBody(false), UseRelativeIDs(false),
-      AcceptSupportedBitcodeOnly(AcceptSupportedOnly) {
+      AcceptSupportedBitcodeOnly(AcceptSupportedOnly),
+      IntPtrType(IntegerType::get(C, PNaClIntPtrTypeBitSize)) {
   }
   explicit NaClBitcodeReader(DataStreamer *streamer, LLVMContext &C,
                              bool AcceptSupportedOnly = true)
@@ -182,7 +187,8 @@ public:
       LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
       ValueList(C),
       SeenFirstFunctionBody(false), UseRelativeIDs(false),
-      AcceptSupportedBitcodeOnly(AcceptSupportedOnly) {
+      AcceptSupportedBitcodeOnly(AcceptSupportedOnly),
+      IntPtrType(IntegerType::get(C, PNaClIntPtrTypeBitSize)) {
   }
   ~NaClBitcodeReader() {
     FreeState();
@@ -275,6 +281,15 @@ private:
   /// an appropriate error message and calls Error).
   Value *ConvertOpToType(Value *Op, Type *T, BasicBlock *BB);
 
+  /// \brief If Op is a scalar value, this is a nop. If Op is a
+  /// pointer value, a PtrToInt instruction is inserted (in BB) to
+  /// convert Op to an integer.
+  Value *ConvertOpToScalar(Value *Op, BasicBlock *BB);
+
+  /// \brief Returns the corresponding, PNaCl non-pointer equivalent
+  /// for the given type.
+  Type *ConvertTypeToScalarType(Type *T);
+
   /// \brief Install instruction I into basic block BB.
   bool InstallInstruction(BasicBlock *BB, Instruction *I);
 
diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
index f6b85108e6..047e0c84db 100644
--- a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
@@ -413,7 +413,7 @@ void WriteGlobalInit(const Constant *C, unsigned GlobalVarID,
     }
     return;
   }
-  if (C->getType()->isIntegerTy(32)) {
+  if (VE.IsIntPtrType(C->getType())) {
     // This constant defines a relocation. Start by verifying the
     // relocation is of the right form.
     const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 34f4f2bbe9..bee36e2631 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -44,6 +44,8 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M, uint32_t PNaClVersion)
   TypeCountMapType count_map;
   TypeCountMap = &count_map;
 
+  IntPtrType = IntegerType::get(M->getContext(), PNaClIntPtrTypeBitSize);
+
   // Enumerate the functions. Note: We do this before global
   // variables, so that global variable initializations can refer to
   // the functions without a forward reference.
@@ -429,10 +431,10 @@ void NaClValueEnumerator::purgeFunction() {
 }
 
 // Returns true if the bitcode writer can assume that the given
-// argument of the given operation can accept a normalized pointer.
+// argument of the given operation expects a normalized pointer in PNaCl.
 // Note: This function is based on the concept of NormalizedPtr as
 // defined in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
-static bool AllowsNormalizedPtr(const Value *V, const Instruction *Arg) {
+static bool ExpectsNormalizedPtr(const Value *V, const Instruction *Arg) {
   const Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0) return false;
 
@@ -443,22 +445,19 @@ static bool AllowsNormalizedPtr(const Value *V, const Instruction *Arg) {
   default:
     return false;
   case Instruction::Load:
-    // Verify it is the ptr argument of the load.  Note: This check is
-    // not really necessary in that a load only has one argument.
     return I->getOperand(0) == Arg;
   case Instruction::Store:
-    // Verify it is the ptr argument of the store.
     return I->getOperand(1) == Arg;
   }
 }
 
 // Returns true if the bitcode reader and writer can assume that the
-// uses of the given inttotpr I2P allow normalized pointers (as
+// uses of the given inttoptr I2P expect normalized pointers (as
 // defined in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp).
-static bool AllUsesAllowNormalizedPtr(const Instruction *I2P) {
+static bool AllUsesExpectsNormalizedPtr(const Instruction *I2P) {
   for (Value::const_use_iterator u = I2P->use_begin(), e = I2P->use_end();
        u != e; ++u) {
-    if (!AllowsNormalizedPtr(cast<Value>(*u), I2P)) return false;
+    if (!ExpectsNormalizedPtr(cast<Value>(*u), I2P)) return false;
   }
   // If reached, either all uses have a normalized pointer (and hence
   // we know how to automatically add it back), or there were no uses (and
@@ -466,6 +465,57 @@ static bool AllUsesAllowNormalizedPtr(const Instruction *I2P) {
   return true;
 }
 
+// Returns true if the bitcode writer can assume that V, a user of the
+// scalar value Arg, always expects Arg to be a scalar (non-pointer).
+// This is used to infer which PtrToInt casts can be elided; the casts
+// accepted here should match those the bitcode reader adds back.
+static bool ExpectsScalarValue(const Value *V, const Instruction *Arg) {
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return false;
+
+  if (I->isBinaryOp()) return true;
+
+  // TODO(kschimpf): Need to think more about how to handle following
+  // instructions:
+  // case Instruction::IntToPtr:
+  // case Instruction::BitCast:
+  // case Instruction::PHI:
+  // case Instruction::Call:
+  switch (I->getOpcode()) {
+  default:
+    return false;
+  case Instruction::Trunc:
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::ICmp:
+    return true;
+  case Instruction::Store:
+    // Only the stored value (operand 0) is scalar, not the address.
+    return Arg == I->getOperand(0);
+  case Instruction::Select: {
+    // The opcode proves the type, so use cast<> (asserts) over dyn_cast<>.
+    const SelectInst *Op = cast<SelectInst>(I);
+    return Arg == Op->getTrueValue() || Arg == Op->getFalseValue();
+  }
+  }
+}
+
+// Returns true if every use of the given PtrToInt instruction I expects
+// a scalar (i.e. non-pointer) value, in which case the bitcode reader
+// and writer can agree to elide the PtrToInt cast.
+static bool AllUsesExpectsScalarValue(const Instruction *I) {
+  // Vacuously true when I has no uses (it is then dead code); otherwise
+  // each user must be one for which the cast can automatically be
+  // added back. The scan stops at the first user that does not qualify.
+  bool AllScalar = true;
+  for (Value::const_use_iterator UI = I->use_begin(), UE = I->use_end();
+       AllScalar && UI != UE; ++UI)
+    AllScalar = ExpectsScalarValue(*UI, I);
+  return AllScalar;
+}
+
 // Returns true if the value is an InherentPtr (as defined in
 // llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp).
 static inline bool IsInherentPtr(const Value *V) {
@@ -483,14 +533,21 @@ const Value *NaClValueEnumerator::ElideCasts(const Value *V) {
       break;
     case Instruction::BitCast:
       if (I->getType()->isPointerTy() &&
-	  AllUsesAllowNormalizedPtr(I) &&
-	  IsInherentPtr(I->getOperand(0))) {
-	return ElideCasts(I->getOperand(0));
+          IsInherentPtr(I->getOperand(0)) &&
+          AllUsesExpectsNormalizedPtr(I)) {
+        V = I->getOperand(0);
       }
       break;
     case Instruction::IntToPtr:
-      if (AllUsesAllowNormalizedPtr(I)) {
-        return ElideCasts(I->getOperand(0));
+      if (AllUsesExpectsNormalizedPtr(I)) {
+        V = ElideCasts(I->getOperand(0));
+      }
+      break;
+    case Instruction::PtrToInt:
+      if (IsIntPtrType(I->getType()) &&
+          IsInherentPtr(I->getOperand(0)) &&
+          AllUsesExpectsScalarValue(I)) {
+        V = I->getOperand(0);
       }
       break;
     }
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
index 22de263c4b..71638dd4eb 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
 #include <vector>
 
 namespace llvm {
@@ -83,6 +84,9 @@ private:
   // The version of PNaCl bitcode to generate.
   uint32_t PNaClVersion;
 
+  /// \brief Integer type used for PNaCl conversion of pointers.
+  Type *IntPtrType;
+
   NaClValueEnumerator(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION;
   void operator=(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION;
 public:
@@ -144,6 +148,12 @@ public:
     return V != ElideCasts(V);
   }
 
+  /// \brief Returns true if T is the integer type used to model
+  /// pointers in PNaCl.
+  bool IsIntPtrType(Type *T) const {
+    return T == IntPtrType;
+  }
+
 private:
   void OptimizeTypes(const Module *M);
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
diff --git a/test/NaCl/Bitcode/ptrtoint-elide.ll b/test/NaCl/Bitcode/ptrtoint-elide.ll
new file mode 100644
index 0000000000..10504a8577
--- /dev/null
+++ b/test/NaCl/Bitcode/ptrtoint-elide.ll
@@ -0,0 +1,897 @@
+; Test how we handle eliding ptrtoint instructions.
+; TODO(kschimpf) Expand these tests as further CLs are added for issue 3544.
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 \
+; RUN:              | pnacl-bcanalyzer -dump-records \
+; RUN:              | FileCheck %s -check-prefix=PF1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 | pnacl-thaw \
+; RUN:              | llvm-dis - | FileCheck %s -check-prefix=TD1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 \
+; RUN:              | pnacl-bcanalyzer -dump-records \
+; RUN:              | FileCheck %s -check-prefix=PF2
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 | pnacl-thaw \
+; RUN:              | llvm-dis - | FileCheck %s -check-prefix=TD2
+
+; ------------------------------------------------------
+
+declare i32 @bar(i32)
+
+@bytes = internal global [4 x i8] c"abcd"
+
+; ------------------------------------------------------
+
+; Show simple case where we use ptrtoint
+define void @AllocCastSimple() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32
+  %3 = bitcast [4 x i8]* @bytes to i32*
+  store i32 %2, i32* %3, align 1
+  ret void
+}
+
+; TD1:      define void @AllocCastSimple() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT:   store i32 %2, i32* %3, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=4 op1=4 op2=11/>
+; PF1-NEXT:     <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @AllocCastSimple() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %3 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=3 op1=1 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Same as above, but with the cast order changed. Shows
+; that we always inject casts back in a fixed order. Hence,
+; in PNaCl version 2, the casts will be reversed.
+define void @AllocCastSimpleReversed() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = bitcast [4 x i8]* @bytes to i32*
+  %3 = ptrtoint i8* %1 to i32
+  store i32 %3, i32* %2, align 1
+  ret void
+}
+
+; TD1:      define void @AllocCastSimpleReversed() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   store i32 %3, i32* %2, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=3 op1=4 op2=11/>
+; PF1-NEXT:     <INST_CAST op0=2 op1=0 op2=9/>
+; PF1-NEXT:     <INST_STORE op0=2 op1=1 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @AllocCastSimpleReversed() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %3 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=3 op1=1 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show case where we delete ptrtoint casts because they aren't used.
+define void @AllocCastDelete() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32
+  %3 = alloca i8, i32 4, align 8
+  %4 = ptrtoint i8* %3 to i32
+  ret void
+}
+
+; TD1:      define void @AllocCastDelete() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %4 = ptrtoint i8* %3 to i32
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @AllocCastDelete() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = alloca i8, i32 4, align 8
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show case where we have optimized the ptrtoint (and bitcast) into a
+; single instruction, but it will get duplicated after reading back the
+; bitcode file, since we insert elided casts immediately before each use.
+define void @AllocCastOpt() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = bitcast [4 x i8]* @bytes to i32*
+  %3 = ptrtoint i8* %1 to i32
+  store i32 %3, i32* %2, align 1
+  store i32 %3, i32* %2, align 1
+  ret void
+}
+
+; TD1:      define void @AllocCastOpt() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   store i32 %3, i32* %2, align 1
+; TD1-NEXT:   store i32 %3, i32* %2, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=3 op1=4 op2=11/>
+; PF1-NEXT:     <INST_CAST  op0=2 op1=0 op2=9/>
+; PF1-NEXT:     <INST_STORE op0=2 op1=1 op2=1 op3=0/>
+; PF1-NEXT:     <INST_STORE op0=2 op1=1 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @AllocCastOpt() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %3 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   %4 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %5 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT:   store i32 %4, i32* %5, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=3 op1=1 op2=1/>
+; PF2-NEXT:     <INST_STORE op0=3 op1=1 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show case where ptrtoint (and bitcast) for store are not immediately
+; before the store; the casts will be moved to the store.
+define void @AllocCastMove(i32) {
+  %2 = alloca i8, i32 4, align 8
+  %3 = bitcast [4 x i8]* @bytes to i32*
+  %4 = ptrtoint i8* %2 to i32
+  %5 = add i32 %0, 1
+  store i32 %4, i32* %3, align 1
+  ret void
+}
+
+; TD1:      define void @AllocCastMove(i32) {
+; TD1-NEXT:   %2 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %3 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT:   %4 = ptrtoint i8* %2 to i32
+; TD1-NEXT:   %5 = add i32 %0, 1
+; TD1-NEXT:   store i32 %4, i32* %3, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=5 op1=4 op2=11/>
+; PF1-NEXT:     <INST_CAST op0=2 op1=0 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=6 op1=4 op2=0/>
+; PF1-NEXT:     <INST_STORE op0=3 op1=2 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @AllocCastMove(i32) {
+; TD2-NEXT:   %2 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %3 = add i32 %0, 1
+; TD2-NEXT:   %4 = ptrtoint i8* %2 to i32
+; TD2-NEXT:   %5 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT:   store i32 %4, i32* %5, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=2 op2=0/>
+; PF2-NEXT:     <INST_STORE op0=6 op1=2 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show case where ptrtoint on global variable is merged in a store, and
+; order is kept.
+define void @StoreGlobal() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint [4 x i8]* @bytes to i32
+  %3 = bitcast i8* %1 to i32*
+  store i32 %2, i32* %3, align 1
+  ret void
+}
+
+; TD1:      define void @StoreGlobal() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %3 = bitcast i8* %1 to i32*
+; TD1-NEXT:   store i32 %2, i32* %3, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=3 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=2 op1=4 op2=11/>
+; PF1-NEXT:     <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @StoreGlobal() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %3 = bitcast i8* %1 to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=1 op1=3 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Same as above, but with cast order reversed.
+define void @StoreGlobalCastsReversed() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = bitcast i8* %1 to i32*
+  %3 = ptrtoint [4 x i8]* @bytes to i32
+  store i32 %3, i32* %2, align 1
+  ret void
+}
+
+; TD1:      define void @StoreGlobalCastsReversed() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = bitcast i8* %1 to i32*
+; TD1-NEXT:   %3 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   store i32 %3, i32* %2, align 1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT:     <INST_CAST op0=4 op1=0 op2=9/>
+; PF1-NEXT:     <INST_STORE op0=2 op1=1 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @StoreGlobalCastsReversed() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %3 = bitcast i8* %1 to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=1 op1=3 op2=1/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we will move the ptrtoint of a global to the use.
+define i32 @StoreGlobalMovePtr2Int() {
+  %1 = ptrtoint [4 x i8]* @bytes to i32
+  %2 = alloca i8, i32 4, align 8
+  %3 = bitcast i8* %2 to i32*
+  store i32 %1, i32* %3, align 1
+  ret i32 0
+}
+
+; TD1:      define i32 @StoreGlobalMovePtr2Int() {
+; TD1-NEXT:   %1 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %2 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %3 = bitcast i8* %2 to i32*
+; TD1-NEXT:   store i32 %1, i32* %3, align 1
+; TD1-NEXT:   ret i32 0
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_CAST op0=3 op1=0 op2=9/>
+; PF1-NEXT:     <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT:     <INST_STORE op0=1 op1=3 op2=1 op3=0/>
+; PF1-NEXT:     <INST_RET op0=4/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+
+; TD2:      define i32 @StoreGlobalMovePtr2Int() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %3 = bitcast i8* %1 to i32*
+; TD2-NEXT:   store i32 %2, i32* %3, align 1
+; TD2-NEXT:   ret i32 0
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_STORE op0=1 op1=4 op2=1/>
+; PF2-NEXT:     <INST_RET op0=2/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we handle add instructions with pointer casts of an alloca.
+define void @CastAddAlloca() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32
+
+  ; Simple add.
+  %3 = add i32 1, 2
+
+  ; Cast first.
+  %4 = add i32 %2, 2
+
+  ; Cast second.
+  %5 = add i32 1, %2
+
+  ; Cast both.
+  %6 = add i32 %2, %2
+
+  ret void
+}
+
+; TD1:      define void @CastAddAlloca() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = add i32 1, 2
+; TD1-NEXT:   %4 = add i32 %2, 2
+; TD1-NEXT:   %5 = add i32 1, %2
+; TD1-NEXT:   %6 = add i32 %2, %2
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=5 op1=4 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=2 op1=5 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=7 op1=3 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=4 op1=4 op2=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @CastAddAlloca() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = add i32 1, 2
+; TD2-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %4 = add i32 %3, 2
+; TD2-NEXT:   %5 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %6 = add i32 1, %5
+; TD2-NEXT:   %7 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %8 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %9 = add i32 %7, %8
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=3 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=2 op1=4 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=6 op1=3 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=4 op2=0/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we handle add instructions with pointer casts of a global.
+define void @CastAddGlobal() {
+  %1 = ptrtoint [4 x i8]* @bytes to i32
+
+  ; Simple Add.
+  %2 = add i32 1, 2
+
+  ; Cast first.
+  %3 = add i32 %1, 2
+
+  ; Cast Second.
+  %4 = add i32 1, %1
+
+  ; Cast both.
+  %5 = add i32 %1, %1
+  ret void
+}
+
+; TD1:      define void @CastAddGlobal() {
+; TD1-NEXT:   %1 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %2 = add i32 1, 2
+; TD1-NEXT:   %3 = add i32 %1, 2
+; TD1-NEXT:   %4 = add i32 1, %1
+; TD1-NEXT:   %5 = add i32 %1, %1
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_CAST op0=3 op1=0 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=3 op1=2 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=2 op1=3 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=5 op1=3 op2=0/>
+; PF1-NEXT:     <INST_BINOP op0=4 op1=4 op2=0/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @CastAddGlobal() {
+; TD2-NEXT:   %1 = add i32 1, 2
+; TD2-NEXT:   %2 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %3 = add i32 %2, 2
+; TD2-NEXT:   %4 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %5 = add i32 1, %4
+; TD2-NEXT:   %6 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %7 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %8 = add i32 %6, %7
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=2 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=5 op2=0/>
+; PF2-NEXT:     <INST_BINOP op0=6 op1=6 op2=0/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we can handle pointer conversions for other scalar binary operators.
+define void @CastBinop() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32  ; pointer cast of a local (alloca) address
+  %3 = ptrtoint [4 x i8]* @bytes to i32  ; pointer cast of a global address
+  %4 = sub i32 %2, %3
+  %5 = mul i32 %2, %3
+  %6 = udiv i32 %2, %3  ; NOTE(review): sdiv is not exercised by this list
+  %7 = urem i32 %2, %3
+  %8 = srem i32 %2, %3
+  %9 = shl i32 %2, %3
+  %10 = lshr i32 %2, %3
+  %11 = ashr i32 %2, %3
+  %12 = and i32 %2, %3
+  %13 = or i32 %2, %3
+  %14 = xor i32 %2, %3
+  ret void
+}
+
+; TD1:      define void @CastBinop() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %4 = sub i32 %2, %3
+; TD1-NEXT:   %5 = mul i32 %2, %3
+; TD1-NEXT:   %6 = udiv i32 %2, %3
+; TD1-NEXT:   %7 = urem i32 %2, %3
+; TD1-NEXT:   %8 = srem i32 %2, %3
+; TD1-NEXT:   %9 = shl i32 %2, %3
+; TD1-NEXT:   %10 = lshr i32 %2, %3
+; TD1-NEXT:   %11 = ashr i32 %2, %3
+; TD1-NEXT:   %12 = and i32 %2, %3
+; TD1-NEXT:   %13 = or i32 %2, %3
+; TD1-NEXT:   %14 = xor i32 %2, %3
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=4 op1=0 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=2 op1=1 op2=1/>
+; PF1-NEXT:     <INST_BINOP op0=3 op1=2 op2=2/>
+; PF1-NEXT:     <INST_BINOP op0=4 op1=3 op2=3/>
+; PF1-NEXT:     <INST_BINOP op0=5 op1=4 op2=5/>
+; PF1-NEXT:     <INST_BINOP op0=6 op1=5 op2=6/>
+; PF1-NEXT:     <INST_BINOP op0=7 op1=6 op2=7/>
+; PF1-NEXT:     <INST_BINOP op0=8 op1=7 op2=8/>
+; PF1-NEXT:     <INST_BINOP op0=9 op1=8 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=10 op1=9 op2=10/>
+; PF1-NEXT:     <INST_BINOP op0=11 op1=10 op2=11/>
+; PF1-NEXT:     <INST_BINOP op0=12 op1=11 op2=12/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @CastBinop() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %3 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %4 = sub i32 %2, %3
+; TD2-NEXT:   %5 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %6 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %7 = mul i32 %5, %6
+; TD2-NEXT:   %8 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %9 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %10 = udiv i32 %8, %9
+; TD2-NEXT:   %11 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %12 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %13 = urem i32 %11, %12
+; TD2-NEXT:   %14 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %15 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %16 = srem i32 %14, %15
+; TD2-NEXT:   %17 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %18 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %19 = shl i32 %17, %18
+; TD2-NEXT:   %20 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %21 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %22 = lshr i32 %20, %21
+; TD2-NEXT:   %23 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %24 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %25 = ashr i32 %23, %24
+; TD2-NEXT:   %26 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %27 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %28 = and i32 %26, %27
+; TD2-NEXT:   %29 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %30 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %31 = or i32 %29, %30
+; TD2-NEXT:   %32 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %33 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %34 = xor i32 %32, %33
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_BINOP op0=1 op1=3 op2=1/>
+; PF2-NEXT:     <INST_BINOP op0=2 op1=4 op2=2/>
+; PF2-NEXT:     <INST_BINOP op0=3 op1=5 op2=3/>
+; PF2-NEXT:     <INST_BINOP op0=4 op1=6 op2=5/>
+; PF2-NEXT:     <INST_BINOP op0=5 op1=7 op2=6/>
+; PF2-NEXT:     <INST_BINOP op0=6 op1=8 op2=7/>
+; PF2-NEXT:     <INST_BINOP op0=7 op1=9 op2=8/>
+; PF2-NEXT:     <INST_BINOP op0=8 op1=10 op2=9/>
+; PF2-NEXT:     <INST_BINOP op0=9 op1=11 op2=10/>
+; PF2-NEXT:     <INST_BINOP op0=10 op1=12 op2=11/>
+; PF2-NEXT:     <INST_BINOP op0=11 op1=13 op2=12/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we handle the (non-special) casts trunc, zext, sext, uitofp and
+; sitofp when their operand is a pointer cast (ptrtoint) to integer.
+define void @TestCasts() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32  ; pointer cast used by the second cast of each pair
+
+  %3 = trunc i32 257 to i8  ; constant operand
+  %4 = trunc i32 %2 to i8  ; pointer-cast operand
+
+  %5 = zext i32 257 to i64  ; constant operand
+  %6 = zext i32 %2 to i64  ; pointer-cast operand
+
+  %7 = sext i32 -1 to i64  ; constant operand
+  %8 = sext i32 %2 to i64  ; pointer-cast operand
+
+  %9 = uitofp i32 1 to float  ; constant operand
+  %10 = uitofp i32 %2 to float  ; pointer-cast operand
+
+  %11 = sitofp i32 -1 to float  ; constant operand
+  %12 = sitofp i32 %2 to float  ; pointer-cast operand
+  ret void
+}
+
+; TD1:      define void @TestCasts() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = trunc i32 257 to i8
+; TD1-NEXT:   %4 = trunc i32 %2 to i8
+; TD1-NEXT:   %5 = zext i32 257 to i64
+; TD1-NEXT:   %6 = zext i32 %2 to i64
+; TD1-NEXT:   %7 = sext i32 -1 to i64
+; TD1-NEXT:   %8 = sext i32 %2 to i64
+; TD1-NEXT:   %9 = uitofp i32 1 to float
+; TD1-NEXT:   %10 = uitofp i32 %2 to float
+; TD1-NEXT:   %11 = sitofp i32 -1 to float
+; TD1-NEXT:   %12 = sitofp i32 %2 to float
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=6 op1=1 op2=0/>
+; PF1-NEXT:     <INST_CAST op0=2 op1=1 op2=0/>
+; PF1-NEXT:     <INST_CAST op0=8 op1=10 op2=1/>
+; PF1-NEXT:     <INST_CAST op0=4 op1=10 op2=1/>
+; PF1-NEXT:     <INST_CAST op0=9 op1=10 op2=2/>
+; PF1-NEXT:     <INST_CAST op0=6 op1=10 op2=2/>
+; PF1-NEXT:     <INST_CAST op0=9 op1=11 op2=5/>
+; PF1-NEXT:     <INST_CAST op0=8 op1=11 op2=5/>
+; PF1-NEXT:     <INST_CAST op0=13 op1=11 op2=6/>
+; PF1-NEXT:     <INST_CAST op0=10 op1=11 op2=6/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @TestCasts() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = trunc i32 257 to i8
+; TD2-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %4 = trunc i32 %3 to i8
+; TD2-NEXT:   %5 = zext i32 257 to i64
+; TD2-NEXT:   %6 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %7 = zext i32 %6 to i64
+; TD2-NEXT:   %8 = sext i32 -1 to i64
+; TD2-NEXT:   %9 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %10 = sext i32 %9 to i64
+; TD2-NEXT:   %11 = uitofp i32 1 to float
+; TD2-NEXT:   %12 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %13 = uitofp i32 %12 to float
+; TD2-NEXT:   %14 = sitofp i32 -1 to float
+; TD2-NEXT:   %15 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %16 = sitofp i32 %15 to float
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_CAST op0=5 op1=1 op2=0/>
+; PF2-NEXT:     <INST_CAST op0=2 op1=1 op2=0/>
+; PF2-NEXT:     <INST_CAST op0=7 op1=10 op2=1/>
+; PF2-NEXT:     <INST_CAST op0=4 op1=10 op2=1/>
+; PF2-NEXT:     <INST_CAST op0=8 op1=10 op2=2/>
+; PF2-NEXT:     <INST_CAST op0=6 op1=10 op2=2/>
+; PF2-NEXT:     <INST_CAST op0=8 op1=11 op2=5/>
+; PF2-NEXT:     <INST_CAST op0=8 op1=11 op2=5/>
+; PF2-NEXT:     <INST_CAST op0=12 op1=11 op2=6/>
+; PF2-NEXT:     <INST_CAST op0=10 op1=11 op2=6/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that if a ptrtoint is used in something other than known scalar
+; operations (here, as a call argument), it gets copied to the bitcode file.
+; TODO(kschimpf): Remove this once all scalar operations have been handled.
+define void @TestSavedPtrToInt() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32
+  %3 = add i32 %2, 0  ; use in a binary operator
+  %4 = call i32 @bar(i32 %2)  ; use as a call argument
+  ret void
+}
+
+; TD1:      define void @TestSavedPtrToInt() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = add i32 %2, 0
+; TD1-NEXT:   %4 = call i32 @bar(i32 %2)
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_BINOP op0=1 op1=3 op2=0/>
+; PF1-NEXT:     <INST_CALL op0=0 op1=22 op2=2/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @TestSavedPtrToInt() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %3 = add i32 %2, 0
+; TD2-NEXT:   %4 = call i32 @bar(i32 %2)
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF2-NEXT:     <INST_BINOP op0=1 op1=3 op2=0/>
+; PF2-NEXT:     <INST_CALL op0=0 op1=22 op2=2/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we can handle pointer conversions for icmp.
+define void @CastIcmp() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32  ; pointer cast of a local (alloca) address
+  %3 = ptrtoint [4 x i8]* @bytes to i32  ; pointer cast of a global address
+  %4 = icmp eq i32 1, 2  ; neither operand is a pointer cast
+  %5 = icmp eq i32 %2, 2  ; pointer cast as the first operand
+  %6 = icmp eq i32 1, %3  ; pointer cast as the second operand
+  %7 = icmp eq i32 %2, %3  ; pointer casts as both operands
+  %8 = icmp eq i32 %3, %2  ; both operands, in the opposite order
+  ret void
+}
+
+; TD1:      define void @CastIcmp() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %4 = icmp eq i32 1, 2
+; TD1-NEXT:   %5 = icmp eq i32 %2, 2
+; TD1-NEXT:   %6 = icmp eq i32 1, %3
+; TD1-NEXT:   %7 = icmp eq i32 %2, %3
+; TD1-NEXT:   %8 = icmp eq i32 %3, %2
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=6 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CMP2 op0=6 op1=5 op2=32/>
+; PF1-NEXT:     <INST_CMP2 op0=3 op1=6 op2=32/>
+; PF1-NEXT:     <INST_CMP2 op0=8 op1=3 op2=32/>
+; PF1-NEXT:     <INST_CMP2 op0=5 op1=4 op2=32/>
+; PF1-NEXT:     <INST_CMP2 op0=5 op1=6 op2=32/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @CastIcmp() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = icmp eq i32 1, 2
+; TD2-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %4 = icmp eq i32 %3, 2
+; TD2-NEXT:   %5 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %6 = icmp eq i32 1, %5
+; TD2-NEXT:   %7 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %8 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %9 = icmp eq i32 %7, %8
+; TD2-NEXT:   %10 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %11 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %12 = icmp eq i32 %10, %11
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT:     <INST_CMP2 op0=4 op1=3 op2=32/>
+; PF2-NEXT:     <INST_CMP2 op0=2 op1=4 op2=32/>
+; PF2-NEXT:     <INST_CMP2 op0=6 op1=7 op2=32/>
+; PF2-NEXT:     <INST_CMP2 op0=4 op1=8 op2=32/>
+; PF2-NEXT:     <INST_CMP2 op0=9 op1=5 op2=32/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we can handle pointer conversions for select.
+define void @CastSelect() {
+  %1 = alloca i8, i32 4, align 8
+  %2 = ptrtoint i8* %1 to i32  ; pointer cast of a local (alloca) address
+  %3 = ptrtoint [4 x i8]* @bytes to i32  ; pointer cast of a global address
+  %4 = select i1 true, i32 1, i32 2  ; neither value operand is a pointer cast
+  %5 = select i1 true, i32 %2, i32 2  ; pointer cast as the first value operand
+  %6 = select i1 true, i32 1, i32 %3  ; pointer cast as the second value operand
+  %7 = select i1 true, i32 %2, i32 %3  ; pointer casts as both value operands
+  %8 = select i1 true, i32 %3, i32 %2  ; both value operands, in the opposite order
+  ret void
+}
+
+; TD1:      define void @CastSelect() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD1-NEXT:   %2 = ptrtoint i8* %1 to i32
+; TD1-NEXT:   %3 = ptrtoint [4 x i8]* @bytes to i32
+; TD1-NEXT:   %4 = select i1 true, i32 1, i32 2
+; TD1-NEXT:   %5 = select i1 true, i32 %2, i32 2
+; TD1-NEXT:   %6 = select i1 true, i32 1, i32 %3
+; TD1-NEXT:   %7 = select i1 true, i32 %2, i32 %3
+; TD1-NEXT:   %8 = select i1 true, i32 %3, i32 %2
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK>
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF1-NEXT:     <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT:     <INST_CAST op0=7 op1=0 op2=9/>
+; PF1-NEXT:     <INST_VSELECT op0=7 op1=6 op2=4/>
+; PF1-NEXT:     <INST_VSELECT op0=3 op1=7 op2=5/>
+; PF1-NEXT:     <INST_VSELECT op0=9 op1=3 op2=6/>
+; PF1-NEXT:     <INST_VSELECT op0=5 op1=4 op2=7/>
+; PF1-NEXT:     <INST_VSELECT op0=5 op1=6 op2=8/>
+; PF1-NEXT:     <INST_RET/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @CastSelect() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 8
+; TD2-NEXT:   %2 = select i1 true, i32 1, i32 2
+; TD2-NEXT:   %3 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %4 = select i1 true, i32 %3, i32 2
+; TD2-NEXT:   %5 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %6 = select i1 true, i32 1, i32 %5
+; TD2-NEXT:   %7 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %8 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %9 = select i1 true, i32 %7, i32 %8
+; TD2-NEXT:   %10 = ptrtoint [4 x i8]* @bytes to i32
+; TD2-NEXT:   %11 = ptrtoint i8* %1 to i32
+; TD2-NEXT:   %12 = select i1 true, i32 %10, i32 %11
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK>
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT:     <INST_VSELECT op0=5 op1=4 op2=2/>
+; PF2-NEXT:     <INST_VSELECT op0=2 op1=5 op2=3/>
+; PF2-NEXT:     <INST_VSELECT op0=7 op1=8 op2=4/>
+; PF2-NEXT:     <INST_VSELECT op0=4 op1=9 op2=5/>
+; PF2-NEXT:     <INST_VSELECT op0=10 op1=5 op2=6/>
+; PF2-NEXT:     <INST_RET/>
+; PF2-NEXT:   </FUNCTION_BLOCK>