3 files changed, 241 insertions, 14 deletions
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
index 40f8842b3b..ede06b2457 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
@@ -1315,28 +1315,28 @@ bool NaClBitcodeReader::InstallInstruction(
 }
 
 Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
-  // Note: Currently only knows how to add inttoptr type conversion, since
-  // this is the only elided instruction in the bitcode writer.
+  // Re-creates a cast the bitcode writer elided (inttoptr or pointer
+  // bitcast, non-phi uses only) so that Op has type T, appending it to
+  // BB. Returns Op if it already has type T; returns null if T is not
+  // a pointer type, Op is not pointer/integer, or installation fails.
   // TODO(kschimpf): Generalize this as we expand elided conversions.
-  Value *Conversion = 0;
+  Instruction *Conversion = 0;
   Type *OpTy = Op->getType();
   if (OpTy == T) return Op;
 
-  // Following while loop is only run once. It is used to break on
-  // erroneous conditions.
-  while (true) {
-    if (!OpTy->isIntegerTy()) break;
-    if (!T->isPointerTy()) break;
-    Instruction *I = CastInst::Create(Instruction::IntToPtr, Op, T);
-    if (InstallInstruction(BB, I)) break;
-    Conversion = I;
-    break;
+  if (T->isPointerTy() && OpTy->isPointerTy()) {
+    Conversion = new BitCastInst(Op, T);
+  } else if (T->isPointerTy() && OpTy->isIntegerTy()) {
+    Conversion = new IntToPtrInst(Op, T);
   }
+
   if (Conversion == 0) {
     std::string Message;
     raw_string_ostream StrM(Message);
     StrM << "Can't convert " << *Op << " to type " << *T << "\n";
     Error(StrM.str());
+  } else if (InstallInstruction(BB, Conversion)) {
+    return 0;  // Installation failed: report no conversion.
   }
   return Conversion;
 }
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 7da01f005e..11e1161bf8 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -450,7 +450,7 @@ static bool AllowsNormalizedPtr(const Value *V, const Instruction *Arg) {
 // Returns true if the bitcode reader and writer can assume that the
 // uses of the given inttotpr I2P allow normalized pointers (as
 // defined in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp).
-static bool IntToPtrUsesAllowEliding(const Instruction *I2P) {
+static bool AllUsesAllowNormalizedPtr(const Instruction *I2P) {
   for (Value::const_use_iterator u = I2P->use_begin(), e = I2P->use_end();
        u != e; ++u) {
     if (!AllowsNormalizedPtr(cast<Value>(*u), I2P)) return false;
@@ -461,6 +461,12 @@ static bool IntToPtrUsesAllowEliding(const Instruction *I2P) {
   return true;
 }
 
+// Tests whether V is an alloca or a global value, i.e. an "InherentPtr"
+// in the terminology of llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
+static inline bool IsInherentPtr(const Value *V) {
+  return isa<GlobalValue>(V) || isa<AllocaInst>(V);
+}
+
 // Note: This function is based on the comments in
 // llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
 const Value *NaClValueEnumerator::ElideCasts(const Value *V) {
@@ -470,10 +476,18 @@ const Value *NaClValueEnumerator::ElideCasts(const Value *V) {
     switch (I->getOpcode()) {
     default:
       break;
+    case Instruction::BitCast:
+      if (I->getType()->isPointerTy() &&
+	  AllUsesAllowNormalizedPtr(I) &&
+	  IsInherentPtr(I->getOperand(0))) {
+	return ElideCasts(I->getOperand(0));
+      }
+      break;
     case Instruction::IntToPtr:
-      if (IntToPtrUsesAllowEliding(I)) {
+      if (AllUsesAllowNormalizedPtr(I)) {
         return ElideCasts(I->getOperand(0));
       }
+      break;
     }
   }
   return V;
diff --git a/test/NaCl/Bitcode/bitcast-elide.ll b/test/NaCl/Bitcode/bitcast-elide.ll
new file mode 100644
index 0000000000..1f8ff586ea
--- /dev/null
+++ b/test/NaCl/Bitcode/bitcast-elide.ll
@@ -0,0 +1,213 @@
+; Test how we handle eliding (pointer) bitcast instructions.
+; TODO(kschimpf) Expand these tests as further CL's are added for issue 3544.
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 | pnacl-bcanalyzer -dump \
+; RUN:              | FileCheck %s -check-prefix=PF1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 | pnacl-thaw \
+; RUN:              | llvm-dis - | FileCheck %s -check-prefix=TD1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 | pnacl-bcanalyzer -dump \
+; RUN:              | FileCheck %s -check-prefix=PF2
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 | pnacl-thaw \
+; RUN:              | llvm-dis - | FileCheck %s -check-prefix=TD2
+
+; ------------------------------------------------------
+
+@bytes = internal global [7 x i8] c"abcdefg"
+
+; Test that we elide the simple case of a global.
+define void @SimpleLoad() {
+  %1 = bitcast [7 x i8]* @bytes to i32*
+  %2 = load i32* %1, align 4
+  ret void
+}
+
+; TD1:      define void @SimpleLoad() {
+; TD1-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT:   %2 = load i32* %1, align 4
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:       <FUNCTION_BLOCK NumWords=2 BlockCodeSize=4>
+; PF1-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF1-NEXT:    <INST_CAST abbrevid=7 op0=1 op1=1 op2=11/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:    <INST_RET abbrevid=8/>
+; PF1-NEXT:  </FUNCTION_BLOCK>
+
+; TD2:      define void @SimpleLoad() {
+; TD2-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %2 = load i32* %1, align 4
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:       <FUNCTION_BLOCK NumWords=2 BlockCodeSize=4>
+; PF2-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_RET abbrevid=8/>
+; PF2-NEXT:  </FUNCTION_BLOCK>
+
+; Test that we elide the simple case of an alloca.
+define void @SimpleLoadAlloca() {
+  %1 = alloca i8, i32 4, align 4
+  %2 = bitcast i8* %1 to i32*
+  %3 = load i32* %2, align 4
+  ret void
+}
+
+; TD1:      define void @SimpleLoadAlloca() {
+; TD1-NEXT:   %1 = alloca i8, i32 4, align 4
+; TD1-NEXT:   %2 = bitcast i8* %1 to i32*
+; TD1-NEXT:   %3 = load i32* %2, align 4
+; TD1-NEXT:   ret void
+; TD1-NEXT: }
+
+; PF1:        <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF1-NEXT:     <DECLAREBLOCKS op0=1/>
+; PF1-NEXT:     <CONSTANTS_BLOCK
+; PF1:          </CONSTANTS_BLOCK>
+; PF1-NEXT:     <INST_ALLOCA op0=1 op1=3/>
+; PF1-NEXT:     <INST_CAST abbrevid=7 op0=1 op1=1 op2=11/>
+; PF1-NEXT:     <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:     <INST_RET abbrevid=8/>
+; PF1-NEXT:   </FUNCTION_BLOCK>
+
+; TD2:      define void @SimpleLoadAlloca() {
+; TD2-NEXT:   %1 = alloca i8, i32 4, align 4
+; TD2-NEXT:   %2 = bitcast i8* %1 to i32*
+; TD2-NEXT:   %3 = load i32* %2, align 4
+; TD2-NEXT:   ret void
+; TD2-NEXT: }
+
+; PF2:        <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF2-NEXT:     <DECLAREBLOCKS op0=1/>
+; PF2-NEXT:     <CONSTANTS_BLOCK
+; PF2:          </CONSTANTS_BLOCK>
+; PF2-NEXT:     <INST_ALLOCA op0=1 op1=3/>
+; PF2-NEXT:     <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT:     <INST_RET abbrevid=8/>
+; PF2-NEXT:   </FUNCTION_BLOCK>
+
+; Test that we don't elide a bitcast if one of its uses is not a load.
+define i32* @NonsimpleLoad(i32 %i) {
+  %1 = bitcast [7 x i8]* @bytes to i32*       
+  %2 = load i32* %1, align 4
+  ret i32* %1
+}
+
+; TD1:      define i32* @NonsimpleLoad(i32 %i) {
+; TD1-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT:   %2 = load i32* %1, align 4
+; TD1-NEXT:   ret i32* %1
+; TD1-NEXT: }
+
+; PF1:       <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF1-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF1-NEXT:    <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:    <INST_RET abbrevid=9 op0=2/>
+; PF1:       </FUNCTION_BLOCK>
+
+; TD2:      define i32* @NonsimpleLoad(i32 %i) {
+; TD2-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %2 = load i32* %1, align 4
+; TD2-NEXT:   ret i32* %1
+; TD2-NEXT: }
+
+; PF2:       <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF2-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF2-NEXT:    <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_RET abbrevid=9 op0=2/>
+; PF2:       </FUNCTION_BLOCK>
+
+; Test that we can handle multiple bitcasts.
+define i32 @TwoLoads(i32 %i) {
+  %1 = bitcast [7 x i8]* @bytes to i32*       
+  %2 = load i32* %1, align 4
+  %3 = bitcast [7 x i8]* @bytes to i32*       
+  %4 = load i32* %3, align 4
+  %5 = add i32 %2, %4
+  ret i32 %5
+}
+
+; TD1:      define i32 @TwoLoads(i32 %i) {
+; TD1-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT:   %2 = load i32* %1, align 4
+; TD1-NEXT:   %3 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT:   %4 = load i32* %3, align 4
+; TD1-NEXT:   %5 = add i32 %2, %4
+; TD1-NEXT:   ret i32 %5
+; TD1-NEXT: }
+
+; PF1:       <FUNCTION_BLOCK NumWords=8 BlockCodeSize=4>
+; PF1-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF1-NEXT:    <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:    <INST_CAST abbrevid=7 op0=4 op1=1 op2=11/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:    <INST_BINOP abbrevid=5 op0=3 op1=1 op2=0/>
+; PF1-NEXT:    <INST_RET abbrevid=9 op0=1/>
+; PF1:       </FUNCTION_BLOCK>
+
+; TD2:      define i32 @TwoLoads(i32 %i) {
+; TD2-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %2 = load i32* %1, align 4
+; TD2-NEXT:   %3 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %4 = load i32* %3, align 4
+; TD2-NEXT:   %5 = add i32 %2, %4
+; TD2-NEXT:   ret i32 %5
+; TD2-NEXT: }
+
+; PF2:       <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF2-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=3 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF2-NEXT:    <INST_RET abbrevid=9 op0=1/>
+; PF2:       </FUNCTION_BLOCK>
+
+; Test that we duplicate bitcasts, even if they were optimized in the input file.
+define i32 @TwoLoadOpt(i32 %i) {
+  %1 = bitcast [7 x i8]* @bytes to i32*       
+  %2 = load i32* %1, align 4
+  %3 = load i32* %1, align 4
+  %4 = add i32 %2, %3
+  ret i32 %4
+}
+
+; TD1:      define i32 @TwoLoadOpt(i32 %i) {
+; TD1-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT:   %2 = load i32* %1, align 4
+; TD1-NEXT:   %3 = load i32* %1, align 4
+; TD1-NEXT:   %4 = add i32 %2, %3
+; TD1-NEXT:   ret i32 %4
+; TD1-NEXT: }
+
+; PF1:       <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF1-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF1-NEXT:    <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT:    <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0/>
+; PF1-NEXT:    <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF1-NEXT:    <INST_RET abbrevid=9 op0=1/>
+; PF1:       </FUNCTION_BLOCK>
+
+; TD2:      define i32 @TwoLoadOpt(i32 %i) {
+; TD2-NEXT:   %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %2 = load i32* %1, align 4
+; TD2-NEXT:   %3 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT:   %4 = load i32* %3, align 4
+; TD2-NEXT:   %5 = add i32 %2, %4
+; TD2-NEXT:   ret i32 %5
+; TD2-NEXT: }
+
+; PF2:       <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF2-NEXT:    <DECLAREBLOCKS op0=1/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_LOAD abbrevid=4 op0=3 op1=3 op2=0 op3=0/>
+; PF2-NEXT:    <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF2-NEXT:    <INST_RET abbrevid=9 op0=1/>
+; PF2:  </FUNCTION_BLOCK>