author      Karl Schimpf <kschimpf@google.com>    2013-08-02 09:46:26 -0700
committer   Karl Schimpf <kschimpf@google.com>    2013-08-02 09:46:26 -0700
commit      7578d662dba568ead351604240cfd476993a67a4 (patch)
tree        6c530fb7c4c4eca3545cf927c76fef4571881607
parent      a7665e96f34c4a981d59c78b0b872b8f0b100cb9 (diff)
Remove the bitcast (of global values) from load instructions.
Adds the eliding of bitcasts that are used as an argument to instructions
that expect normalized pointers. Currently, the checked-in code only checks
normalized pointers for load instructions; hence the restriction to load
instructions. As more instructions are modified to check for normalized
pointers, this code will apply to those instructions.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3544
R=mseaborn@chromium.org
Review URL: https://codereview.chromium.org/21614002
-rw-r--r--  lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp     24
-rw-r--r--  lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp   18
-rw-r--r--  test/NaCl/Bitcode/bitcast-elide.ll               213
3 files changed, 241 insertions, 14 deletions
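To make the rule above concrete, here is a small standalone C++ sketch of the writer-side decision. It is not the actual NaClValueEnumerator code: ValueKind, UseKind, and canElideBitCast are hypothetical names used only for illustration. A pointer bitcast may be dropped from the bitcode when its operand is an inherent pointer (an alloca or a global value) and every use is an instruction that accepts normalized pointers, which with this change means load instructions only.

  // Standalone model of the elision rule -- not the NaClValueEnumerator code.
  #include <iostream>
  #include <vector>

  enum class ValueKind { Global, Alloca, Other };  // kind of the cast's operand
  enum class UseKind { Load, Ret, Store, Other };  // kind of each user

  // A pointer bitcast may be dropped when its operand is an "inherent" pointer
  // (an alloca or a global) and every use accepts normalized pointers -- with
  // this CL, that means load instructions only.
  bool canElideBitCast(ValueKind Operand, const std::vector<UseKind> &Uses) {
    bool InherentPtr =
        Operand == ValueKind::Global || Operand == ValueKind::Alloca;
    if (!InherentPtr)
      return false;
    for (UseKind U : Uses)
      if (U != UseKind::Load)  // only loads check normalized pointers so far
        return false;
    return true;
  }

  int main() {
    // Mirrors @SimpleLoad below: a bitcast of a global used only by a load.
    std::cout << canElideBitCast(ValueKind::Global, {UseKind::Load}) << "\n";  // 1
    // Mirrors @NonsimpleLoad: the cast also escapes through a ret, so it is kept.
    std::cout << canElideBitCast(ValueKind::Global,
                                 {UseKind::Load, UseKind::Ret}) << "\n";       // 0
  }

As more instructions are taught to check for normalized pointers, only the use check in this rule would need to widen.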
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
index 40f8842b3b..ede06b2457 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
@@ -1315,28 +1315,28 @@ bool NaClBitcodeReader::InstallInstruction(
}
Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
- // Note: Currently only knows how to add inttoptr type conversion, since
- // this is the only elided instruction in the bitcode writer.
+ // Note: Currently only knows how to add inttoptr and bitcast type
+ // conversions for non-phi nodes, since these are the only elided
+ // instructions in the bitcode writer.
+ //
// TODO(kschimpf): Generalize this as we expand elided conversions.
- Value *Conversion = 0;
+ Instruction *Conversion = 0;
Type *OpTy = Op->getType();
if (OpTy == T) return Op;
- // Following while loop is only run once. It is used to break on
- // erroneous conditions.
- while (true) {
- if (!OpTy->isIntegerTy()) break;
- if (!T->isPointerTy()) break;
- Instruction *I = CastInst::Create(Instruction::IntToPtr, Op, T);
- if (InstallInstruction(BB, I)) break;
- Conversion = I;
- break;
+ if (OpTy->isPointerTy()) {
+ Conversion = new BitCastInst(Op, T);
+ } else if (OpTy->isIntegerTy()) {
+ Conversion = new IntToPtrInst(Op, T);
}
+
if (Conversion == 0) {
std::string Message;
raw_string_ostream StrM(Message);
StrM << "Can't convert " << *Op << " to type " << *T << "\n";
Error(StrM.str());
+ } else {
+ InstallInstruction(BB, Conversion);
}
return Conversion;
}
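The reader-side hunk above now chooses the reconstructed cast from the operand's type instead of hard-coding inttoptr. A minimal sketch of that selection, again using stand-in types rather than the LLVM classes (OperandType, RebuiltCast, and chooseConversion are hypothetical):

  #include <iostream>

  enum class OperandType { Pointer, Integer, Other };
  enum class RebuiltCast { BitCast, IntToPtr, Error };

  // Mirrors ConvertOpToType: a pointer operand gets a bitcast to the expected
  // pointer type, an integer operand gets an inttoptr, anything else is an error.
  RebuiltCast chooseConversion(OperandType Op) {
    switch (Op) {
    case OperandType::Pointer: return RebuiltCast::BitCast;
    case OperandType::Integer: return RebuiltCast::IntToPtr;
    default:                   return RebuiltCast::Error;
    }
  }

  int main() {
    std::cout << (chooseConversion(OperandType::Pointer) == RebuiltCast::BitCast) << "\n";  // 1
    std::cout << (chooseConversion(OperandType::Integer) == RebuiltCast::IntToPtr) << "\n"; // 1
    std::cout << (chooseConversion(OperandType::Other) == RebuiltCast::Error) << "\n";      // 1
  }

As in the hunk, the chosen instruction is then installed into the current basic block, and an operand that cannot be converted is reported as an error.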
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 7da01f005e..11e1161bf8 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -450,7 +450,7 @@ static bool AllowsNormalizedPtr(const Value *V, const Instruction *Arg) {
// Returns true if the bitcode reader and writer can assume that the
// uses of the given inttoptr I2P allow normalized pointers (as
// defined in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp).
-static bool IntToPtrUsesAllowEliding(const Instruction *I2P) {
+static bool AllUsesAllowNormalizedPtr(const Instruction *I2P) {
for (Value::const_use_iterator u = I2P->use_begin(), e = I2P->use_end();
u != e; ++u) {
if (!AllowsNormalizedPtr(cast<Value>(*u), I2P)) return false;
@@ -461,6 +461,12 @@ static bool IntToPtrUsesAllowEliding(const Instruction *I2P) {
return true;
}
+// Returns true if the value is an InherentPtr (as defined in
+// llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp).
+static inline bool IsInherentPtr(const Value *V) {
+ return isa<AllocaInst>(V) || isa<GlobalValue>(V);
+}
+
// Note: This function is based on the comments in
// llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
const Value *NaClValueEnumerator::ElideCasts(const Value *V) {
@@ -470,10 +476,18 @@ const Value *NaClValueEnumerator::ElideCasts(const Value *V) {
switch (I->getOpcode()) {
default:
break;
+ case Instruction::BitCast:
+ if (I->getType()->isPointerTy() &&
+ AllUsesAllowNormalizedPtr(I) &&
+ IsInherentPtr(I->getOperand(0))) {
+ return ElideCasts(I->getOperand(0));
+ }
+ break;
case Instruction::IntToPtr:
- if (IntToPtrUsesAllowEliding(I)) {
+ if (AllUsesAllowNormalizedPtr(I)) {
return ElideCasts(I->getOperand(0));
}
+ break;
}
}
return V;
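Note the asymmetry in the switch above: a BitCast is elided only when all of its uses allow normalized pointers and its operand is an InherentPtr, while an IntToPtr needs only the use condition. A hedged sketch of that dispatch, where CastOp, shouldElide, UsesOk, and InherentOp are illustrative stand-ins rather than the enumerator's real interface:

  enum class CastOp { BitCast, IntToPtr, Other };

  // UsesOk stands in for AllUsesAllowNormalizedPtr, InherentOp for IsInherentPtr.
  constexpr bool shouldElide(CastOp Op, bool UsesOk, bool InherentOp) {
    return Op == CastOp::BitCast  ? (UsesOk && InherentOp)
         : Op == CastOp::IntToPtr ? UsesOk
         : false;
  }

  // A bitcast of a non-inherent pointer is kept even if every use is a load.
  static_assert(!shouldElide(CastOp::BitCast, /*UsesOk=*/true, /*InherentOp=*/false), "");
  // An inttoptr does not need the inherent-pointer check.
  static_assert(shouldElide(CastOp::IntToPtr, /*UsesOk=*/true, /*InherentOp=*/false), "");

  int main() {}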
diff --git a/test/NaCl/Bitcode/bitcast-elide.ll b/test/NaCl/Bitcode/bitcast-elide.ll
new file mode 100644
index 0000000000..1f8ff586ea
--- /dev/null
+++ b/test/NaCl/Bitcode/bitcast-elide.ll
@@ -0,0 +1,213 @@
+; Test how we handle eliding (pointer) bitcast instructions.
+; TODO(kschimpf) Expand these tests as further CLs are added for issue 3544.
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 | pnacl-bcanalyzer -dump \
+; RUN: | FileCheck %s -check-prefix=PF1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=1 | pnacl-thaw \
+; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD1
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 | pnacl-bcanalyzer -dump \
+; RUN: | FileCheck %s -check-prefix=PF2
+
+; RUN: llvm-as < %s | pnacl-freeze --pnacl-version=2 | pnacl-thaw \
+; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2
+
+; ------------------------------------------------------
+
+@bytes = internal global [7 x i8] c"abcdefg"
+
+; Test that we elide the simple case of a global.
+define void @SimpleLoad() {
+ %1 = bitcast [7 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ ret void
+}
+
+; TD1: define void @SimpleLoad() {
+; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK NumWords=2 BlockCodeSize=4>
+; PF1-NEXT: <DECLAREBLOCKS op0=1/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=1 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_RET abbrevid=8/>
+; PF1-NEXT: </FUNCTION_BLOCK>
+
+; TD2: define void @SimpleLoad() {
+; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK NumWords=2 BlockCodeSize=4>
+; PF2-NEXT: <DECLAREBLOCKS op0=1/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_RET abbrevid=8/>
+; PF2-NEXT: </FUNCTION_BLOCK>
+
+; Test that we elide the simple case of an alloca.
+define void @SimpleLoadAlloca() {
+ %1 = alloca i8, i32 4, align 4
+ %2 = bitcast i8* %1 to i32*
+ %3 = load i32* %2, align 4
+ ret void
+}
+
+; TD1: define void @SimpleLoadAlloca() {
+; TD1-NEXT: %1 = alloca i8, i32 4, align 4
+; TD1-NEXT: %2 = bitcast i8* %1 to i32*
+; TD1-NEXT: %3 = load i32* %2, align 4
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF1-NEXT: <DECLAREBLOCKS op0=1/>
+; PF1-NEXT: <CONSTANTS_BLOCK
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=3/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=1 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_RET abbrevid=8/>
+; PF1-NEXT: </FUNCTION_BLOCK>
+
+; TD2: define void @SimpleLoadAlloca() {
+; TD2-NEXT: %1 = alloca i8, i32 4, align 4
+; TD2-NEXT: %2 = bitcast i8* %1 to i32*
+; TD2-NEXT: %3 = load i32* %2, align 4
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF2-NEXT: <DECLAREBLOCKS op0=1/>
+; PF2-NEXT: <CONSTANTS_BLOCK
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=3/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_RET abbrevid=8/>
+; PF2-NEXT: </FUNCTION_BLOCK>
+
+; Test that we don't elide a bitcast if one of its uses is not a load.
+define i32* @NonsimpleLoad(i32 %i) {
+ %1 = bitcast [7 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ ret i32* %1
+}
+
+; TD1: define i32* @NonsimpleLoad(i32 %i) {
+; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: ret i32* %1
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF1-NEXT: <DECLAREBLOCKS op0=1/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_RET abbrevid=9 op0=2/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32* @NonsimpleLoad(i32 %i) {
+; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: ret i32* %1
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK NumWords=6 BlockCodeSize=4>
+; PF2-NEXT: <DECLAREBLOCKS op0=1/>
+; PF2-NEXT: <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_RET abbrevid=9 op0=2/>
+; PF2: </FUNCTION_BLOCK>
+
+; Test that we can handle multiple bitcasts.
+define i32 @TwoLoads(i32 %i) {
+ %1 = bitcast [7 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ %3 = bitcast [7 x i8]* @bytes to i32*
+ %4 = load i32* %3, align 4
+ %5 = add i32 %2, %4
+ ret i32 %5
+}
+
+; TD1: define i32 @TwoLoads(i32 %i) {
+; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %4 = load i32* %3, align 4
+; TD1-NEXT: %5 = add i32 %2, %4
+; TD1-NEXT: ret i32 %5
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK NumWords=8 BlockCodeSize=4>
+; PF1-NEXT: <DECLAREBLOCKS op0=1/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=4 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP abbrevid=5 op0=3 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET abbrevid=9 op0=1/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoads(i32 %i) {
+; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %4 = load i32* %3, align 4
+; TD2-NEXT: %5 = add i32 %2, %4
+; TD2-NEXT: ret i32 %5
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF2-NEXT: <DECLAREBLOCKS op0=1/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=3 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET abbrevid=9 op0=1/>
+; PF2: </FUNCTION_BLOCK>
+
+; Test how we duplicate bitcasts, even if they are optimized in the input file.
+define i32 @TwoLoadOpt(i32 %i) {
+ %1 = bitcast [7 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ %3 = load i32* %1, align 4
+ %4 = add i32 %2, %3
+ ret i32 %4
+}
+
+; TD1: define i32 @TwoLoadOpt(i32 %i) {
+; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = load i32* %1, align 4
+; TD1-NEXT: %4 = add i32 %2, %3
+; TD1-NEXT: ret i32 %4
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF1-NEXT: <DECLAREBLOCKS op0=1/>
+; PF1-NEXT: <INST_CAST abbrevid=7 op0=2 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET abbrevid=9 op0=1/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoadOpt(i32 %i) {
+; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %4 = load i32* %3, align 4
+; TD2-NEXT: %5 = add i32 %2, %4
+; TD2-NEXT: ret i32 %5
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK NumWords=7 BlockCodeSize=4>
+; PF2-NEXT: <DECLAREBLOCKS op0=1/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=2 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_LOAD abbrevid=4 op0=3 op1=3 op2=0 op3=0/>
+; PF2-NEXT: <INST_BINOP abbrevid=5 op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET abbrevid=9 op0=1/>
+; PF2: </FUNCTION_BLOCK>