aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp114
-rw-r--r--lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h96
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp2
-rw-r--r--test/NaCl/Bitcode/bitcast-elide.ll145
-rw-r--r--test/NaCl/Bitcode/inttoptr-elide.ll96
-rw-r--r--test/NaCl/Bitcode/ptrtoint-elide.ll587
6 files changed, 846 insertions, 194 deletions
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
index a38b18afa1..fbe1fc0165 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp
@@ -36,7 +36,6 @@ void NaClBitcodeReader::FreeState() {
std::vector<Type*>().swap(TypeList);
ValueList.clear();
- std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
}
@@ -1284,40 +1283,56 @@ bool NaClBitcodeReader::InstallInstruction(
return false;
}
-Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, BasicBlock *BB) {
+CastInst *
+NaClBitcodeReader::CreateCast(unsigned BBIndex, Instruction::CastOps Op,
+ Type *CT, Value *V, bool DeferInsertion) {
+ if (BBIndex >= FunctionBBs.size())
+ report_fatal_error("CreateCast on unknown basic block");
+ BasicBlockInfo &BBInfo = FunctionBBs[BBIndex];
+ NaClBitcodeReaderCast ModeledCast(Op, CT, V);
+ CastInst *Cast = BBInfo.CastMap[ModeledCast];
+ if (Cast == NULL) {
+ Cast = CastInst::Create(Op, V, CT);
+ BBInfo.CastMap[ModeledCast] = Cast;
+ if (DeferInsertion) {
+ BBInfo.PhiCasts.push_back(Cast);
+ }
+ }
+ if (!DeferInsertion && Cast->getParent() == 0) {
+ InstallInstruction(BBInfo.BB, Cast);
+ }
+ return Cast;
+}
+
+Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, unsigned BBIndex,
+ bool DeferInsertion) {
if (Op->getType()->isPointerTy()) {
- Instruction *Conversion = new PtrToIntInst(Op, IntPtrType);
- InstallInstruction(BB, Conversion);
- return Conversion;
+ return CreateCast(BBIndex, Instruction::PtrToInt, IntPtrType, Op,
+ DeferInsertion);
}
return Op;
}
-Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, BasicBlock *BB) {
+Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T,
+ unsigned BBIndex) {
// Note: Currently only knows how to add inttoptr and bitcast type
// conversions for non-phi nodes, since these are the only elided
// instructions in the bitcode writer.
//
// TODO(kschimpf): Generalize this as we expand elided conversions.
- Instruction *Conversion = 0;
Type *OpTy = Op->getType();
if (OpTy == T) return Op;
if (OpTy->isPointerTy()) {
- Conversion = new BitCastInst(Op, T);
+ return CreateCast(BBIndex, Instruction::BitCast, T, Op);
} else if (OpTy == IntPtrType) {
- Conversion = new IntToPtrInst(Op, T);
+ return CreateCast(BBIndex, Instruction::IntToPtr, T, Op);
}
- if (Conversion == 0) {
- std::string Message;
- raw_string_ostream StrM(Message);
- StrM << "Can't convert " << *Op << " to type " << *T << "\n";
- Error(StrM.str());
- } else {
- InstallInstruction(BB, Conversion);
- }
- return Conversion;
+ std::string Message;
+ raw_string_ostream StrM(Message);
+ StrM << "Can't convert " << *Op << " to type " << *T << "\n";
+ report_fatal_error(StrM.str());
}
Type *NaClBitcodeReader::ConvertTypeToScalarType(Type *T) {
@@ -1396,9 +1411,11 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid DECLAREBLOCKS record");
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
- for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create(Context, "", F);
- CurBB = FunctionBBs[0];
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) {
+ BasicBlockInfo &BBInfo = FunctionBBs[i];
+ BBInfo.BB = BasicBlock::Create(Context, "", F);
+ }
+ CurBB = FunctionBBs.at(0).BB;
continue;
case naclbitc::FUNC_CODE_INST_BINOP: {
@@ -1410,8 +1427,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OpNum+1 > Record.size())
return Error("Invalid BINOP record");
- LHS = ConvertOpToScalar(LHS, CurBB);
- RHS = ConvertOpToScalar(RHS, CurBB);
+ LHS = ConvertOpToScalar(LHS, CurBBNo);
+ RHS = ConvertOpToScalar(RHS, CurBBNo);
int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
if (Opc == -1) return Error("Invalid BINOP record");
@@ -1472,7 +1489,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
case Instruction::SExt:
case Instruction::UIToFP:
case Instruction::SIToFP:
- Op = ConvertOpToScalar(Op, CurBB);
+ Op = ConvertOpToScalar(Op, CurBBNo);
break;
default:
break;
@@ -1493,8 +1510,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, &OpNum, NextValueNo, &Cond))
return Error("Invalid SELECT record");
- TrueVal = ConvertOpToScalar(TrueVal, CurBB);
- FalseVal = ConvertOpToScalar(FalseVal, CurBB);
+ TrueVal = ConvertOpToScalar(TrueVal, CurBBNo);
+ FalseVal = ConvertOpToScalar(FalseVal, CurBBNo);
// expect i1
if (Cond->getType() != Type::getInt1Ty(Context))
@@ -1514,8 +1531,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OpNum+1 != Record.size())
return Error("Invalid CMP record");
- LHS = ConvertOpToScalar(LHS, CurBB);
- RHS = ConvertOpToScalar(RHS, CurBB);
+ LHS = ConvertOpToScalar(LHS, CurBBNo);
+ RHS = ConvertOpToScalar(RHS, CurBBNo);
if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -1622,9 +1639,6 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
Type *Ty = getTypeByID(Record[0]);
if (!Ty) return Error("Invalid PHI record");
- // TODO(kschimpf): Fix handling of converting types for values,
- // to handle elided casts, once the bitcode writer knows how.
-
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) {
@@ -1636,8 +1650,16 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
V = getValueSigned(Record, 1+i, NextValueNo);
else
V = getValue(Record, 1+i, NextValueNo);
- BasicBlock *BB = getBasicBlock(Record[2+i]);
+ unsigned BBIndex = Record[2+i];
+ BasicBlock *BB = getBasicBlock(BBIndex);
if (!V || !BB) return Error("Invalid PHI record");
+ if (GetPNaClVersion() == 2 && Ty == IntPtrType) {
+ // Delay installing scalar casts until all instructions of
+ // the function are rendered. This guarantees that we insert
+ // the conversion just before the incoming edge (or use an
+ // existing conversion if already installed).
+ V = ConvertOpToScalar(V, BBIndex, /* DeferInsertion = */ true);
+ }
PN->addIncoming(V, BB);
}
I = PN;
@@ -1672,7 +1694,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
Type *T = getTypeByID(Record[2]);
if (T == 0)
return Error("Invalid type for load instruction");
- Op = ConvertOpToType(Op, T->getPointerTo(), CurBB);
+ Op = ConvertOpToType(Op, T->getPointerTo(), CurBBNo);
if (Op == 0) return true;
I = new LoadInst(Op, "", false, (1 << Record[OpNum]) >> 1);
break;
@@ -1697,8 +1719,8 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
case 2:
if (OpNum+1 != Record.size())
return Error("Invalid STORE record");
- Val = ConvertOpToScalar(Val, CurBB);
- Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBB);
+ Val = ConvertOpToScalar(Val, CurBBNo);
+ Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBBNo);
I = new StoreInst(Val, Ptr, false, (1 << Record[OpNum]) >> 1);
break;
}
@@ -1767,7 +1789,7 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
// If this was a terminator instruction, move to the next block.
if (isa<TerminatorInst>(I)) {
++CurBBNo;
- CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ CurBB = getBasicBlock(CurBBNo);
}
// Non-void values get registered in the value table for future use.
@@ -1777,6 +1799,24 @@ bool NaClBitcodeReader::ParseFunctionBody(Function *F) {
OutOfRecordLoop:
+ // Add PHI conversions to corresponding incoming block, if not
+ // already in the block. Also clear all conversions after fixing
+ // PHI conversions.
+ for (unsigned I = 0, NumBBs = FunctionBBs.size(); I < NumBBs; ++I) {
+ BasicBlockInfo &BBInfo = FunctionBBs[I];
+ std::vector<CastInst*> &PhiCasts = BBInfo.PhiCasts;
+ for (std::vector<CastInst*>::iterator Iter = PhiCasts.begin(),
+ IterEnd = PhiCasts.end(); Iter != IterEnd; ++Iter) {
+ CastInst *Cast = *Iter;
+ if (Cast->getParent() == 0) {
+ BasicBlock *BB = BBInfo.BB;
+ BB->getInstList().insert(BB->getTerminator(), Cast);
+ }
+ }
+ PhiCasts.clear();
+ BBInfo.CastMap.clear();
+ }
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (A->getParent() == 0) {
@@ -1793,7 +1833,7 @@ OutOfRecordLoop:
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
- std::vector<BasicBlock*>().swap(FunctionBBs);
+ FunctionBBs.clear();
DEBUG(dbgs() << "-> ParseFunctionBody\n");
return false;
}
diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
index 814ef44efb..762088887f 100644
--- a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
+++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h
@@ -21,6 +21,7 @@
#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
#include "llvm/GVMaterializer.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ValueHandle.h"
@@ -29,6 +30,46 @@
namespace llvm {
class MemoryBuffer;
class LLVMContext;
+ class CastInst;
+
+// Models a Cast. Used to cache casts created in a basic block by the
+// PNaCl bitcode reader.
+struct NaClBitcodeReaderCast {
+ // Fields of the conversion.
+ Instruction::CastOps Op;
+ Type *Ty;
+ Value *Val;
+
+ NaClBitcodeReaderCast(Instruction::CastOps Op, Type *Ty, Value *Val)
+ : Op(Op), Ty(Ty), Val(Val) {}
+};
+
+// Models the data structure used to hash/compare Casts in a DenseMap.
+template<>
+struct DenseMapInfo<NaClBitcodeReaderCast> {
+public:
+ static NaClBitcodeReaderCast getEmptyKey() {
+ return NaClBitcodeReaderCast(Instruction::CastOpsEnd,
+ DenseMapInfo<Type*>::getEmptyKey(),
+ DenseMapInfo<Value*>::getEmptyKey());
+ }
+ static NaClBitcodeReaderCast getTombstoneKey() {
+ return NaClBitcodeReaderCast(Instruction::CastOpsEnd,
+ DenseMapInfo<Type*>::getTombstoneKey(),
+ DenseMapInfo<Value*>::getTombstoneKey());
+ }
+ static unsigned getHashValue(const NaClBitcodeReaderCast &C) {
+ std::pair<int, std::pair<Type*, Value*> > Tuple;
+ Tuple.first = C.Op;
+ Tuple.second.first = C.Ty;
+ Tuple.second.second = C.Val;
+ return DenseMapInfo<std::pair<int, std::pair<Type*, Value*> > >::getHashValue(Tuple);
+ }
+ static bool isEqual(const NaClBitcodeReaderCast &LHS,
+ const NaClBitcodeReaderCast &RHS) {
+ return LHS.Op == RHS.Op && LHS.Ty == RHS.Ty && LHS.Val == RHS.Val;
+ }
+};
//===----------------------------------------------------------------------===//
// NaClBitcodeReaderValueList Class
@@ -83,8 +124,8 @@ public:
// already been declared.
bool createValueFwdRef(unsigned Idx, Type *Ty);
- // Declares the type of the forward-referenced constant Idx. Returns
- // 0 if an error occurred.
+ // Declares the type of the forward-referenced constant Idx.
+ // Returns 0 if an error occurred.
// TODO(kschimpf) Convert these to be like createValueFwdRef and
// getValueFwdRef.
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
@@ -103,7 +144,7 @@ public:
// was forward referenced).
void AssignValue(Value *V, unsigned Idx);
- // Assigns Idx to the given global variable. If the Idx currently has
+ // Assigns Idx to the given global variable. If the Idx currently has
// a forward reference (built by createGlobalVarFwdRef(unsigned Idx)),
// replaces uses of the global variable forward reference with the
// value GV.
@@ -133,9 +174,20 @@ class NaClBitcodeReader : public GVMaterializer {
NaClBitcodeReaderValueList ValueList;
SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
+ // Holds information about each BasicBlock in the function being read.
+ struct BasicBlockInfo {
+ // A basic block within the function being modeled.
+ BasicBlock *BB;
+ // The set of generated conversions.
+ DenseMap<NaClBitcodeReaderCast, CastInst*> CastMap;
+ // The set of generated conversions that were added for phi nodes,
+ // and may need their parent basic block defined.
+ std::vector<CastInst*> PhiCasts;
+ };
+
/// FunctionBBs - While parsing a function body, this is a list of the basic
/// blocks for the function.
- std::vector<BasicBlock*> FunctionBBs;
+ std::vector<BasicBlockInfo> FunctionBBs;
// When reading the module header, this list is populated with functions that
// have bodies later in the file.
@@ -147,7 +199,7 @@ class NaClBitcodeReader : public GVMaterializer {
UpgradedIntrinsicMap UpgradedIntrinsics;
// Several operations happen after the module header has been read, but
- // before function bodies are processed. This keeps track of whether
+ // before function bodies are processed. This keeps track of whether
// we've done this yet.
bool SeenFirstFunctionBody;
@@ -226,14 +278,14 @@ private:
return Header.GetPNaClVersion();
}
Type *getTypeByID(unsigned ID);
- // Returns the value associated with ID. The value must already exist,
+ // Returns the value associated with ID. The value must already exist,
// or a forward referenced value created by getOrCreateFnVaueByID.
Value *getFnValueByID(unsigned ID) {
return ValueList.getValueFwdRef(ID);
}
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return 0; // Invalid ID
- return FunctionBBs[ID];
+ return FunctionBBs[ID].BB;
}
/// \brief Read a value out of the specified record from slot '*Slot'.
@@ -273,18 +325,30 @@ private:
return getFnValueByID(ValNo);
}
- /// \brief Add instructions to cast Op to the given type T into block BB.
- /// Follows rules for pointer conversion as defined in
- /// llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
+ /// \brief Create an (elided) cast instruction for basic block
+ /// BBIndex. Op is the type of cast. V is the value to cast. CT
+ /// is the type to convert V to. DeferInsertion defines whether the
+ /// generated conversion should also be installed into basic block
+ /// BBIndex. Note: For PHI nodes, we don't insert when created
+ /// (i.e. DeferInsertion=true), since they must be inserted at the end
+ /// of the corresponding incoming basic block.
+ CastInst *CreateCast(unsigned BBIndex, Instruction::CastOps Op,
+ Type *CT, Value *V, bool DeferInsertion = false);
+
+ /// \brief Add instructions to cast Op to the given type T into
+ /// block BBIndex. Follows rules for pointer conversion as defined
+ /// in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
///
/// Returns 0 if unable to generate conversion value (also generates
/// an appropriate error message and calls Error).
- Value *ConvertOpToType(Value *Op, Type *T, BasicBlock *BB);
-
- /// \brief If Op is a scalar value, this is a nop. If Op is a
- /// pointer value, a PtrToInt instruction is inserted (in BB) to
- /// convert Op to an integer.
- Value *ConvertOpToScalar(Value *Op, BasicBlock *BB);
+ Value *ConvertOpToType(Value *Op, Type *T, unsigned BBIndex);
+
+ /// \brief If Op is a scalar value, this is a nop. If Op is a
+ /// pointer value, a PtrToInt instruction is inserted (in BBIndex)
+ /// to convert Op to an integer. For defaults on DeferInsertion,
+ /// see comments for method CreateCast.
+ Value *ConvertOpToScalar(Value *Op, unsigned BBIndex,
+ bool DeferInsertion = false);
/// \brief Returns the corresponding, PNaCl non-pointer equivalent
/// for the given type.
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index bee36e2631..060a6d63f4 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -479,6 +479,7 @@ static bool ExpectsScalarValue(const Value *V, const Instruction *Arg) {
switch (I->getOpcode()) {
default:
return false;
+ case Instruction::PHI:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -497,7 +498,6 @@ static bool ExpectsScalarValue(const Value *V, const Instruction *Arg) {
// instructions:
// case Instruction::IntToPtr:
// case Instruction::BitCast:
- // case Instruction::PHI:
// case Instruction::Call:
}
}
diff --git a/test/NaCl/Bitcode/bitcast-elide.ll b/test/NaCl/Bitcode/bitcast-elide.ll
index eeee69ffef..383673d684 100644
--- a/test/NaCl/Bitcode/bitcast-elide.ll
+++ b/test/NaCl/Bitcode/bitcast-elide.ll
@@ -17,17 +17,19 @@
; ------------------------------------------------------
-@bytes = internal global [7 x i8] c"abcdefg"
+@bytes = internal global [4 x i8] c"abcd"
+
+; ------------------------------------------------------
; Test that we elide the simple case of global.
define void @SimpleLoad() {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
ret void
}
; TD1: define void @SimpleLoad() {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: ret void
; TD1-NEXT: }
@@ -40,7 +42,7 @@ define void @SimpleLoad() {
; PF1-NEXT: </FUNCTION_BLOCK>
; TD2: define void @SimpleLoad() {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -51,6 +53,8 @@ define void @SimpleLoad() {
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we elide the simple case of an alloca.
define void @SimpleLoadAlloca() {
%1 = alloca i8, i32 4, align 4
@@ -67,8 +71,6 @@ define void @SimpleLoadAlloca() {
; TD1-NEXT: }
; PF1: <FUNCTION_BLOCK>
-; PF1-NEXT: <DECLAREBLOCKS op0=1/>
-; PF1-NEXT: <CONSTANTS_BLOCK
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=1 op1=3/>
; PF1-NEXT: <INST_CAST op0=1 op1=1 op2=11/>
@@ -84,23 +86,23 @@ define void @SimpleLoadAlloca() {
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
-; PF2-NEXT: <DECLAREBLOCKS op0=1/>
-; PF2-NEXT: <CONSTANTS_BLOCK
; PF2: </CONSTANTS_BLOCK>
; PF2-NEXT: <INST_ALLOCA op0=1 op1=3/>
; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we don't elide an bitcast if one of its uses is not a load.
define i32* @NonsimpleLoad(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
ret i32* %1
}
; TD1: define i32* @NonsimpleLoad(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: ret i32* %1
; TD1-NEXT: }
@@ -113,7 +115,7 @@ define i32* @NonsimpleLoad(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define i32* @NonsimpleLoad(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
; TD2-NEXT: ret i32* %1
; TD2-NEXT: }
@@ -125,20 +127,22 @@ define i32* @NonsimpleLoad(i32 %i) {
; PF2-NEXT: <INST_RET op0=2/>
; PF2: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
; Test that we can handle multiple bitcasts.
define i32 @TwoLoads(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
- %3 = bitcast [7 x i8]* @bytes to i32*
+ %3 = bitcast [4 x i8]* @bytes to i32*
%4 = load i32* %3, align 4
%5 = add i32 %2, %4
ret i32 %5
}
; TD1: define i32 @TwoLoads(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
-; TD1-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %3 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %4 = load i32* %3, align 4
; TD1-NEXT: %5 = add i32 %2, %4
; TD1-NEXT: ret i32 %5
@@ -155,12 +159,11 @@ define i32 @TwoLoads(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define i32 @TwoLoads(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -171,17 +174,20 @@ define i32 @TwoLoads(i32 %i) {
; PF2-NEXT: <INST_RET op0=1/>
; PF2: </FUNCTION_BLOCK>
-; Test how we duplicate bitcasts, even if optimized in the input file.
-define i32 @TwoLoadOpt(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+; ------------------------------------------------------
+
+; Test how we handle bitcasts if optimized in the input file. This
+; case tests within a single block.
+define i32 @TwoLoadOptOneBlock(i32 %i) {
+ %1 = bitcast [4 x i8]* @bytes to i32*
%2 = load i32* %1, align 4
%3 = load i32* %1, align 4
%4 = add i32 %2, %3
ret i32 %4
}
-; TD1: define i32 @TwoLoadOpt(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1: define i32 @TwoLoadOptOneBlock(i32 %i) {
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: %3 = load i32* %1, align 4
; TD1-NEXT: %4 = add i32 %2, %3
@@ -197,13 +203,12 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF1-NEXT: <INST_RET op0=1/>
; PF1: </FUNCTION_BLOCK>
-; TD2: define i32 @TwoLoadOpt(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2: define i32 @TwoLoadOptOneBlock(i32 %i) {
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = bitcast [7 x i8]* @bytes to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -214,15 +219,87 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF2-NEXT: <INST_RET op0=1/>
; PF2: </FUNCTION_BLOCK>
+; ------------------------------------------------------
+
+; Test how we handle bitcasts if optimized in the input file. This
+; case tests across blocks.
+define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+ %1 = bitcast [4 x i8]* @bytes to i32*
+ %2 = load i32* %1, align 4
+ %3 = load i32* %1, align 4
+ %4 = add i32 %2, %3
+ br label %BB
+
+BB:
+ %5 = load i32* %1, align 4
+ %6 = load i32* %1, align 4
+ %7 = add i32 %5, %6
+ ret i32 %4
+}
+
+; TD1: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = load i32* %1, align 4
+; TD1-NEXT: %4 = add i32 %2, %3
+; TD1-NEXT: br label %BB
+; TD1: BB:
+; TD1-NEXT: %5 = load i32* %1, align 4
+; TD1-NEXT: %6 = load i32* %1, align 4
+; TD1-NEXT: %7 = add i32 %5, %6
+; TD1-NEXT: ret i32 %4
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1-NEXT: <DECLAREBLOCKS op0=2/>
+; PF1-NEXT: <INST_CAST op0=2 op1=1 op2=11/>
+; PF1-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET op0=4/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: br label %BB
+; TD2: BB:
+; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: %6 = load i32* %5, align 4
+; TD2-NEXT: %7 = load i32* %5, align 4
+; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: ret i32 %4
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2-NEXT: <DECLAREBLOCKS op0=2/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=3 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=6 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET op0=4/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
; Test that we elide the simple case of bitcast for a store.
define void @SimpleStore(i32 %i) {
- %1 = bitcast [7 x i8]* @bytes to i32*
+ %1 = bitcast [4 x i8]* @bytes to i32*
store i32 %i, i32* %1, align 4
ret void
}
; TD1: define void @SimpleStore(i32 %i) {
-; TD1-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD1-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD1-NEXT: store i32 %i, i32* %1, align 4
; TD1-NEXT: ret void
; TD1-NEXT: }
@@ -235,7 +312,7 @@ define void @SimpleStore(i32 %i) {
; PF1: </FUNCTION_BLOCK>
; TD2: define void @SimpleStore(i32 %i) {
-; TD2-NEXT: %1 = bitcast [7 x i8]* @bytes to i32*
+; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: store i32 %i, i32* %1, align 4
; TD2-NEXT: ret void
; TD2-NEXT: }
diff --git a/test/NaCl/Bitcode/inttoptr-elide.ll b/test/NaCl/Bitcode/inttoptr-elide.ll
index 029f67adef..679f5f1d47 100644
--- a/test/NaCl/Bitcode/inttoptr-elide.ll
+++ b/test/NaCl/Bitcode/inttoptr-elide.ll
@@ -118,13 +118,11 @@ define i32 @TwoLoads(i32 %i) {
; TD2: define i32 @TwoLoads(i32 %i) {
; TD2-NEXT: %1 = inttoptr i32 %i to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = inttoptr i32 %i to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
-
; PF2: <FUNCTION_BLOCK>
; PF2-NEXT: <DECLAREBLOCKS op0=1/>
; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
@@ -135,8 +133,9 @@ define i32 @TwoLoads(i32 %i) {
; ------------------------------------------------------
-; Test how we duplicate inttoptrs, even if optimized in the input file.
-define i32 @TwoLoadOpt(i32 %i) {
+; Test how we handle inttoptrs, if optimized in the input file. This
+; case tests within a single block.
+define i32 @TwoLoadOptOneBlock(i32 %i) {
%1 = inttoptr i32 %i to i32*
%2 = load i32* %1, align 4
%3 = load i32* %1, align 4
@@ -144,7 +143,7 @@ define i32 @TwoLoadOpt(i32 %i) {
ret i32 %4
}
-; TD1: define i32 @TwoLoadOpt(i32 %i) {
+; TD1: define i32 @TwoLoadOptOneBlock(i32 %i) {
; TD1-NEXT: %1 = inttoptr i32 %i to i32*
; TD1-NEXT: %2 = load i32* %1, align 4
; TD1-NEXT: %3 = load i32* %1, align 4
@@ -161,13 +160,12 @@ define i32 @TwoLoadOpt(i32 %i) {
; PF1-NEXT: <INST_RET op0=1/>
; PF1: </FUNCTION_BLOCK>
-; TD2: define i32 @TwoLoadOpt(i32 %i) {
+; TD2: define i32 @TwoLoadOptOneBlock(i32 %i) {
; TD2-NEXT: %1 = inttoptr i32 %i to i32*
; TD2-NEXT: %2 = load i32* %1, align 4
-; TD2-NEXT: %3 = inttoptr i32 %i to i32*
-; TD2-NEXT: %4 = load i32* %3, align 4
-; TD2-NEXT: %5 = add i32 %2, %4
-; TD2-NEXT: ret i32 %5
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: ret i32 %4
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
@@ -180,6 +178,76 @@ define i32 @TwoLoadOpt(i32 %i) {
; ------------------------------------------------------
+; Test how we handle inttoptrs if optimized in the input file. This
+; case tests across blocks.
+define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+ %1 = inttoptr i32 %i to i32*
+ %2 = load i32* %1, align 4
+ %3 = load i32* %1, align 4
+ %4 = add i32 %2, %3
+ br label %BB
+
+BB:
+ %5 = load i32* %1, align 4
+ %6 = load i32* %1, align 4
+ %7 = add i32 %5, %6
+ ret i32 %7
+}
+
+; TD1: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD1-NEXT: %1 = inttoptr i32 %i to i32*
+; TD1-NEXT: %2 = load i32* %1, align 4
+; TD1-NEXT: %3 = load i32* %1, align 4
+; TD1-NEXT: %4 = add i32 %2, %3
+; TD1-NEXT: br label %BB
+; TD1: BB:
+; TD1-NEXT: %5 = load i32* %1, align 4
+; TD1-NEXT: %6 = load i32* %1, align 4
+; TD1-NEXT: %7 = add i32 %5, %6
+; TD1-NEXT: ret i32 %7
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1-NEXT: <DECLAREBLOCKS op0=2/>
+; PF1-NEXT: <INST_CAST op0=1 op1=1 op2=10/>
+; PF1-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF1-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_RET op0=1/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define i32 @TwoLoadOptTwoBlocks(i32 %i) {
+; TD2-NEXT: %1 = inttoptr i32 %i to i32*
+; TD2-NEXT: %2 = load i32* %1, align 4
+; TD2-NEXT: %3 = load i32* %1, align 4
+; TD2-NEXT: %4 = add i32 %2, %3
+; TD2-NEXT: br label %BB
+; TD2: BB:
+; TD2-NEXT: %5 = inttoptr i32 %i to i32*
+; TD2-NEXT: %6 = load i32* %5, align 4
+; TD2-NEXT: %7 = load i32* %5, align 4
+; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: ret i32 %8
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2-NEXT: <DECLAREBLOCKS op0=2/>
+; PF2-NEXT: <INST_LOAD op0=1 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=4 op1=3 op2=0/>
+; PF2-NEXT: <INST_LOAD op0=5 op1=3 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF2-NEXT: <INST_RET op0=1/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
; Test that we elide the simple case of inttoptr for a store.
define void @SimpleStore(i32 %i) {
%1 = inttoptr i32 %i to i32*
@@ -210,4 +278,4 @@ define void @SimpleStore(i32 %i) {
; PF2-NEXT: <DECLAREBLOCKS op0=1/>
; PF2-NEXT: <INST_STORE op0=1 op1=1 op2=3/>
; PF2-NEXT: <INST_RET/>
-; PF2T: </FUNCTION_BLOCK>
+; PF2: </FUNCTION_BLOCK>
diff --git a/test/NaCl/Bitcode/ptrtoint-elide.ll b/test/NaCl/Bitcode/ptrtoint-elide.ll
index 10504a8577..43a82a0802 100644
--- a/test/NaCl/Bitcode/ptrtoint-elide.ll
+++ b/test/NaCl/Bitcode/ptrtoint-elide.ll
@@ -153,8 +153,8 @@ define void @AllocCastDelete() {
; ------------------------------------------------------
; Show case where we have optimized the ptrtoint (and bitcast) into a
-; single instruction, but will get duplicated after reading back the
-; bitcode file, since we insert elided casts immediately before each use.
+; single instruction, and will only be inserted before the first use
+; in the block.
define void @AllocCastOpt() {
%1 = alloca i8, i32 4, align 8
%2 = bitcast [4 x i8]* @bytes to i32*
@@ -177,7 +177,7 @@ define void @AllocCastOpt() {
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
; PF1-NEXT: <INST_CAST op0=3 op1=4 op2=11/>
-; PF1-NEXT: <INST_CAST op0=2 op1=0 op2=9/>
+; PF1-NEXT: <INST_CAST op0=2 op1=0 op2=9/>
; PF1-NEXT: <INST_STORE op0=2 op1=1 op2=1 op3=0/>
; PF1-NEXT: <INST_STORE op0=2 op1=1 op2=1 op3=0/>
; PF1-NEXT: <INST_RET/>
@@ -188,9 +188,7 @@ define void @AllocCastOpt() {
; TD2-NEXT: %2 = ptrtoint i8* %1 to i32
; TD2-NEXT: %3 = bitcast [4 x i8]* @bytes to i32*
; TD2-NEXT: store i32 %2, i32* %3, align 1
-; TD2-NEXT: %4 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32*
-; TD2-NEXT: store i32 %4, i32* %5, align 1
+; TD2-NEXT: store i32 %2, i32* %3, align 1
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -366,7 +364,6 @@ define i32 @StoreGlobalMovePtr2Int() {
; PF1-NEXT: <INST_RET op0=4/>
; PF1-NEXT: </FUNCTION_BLOCK>
-
; TD2: define i32 @StoreGlobalMovePtr2Int() {
; TD2-NEXT: %1 = alloca i8, i32 4, align 8
; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32
@@ -430,11 +427,8 @@ define void @CastAddAlloca() {
; TD2-NEXT: %2 = add i32 1, 2
; TD2-NEXT: %3 = ptrtoint i8* %1 to i32
; TD2-NEXT: %4 = add i32 %3, 2
-; TD2-NEXT: %5 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %6 = add i32 1, %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %9 = add i32 %7, %8
+; TD2-NEXT: %5 = add i32 1, %3
+; TD2-NEXT: %6 = add i32 %3, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -491,11 +485,8 @@ define void @CastAddGlobal() {
; TD2-NEXT: %1 = add i32 1, 2
; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %3 = add i32 %2, 2
-; TD2-NEXT: %4 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %5 = add i32 1, %4
-; TD2-NEXT: %6 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %7 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %8 = add i32 %6, %7
+; TD2-NEXT: %4 = add i32 1, %2
+; TD2-NEXT: %5 = add i32 %2, %2
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -571,36 +562,16 @@ define void @CastBinop() {
; TD2-NEXT: %2 = ptrtoint i8* %1 to i32
; TD2-NEXT: %3 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %4 = sub i32 %2, %3
-; TD2-NEXT: %5 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %6 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %7 = mul i32 %5, %6
-; TD2-NEXT: %8 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %9 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %10 = udiv i32 %8, %9
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %13 = urem i32 %11, %12
-; TD2-NEXT: %14 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %15 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %16 = srem i32 %14, %15
-; TD2-NEXT: %17 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %18 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %19 = shl i32 %17, %18
-; TD2-NEXT: %20 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %21 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %22 = lshr i32 %20, %21
-; TD2-NEXT: %23 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %24 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %25 = ashr i32 %23, %24
-; TD2-NEXT: %26 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %27 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %28 = and i32 %26, %27
-; TD2-NEXT: %29 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %30 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %31 = or i32 %29, %30
-; TD2-NEXT: %32 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %33 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %34 = xor i32 %32, %33
+; TD2-NEXT: %5 = mul i32 %2, %3
+; TD2-NEXT: %6 = udiv i32 %2, %3
+; TD2-NEXT: %7 = urem i32 %2, %3
+; TD2-NEXT: %8 = srem i32 %2, %3
+; TD2-NEXT: %9 = shl i32 %2, %3
+; TD2-NEXT: %10 = lshr i32 %2, %3
+; TD2-NEXT: %11 = ashr i32 %2, %3
+; TD2-NEXT: %12 = and i32 %2, %3
+; TD2-NEXT: %13 = or i32 %2, %3
+; TD2-NEXT: %14 = xor i32 %2, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -666,16 +637,16 @@ define void @TestCasts() {
; PF1: </CONSTANTS_BLOCK>
; PF1-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
-; PF1-NEXT: <INST_CAST op0=6 op1=1 op2=0/>
-; PF1-NEXT: <INST_CAST op0=2 op1=1 op2=0/>
-; PF1-NEXT: <INST_CAST op0=8 op1=10 op2=1/>
-; PF1-NEXT: <INST_CAST op0=4 op1=10 op2=1/>
-; PF1-NEXT: <INST_CAST op0=9 op1=10 op2=2/>
-; PF1-NEXT: <INST_CAST op0=6 op1=10 op2=2/>
-; PF1-NEXT: <INST_CAST op0=9 op1=11 op2=5/>
-; PF1-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF1-NEXT: <INST_CAST op0=13 op1=11 op2=6/>
-; PF1-NEXT: <INST_CAST op0=10 op1=11 op2=6/>
+; PF1-NEXT: <INST_CAST op0=6 op1=2 op2=0/>
+; PF1-NEXT: <INST_CAST op0=2 op1=2 op2=0/>
+; PF1-NEXT: <INST_CAST op0=8 op1=13 op2=1/>
+; PF1-NEXT: <INST_CAST op0=4 op1=13 op2=1/>
+; PF1-NEXT: <INST_CAST op0=9 op1=13 op2=2/>
+; PF1-NEXT: <INST_CAST op0=6 op1=13 op2=2/>
+; PF1-NEXT: <INST_CAST op0=9 op1=14 op2=5/>
+; PF1-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF1-NEXT: <INST_CAST op0=13 op1=14 op2=6/>
+; PF1-NEXT: <INST_CAST op0=10 op1=14 op2=6/>
; PF1-NEXT: <INST_RET/>
; PF1-NEXT: </FUNCTION_BLOCK>
@@ -685,33 +656,29 @@ define void @TestCasts() {
; TD2-NEXT: %3 = ptrtoint i8* %1 to i32
; TD2-NEXT: %4 = trunc i32 %3 to i8
; TD2-NEXT: %5 = zext i32 257 to i64
-; TD2-NEXT: %6 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %7 = zext i32 %6 to i64
-; TD2-NEXT: %8 = sext i32 -1 to i64
-; TD2-NEXT: %9 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %10 = sext i32 %9 to i64
-; TD2-NEXT: %11 = uitofp i32 1 to float
-; TD2-NEXT: %12 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %13 = uitofp i32 %12 to float
-; TD2-NEXT: %14 = sitofp i32 -1 to float
-; TD2-NEXT: %15 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %16 = sitofp i32 %15 to float
+; TD2-NEXT: %6 = zext i32 %3 to i64
+; TD2-NEXT: %7 = sext i32 -1 to i64
+; TD2-NEXT: %8 = sext i32 %3 to i64
+; TD2-NEXT: %9 = uitofp i32 1 to float
+; TD2-NEXT: %10 = uitofp i32 %3 to float
+; TD2-NEXT: %11 = sitofp i32 -1 to float
+; TD2-NEXT: %12 = sitofp i32 %3 to float
; TD2-NEXT: ret void
; TD2-NEXT: }
; PF2: <FUNCTION_BLOCK>
; PF2: </CONSTANTS_BLOCK>
; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
-; PF2-NEXT: <INST_CAST op0=5 op1=1 op2=0/>
-; PF2-NEXT: <INST_CAST op0=2 op1=1 op2=0/>
-; PF2-NEXT: <INST_CAST op0=7 op1=10 op2=1/>
-; PF2-NEXT: <INST_CAST op0=4 op1=10 op2=1/>
-; PF2-NEXT: <INST_CAST op0=8 op1=10 op2=2/>
-; PF2-NEXT: <INST_CAST op0=6 op1=10 op2=2/>
-; PF2-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF2-NEXT: <INST_CAST op0=8 op1=11 op2=5/>
-; PF2-NEXT: <INST_CAST op0=12 op1=11 op2=6/>
-; PF2-NEXT: <INST_CAST op0=10 op1=11 op2=6/>
+; PF2-NEXT: <INST_CAST op0=5 op1=2 op2=0/>
+; PF2-NEXT: <INST_CAST op0=2 op1=2 op2=0/>
+; PF2-NEXT: <INST_CAST op0=7 op1=13 op2=1/>
+; PF2-NEXT: <INST_CAST op0=4 op1=13 op2=1/>
+; PF2-NEXT: <INST_CAST op0=8 op1=13 op2=2/>
+; PF2-NEXT: <INST_CAST op0=6 op1=13 op2=2/>
+; PF2-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF2-NEXT: <INST_CAST op0=8 op1=14 op2=5/>
+; PF2-NEXT: <INST_CAST op0=12 op1=14 op2=6/>
+; PF2-NEXT: <INST_CAST op0=10 op1=14 op2=6/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
@@ -741,7 +708,7 @@ define void @TestSavedPtrToInt() {
; PF1-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
; PF1-NEXT: <INST_BINOP op0=1 op1=3 op2=0/>
-; PF1-NEXT: <INST_CALL op0=0 op1=22 op2=2/>
+; PF1-NEXT: <INST_CALL op0=0 op1=26 op2=2/>
; PF1-NEXT: <INST_RET/>
; PF1-NEXT: </FUNCTION_BLOCK>
@@ -758,7 +725,7 @@ define void @TestSavedPtrToInt() {
; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
; PF2-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
; PF2-NEXT: <INST_BINOP op0=1 op1=3 op2=0/>
-; PF2-NEXT: <INST_CALL op0=0 op1=22 op2=2/>
+; PF2-NEXT: <INST_CALL op0=0 op1=26 op2=2/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
@@ -809,12 +776,8 @@ define void @CastIcmp() {
; TD2-NEXT: %4 = icmp eq i32 %3, 2
; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %6 = icmp eq i32 1, %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %9 = icmp eq i32 %7, %8
-; TD2-NEXT: %10 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = icmp eq i32 %10, %11
+; TD2-NEXT: %7 = icmp eq i32 %3, %5
+; TD2-NEXT: %8 = icmp eq i32 %5, %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -876,12 +839,8 @@ define void @CastSelect() {
; TD2-NEXT: %4 = select i1 true, i32 %3, i32 2
; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32
; TD2-NEXT: %6 = select i1 true, i32 1, i32 %5
-; TD2-NEXT: %7 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %8 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %9 = select i1 true, i32 %7, i32 %8
-; TD2-NEXT: %10 = ptrtoint [4 x i8]* @bytes to i32
-; TD2-NEXT: %11 = ptrtoint i8* %1 to i32
-; TD2-NEXT: %12 = select i1 true, i32 %10, i32 %11
+; TD2-NEXT: %7 = select i1 true, i32 %3, i32 %5
+; TD2-NEXT: %8 = select i1 true, i32 %5, i32 %3
; TD2-NEXT: ret void
; TD2-NEXT: }
@@ -895,3 +854,447 @@ define void @CastSelect() {
; PF2-NEXT: <INST_VSELECT op0=10 op1=5 op2=6/>
; PF2-NEXT: <INST_RET/>
; PF2-NEXT: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that if a phi node refers to a pointer cast, we add
+; the cast at the end of the incoming block.
+define void @PhiBackwardRefs(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = bitcast i8* %2 to i32*
+ %4 = alloca i8, i32 4, align 8
+ %5 = ptrtoint i8* %4 to i32
+ br i1 %0, label %true, label %false
+
+true:
+ %6 = load i32* %3
+ br label %merge
+
+false:
+ %7 = load i32* %3
+ br label %merge
+
+merge:
+ %8 = phi i32 [%5, %true], [%5, %false]
+ %9 = phi i32 [%6, %true], [%7, %false]
+ ret void
+}
+
+; TD1: define void @PhiBackwardRefs(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = bitcast i8* %2 to i32*
+; TD1-NEXT: %4 = alloca i8, i32 4, align 8
+; TD1-NEXT: %5 = ptrtoint i8* %4 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1: true:
+; TD1-NEXT: %6 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %7 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: merge:
+; TD1-NEXT: %8 = phi i32 [ %5, %true ], [ %5, %false ]
+; TD1-NEXT: %9 = phi i32 [ %6, %true ], [ %7, %false ]
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=6/>
+; PF1-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_LOAD op0=4 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=6 op4=2/>
+; PF1-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=4 op4=2/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiBackwardRefs(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: %3 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %2 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %7 = bitcast i8* %2 to i32*
+; TD2-NEXT: %8 = load i32* %7
+; TD2-NEXT: %9 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: merge:
+; TD2-NEXT: %10 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD2-NEXT: %11 = phi i32 [ %5, %true ], [ %8, %false ]
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=4/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=6 op4=2/>
+; PF2-NEXT: <INST_PHI op0=0 op1=6 op2=1 op3=4 op4=2/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Like PhiBackwardRefs, except that the phi nodes forward-reference
+; instructions instead of referring back to earlier ones.
+define void @PhiForwardRefs(i1) {
+ br label %start
+
+merge:
+ %2 = phi i32 [%9, %true], [%9, %false]
+ %3 = phi i32 [%4, %true], [%5, %false]
+ ret void
+
+true:
+ %4 = load i32* %7
+ br label %merge
+
+false:
+ %5 = load i32* %7
+ br label %merge
+
+start:
+ %6 = alloca i8, i32 4, align 8
+ %7 = bitcast i8* %6 to i32*
+ %8 = alloca i8, i32 4, align 8
+ %9 = ptrtoint i8* %8 to i32
+ br i1 %0, label %true, label %false
+}
+
+; TD1: define void @PhiForwardRefs(i1) {
+; TD1-NEXT: br label %start
+; TD1: merge:
+; TD1-NEXT: %2 = phi i32 [ %9, %true ], [ %9, %false ]
+; TD1-NEXT: %3 = phi i32 [ %4, %true ], [ %5, %false ]
+; TD1-NEXT: ret void
+; TD1: true:
+; TD1-NEXT: %4 = load i32* %7
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %5 = load i32* %7
+; TD1-NEXT: br label %merge
+; TD1: start:
+; TD1-NEXT: %6 = alloca i8, i32 4, align 8
+; TD1-NEXT: %7 = bitcast i8* %6 to i32*
+; TD1-NEXT: %8 = alloca i8, i32 4, align 8
+; TD1-NEXT: %9 = ptrtoint i8* %8 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_BR op0=4/>
+; PF1-NEXT: <FORWARDTYPEREF op0=30 op1=0/>
+; PF1-NEXT: <INST_PHI op0=0 op1=15 op2=2 op3=15 op4=3/>
+; PF1-NEXT: <FORWARDTYPEREF op0=25 op1=0/>
+; PF1-NEXT: <FORWARDTYPEREF op0=26 op1=0/>
+; PF1-NEXT: <INST_PHI op0=0 op1=3 op2=2 op3=5 op4=3/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <FORWARDTYPEREF op0=28 op1=4/>
+; PF1-NEXT: <INST_LOAD op0=4294967293 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_LOAD op0=4294967294 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=1/>
+; PF1-NEXT: <INST_ALLOCA op0=5 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=7 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=2 op1=3 op2=10/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiForwardRefs(i1) {
+; TD2-NEXT: br label %start
+; TD2: merge
+; TD2-NEXT: %2 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD2-NEXT: %3 = phi i32 [ %5, %true ], [ %8, %false ]
+; TD2-NEXT: ret void
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %10 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %11 to i32
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %7 = bitcast i8* %10 to i32*
+; TD2-NEXT: %8 = load i32* %7
+; TD2-NEXT: %9 = ptrtoint i8* %11 to i32
+; TD2-NEXT: br label %merge
+; TD2: start:
+; TD2-NEXT: %10 = alloca i8, i32 4, align 8
+; TD2-NEXT: %11 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_BR op0=4/>
+; PF2-NEXT: <FORWARDTYPEREF op0=28 op1=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=11 op2=2 op3=11 op4=3/>
+; PF2-NEXT: <FORWARDTYPEREF op0=25 op1=0/>
+; PF2-NEXT: <FORWARDTYPEREF op0=26 op1=0/>
+; PF2-NEXT: <INST_PHI op0=0 op1=3 op2=2 op3=5 op4=3/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <FORWARDTYPEREF op0=27 op1=3/>
+; PF2-NEXT: <INST_LOAD op0=4294967294 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_LOAD op0=4294967295 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=1/>
+; PF2-NEXT: <INST_ALLOCA op0=5 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=6 op1=4/>
+; PF2-NEXT: <INST_BR op0=2 op1=3 op2=8/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that if a phi node incoming block already has a pointer cast,
+; we use it instead of adding one at the end of the block. In this
+; example, we reuse instruction %7 in block true for phi node %10.
+define void @PhiMergeCast(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = bitcast i8* %2 to i32*
+ %4 = alloca i8, i32 4, align 8
+ %5 = ptrtoint i8* %4 to i32
+ br i1 %0, label %true, label %false
+
+true:
+ %6 = load i32* %3
+ %7 = ptrtoint i8* %4 to i32
+ %8 = add i32 %6, %7
+ br label %merge
+
+false:
+ %9 = load i32* %3
+ br label %merge
+
+merge:
+ %10 = phi i32 [%5, %true], [%5, %false]
+ %11 = phi i32 [%6, %true], [%9, %false]
+ ret void
+}
+
+; TD1: define void @PhiMergeCast(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = bitcast i8* %2 to i32*
+; TD1-NEXT: %4 = alloca i8, i32 4, align 8
+; TD1-NEXT: %5 = ptrtoint i8* %4 to i32
+; TD1-NEXT: br i1 %0, label %true, label %false
+; TD1: true:
+; TD1-NEXT: %6 = load i32* %3
+; TD1-NEXT: %7 = ptrtoint i8* %4 to i32
+; TD1-NEXT: %8 = add i32 %6, %7
+; TD1-NEXT: br label %merge
+; TD1: false:
+; TD1-NEXT: %9 = load i32* %3
+; TD1-NEXT: br label %merge
+; TD1: merge:
+; TD1-NEXT: %10 = phi i32 [ %5, %true ], [ %5, %false ]
+; TD1-NEXT: %11 = phi i32 [ %6, %true ], [ %9, %false ]
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=4 op2=11/>
+; PF1-NEXT: <INST_ALLOCA op0=3 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=6/>
+; PF1-NEXT: <INST_LOAD op0=3 op1=0 op2=0/>
+; PF1-NEXT: <INST_CAST op0=3 op1=0 op2=9/>
+; PF1-NEXT: <INST_BINOP op0=2 op1=1 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_LOAD op0=6 op1=0 op2=0/>
+; PF1-NEXT: <INST_BR op0=3/>
+; PF1-NEXT: <INST_PHI op0=0 op1=10 op2=1 op3=10 op4=2/>
+; PF1-NEXT: <INST_PHI op0=0 op1=10 op2=1 op3=4 op4=2/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @PhiMergeCast(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: %3 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %true, label %false
+; TD2: true:
+; TD2-NEXT: %4 = bitcast i8* %2 to i32*
+; TD2-NEXT: %5 = load i32* %4
+; TD2-NEXT: %6 = ptrtoint i8* %3 to i32
+; TD2-NEXT: %7 = add i32 %5, %6
+; TD2-NEXT: br label %merge
+; TD2: false:
+; TD2-NEXT: %8 = bitcast i8* %2 to i32*
+; TD2-NEXT: %9 = load i32* %8
+; TD2-NEXT: %10 = ptrtoint i8* %3 to i32
+; TD2-NEXT: br label %merge
+; TD2: merge:
+; TD2-NEXT: %11 = phi i32 [ %6, %true ], [ %10, %false ]
+; TD2-NEXT: %12 = phi i32 [ %5, %true ], [ %9, %false ]
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_ALLOCA op0=2 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=4/>
+; PF2-NEXT: <INST_LOAD op0=2 op1=0 op2=0/>
+; PF2-NEXT: <INST_BINOP op0=1 op1=2 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_LOAD op0=4 op1=0 op2=0/>
+; PF2-NEXT: <INST_BR op0=3/>
+; PF2-NEXT: <INST_PHI op0=0 op1=8 op2=1 op3=8 op4=2/>
+; PF2-NEXT: <INST_PHI op0=0 op1=8 op2=1 op3=4 op4=2/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>
+
+; ------------------------------------------------------
+
+; Show that we must introduce a cast reference in each
+; reachable block that uses it, but one per block is sufficient.
+define void @LongReachingCasts(i1) {
+ %2 = alloca i8, i32 4, align 8
+ %3 = ptrtoint i8* %2 to i32
+ %4 = bitcast [4 x i8]* @bytes to i32*
+ br i1 %0, label %Split1, label %Split2
+
+Split1:
+ br i1 %0, label %b1, label %b2
+
+Split2:
+ br i1 %0, label %b3, label %b4
+
+b1:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b2:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b3:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+
+b4:
+ store i32 %3, i32* %4, align 1
+ store i32 %3, i32* %4, align 1
+ ret void
+}
+
+; TD1: define void @LongReachingCasts(i1) {
+; TD1-NEXT: %2 = alloca i8, i32 4, align 8
+; TD1-NEXT: %3 = ptrtoint i8* %2 to i32
+; TD1-NEXT: %4 = bitcast [4 x i8]* @bytes to i32*
+; TD1-NEXT: br i1 %0, label %Split1, label %Split2
+; TD1: Split1:
+; TD1-NEXT: br i1 %0, label %b1, label %b2
+; TD1: Split2:
+; TD1-NEXT: br i1 %0, label %b3, label %b4
+; TD1: b1:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b2:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b3:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1: b4:
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: store i32 %3, i32* %4, align 1
+; TD1-NEXT: ret void
+; TD1-NEXT: }
+
+; PF1: <FUNCTION_BLOCK>
+; PF1: </CONSTANTS_BLOCK>
+; PF1-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF1-NEXT: <INST_CAST op0=1 op1=0 op2=9/>
+; PF1-NEXT: <INST_CAST op0=5 op1=4 op2=11/>
+; PF1-NEXT: <INST_BR op0=1 op1=2 op2=5/>
+; PF1-NEXT: <INST_BR op0=3 op1=4 op2=5/>
+; PF1-NEXT: <INST_BR op0=5 op1=6 op2=5/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_STORE op0=1 op1=2 op2=1 op3=0/>
+; PF1-NEXT: <INST_RET/>
+; PF1: </FUNCTION_BLOCK>
+
+; TD2: define void @LongReachingCasts(i1) {
+; TD2-NEXT: %2 = alloca i8, i32 4, align 8
+; TD2-NEXT: br i1 %0, label %Split1, label %Split2
+; TD2: Split1:
+; TD2-NEXT: br i1 %0, label %b1, label %b2
+; TD2: Split2:
+; TD2-NEXT: br i1 %0, label %b3, label %b4
+; TD2: b1:
+; TD2-NEXT: %3 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %4 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %3, i32* %4, align 1
+; TD2-NEXT: store i32 %3, i32* %4, align 1
+; TD2-NEXT: ret void
+; TD2: b2:
+; TD2-NEXT: %5 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %6 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %5, i32* %6, align 1
+; TD2-NEXT: store i32 %5, i32* %6, align 1
+; TD2-NEXT: ret void
+; TD2: b3:
+; TD2-NEXT: %7 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %8 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %7, i32* %8, align 1
+; TD2-NEXT: store i32 %7, i32* %8, align 1
+; TD2-NEXT: ret void
+; TD2: b4:
+; TD2-NEXT: %9 = ptrtoint i8* %2 to i32
+; TD2-NEXT: %10 = bitcast [4 x i8]* @bytes to i32*
+; TD2-NEXT: store i32 %9, i32* %10, align 1
+; TD2-NEXT: store i32 %9, i32* %10, align 1
+; TD2-NEXT: ret void
+; TD2-NEXT: }
+
+; PF2: <FUNCTION_BLOCK>
+; PF2: </CONSTANTS_BLOCK>
+; PF2-NEXT: <INST_ALLOCA op0=1 op1=4/>
+; PF2-NEXT: <INST_BR op0=1 op1=2 op2=3/>
+; PF2-NEXT: <INST_BR op0=3 op1=4 op2=3/>
+; PF2-NEXT: <INST_BR op0=5 op1=6 op2=3/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_STORE op0=4 op1=1 op2=1/>
+; PF2-NEXT: <INST_RET/>
+; PF2: </FUNCTION_BLOCK>