//===- ReplacePtrsWithInts.cpp - Convert pointer values to integer values--===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass strips out aggregate pointer types and replaces them with
// the integer type iPTR, which is i32 for PNaCl (though this pass
// will allow iPTR to be i64 if the DataLayout specifies 64-bit
// pointers).
//
// The pass converts IR to the following normal form:
//
// All inttoptr and ptrtoint instructions use the same integer size
// (iPTR), so they do not implicitly truncate or zero-extend.
//
// alloca always has the result type i8*.
//
// Pointer types only appear in the following instructions:
//  * loads and stores: the pointer operand is a NormalizedPtr.
//  * function calls: the function operand is a NormalizedPtr.
//  * intrinsic calls: any pointer arguments are NormalizedPtrs.
//  * alloca
//  * bitcast and inttoptr: only used as part of a NormalizedPtr.
//  * ptrtoint: the operand is an InherentPtr.
//
// Where an InherentPtr is defined as a pointer value that is:
//  * an alloca;
//  * a GlobalValue (a function or global variable); or
//  * an intrinsic call.
//
// And a NormalizedPtr is defined as a pointer value that is:
//  * an inttoptr instruction;
//  * an InherentPtr; or
//  * a bitcast of an InherentPtr.
//
// This pass currently strips out lifetime markers (that is, calls to
// the llvm.lifetime.start/end intrinsics) and invariant markers
// (calls to llvm.invariant.start/end).
// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/NaCl.h" using namespace llvm; namespace { // This is a ModulePass because the pass must recreate functions in // order to change their argument and return types. struct ReplacePtrsWithInts : public ModulePass { static char ID; // Pass identification, replacement for typeid ReplacePtrsWithInts() : ModulePass(ID) { initializeReplacePtrsWithIntsPass(*PassRegistry::getPassRegistry()); } virtual bool runOnModule(Module &M); }; // FunctionConverter stores the state for mapping old instructions // (of pointer type) to converted instructions (of integer type) // within a function, and provides methods for doing the conversion. class FunctionConverter { // Int type that pointer types are to be replaced with, typically i32. Type *IntPtrType; struct RewrittenVal { RewrittenVal(): Placeholder(NULL), NewIntVal(NULL) {} Value *Placeholder; Value *NewIntVal; }; // Maps from old values (of pointer type) to converted values (of // IntPtrType type). DenseMap RewriteMap; public: FunctionConverter(Type *IntPtrType) : IntPtrType(IntPtrType) {} // Returns the normalized version of the given type, converting // pointer types to IntPtrType. Type *convertType(Type *Ty); // Returns the normalized version of the given function type by // normalizing the function's argument types. FunctionType *convertFuncType(FunctionType *FTy); // Records that 'To' is the normalized version of 'From'. If 'To' // is not of pointer type, no type conversion is required, so this // can take the short cut of replacing 'To' with 'From'. 
void recordConverted(Value *From, Value *To); void recordConvertedAndErase(Instruction *From, Value *To); // Returns Val with no-op casts (those that convert between // IntPtrType and pointer types) stripped off. Value *stripNoopCasts(Value *Val); // Returns the normalized version of the given value. // // If the conversion of Val has been deferred, this returns a // placeholder object, which will later be replaceAllUsesWith'd to // the final value. Since replaceAllUsesWith does not work on // references by metadata nodes, this can be bypassed using // BypassPlaceholder to get the real converted value, assuming it // is available. Value *convert(Value *Val, bool BypassPlaceholder = false); // Returns the NormalizedPtr form of the given pointer value. // Inserts conversion instructions at InsertPt. Value *convertBackToPtr(Value *Val, Instruction *InsertPt); // Returns the NormalizedPtr form of the given function pointer. // Inserts conversion instructions at InsertPt. Value *convertFunctionPtr(Value *Callee, Instruction *InsertPt); // Converts an instruction without recreating it, by wrapping its // operands and result. void convertInPlace(Instruction *Inst); void eraseReplacedInstructions(); // List of instructions whose deletion has been deferred. 
SmallVector ToErase; }; } Type *FunctionConverter::convertType(Type *Ty) { if (Ty->isPointerTy()) return IntPtrType; return Ty; } FunctionType *FunctionConverter::convertFuncType(FunctionType *FTy) { SmallVector ArgTypes; for (FunctionType::param_iterator ArgTy = FTy->param_begin(), E = FTy->param_end(); ArgTy != E; ++ArgTy) { ArgTypes.push_back(convertType(*ArgTy)); } return FunctionType::get(convertType(FTy->getReturnType()), ArgTypes, FTy->isVarArg()); } void FunctionConverter::recordConverted(Value *From, Value *To) { if (!From->getType()->isPointerTy()) { From->replaceAllUsesWith(To); return; } RewrittenVal *RV = &RewriteMap[From]; assert(!RV->NewIntVal); RV->NewIntVal = To; } void FunctionConverter::recordConvertedAndErase(Instruction *From, Value *To) { recordConverted(From, To); // There may still be references to this value, so defer deleting it. ToErase.push_back(From); } Value *FunctionConverter::stripNoopCasts(Value *Val) { SmallPtrSet Visited; for (;;) { if (!Visited.insert(Val)) { // It is possible to get a circular reference in unreachable // basic blocks. Handle this case for completeness. 
return UndefValue::get(Val->getType()); } if (CastInst *Cast = dyn_cast(Val)) { Value *Src = Cast->getOperand(0); if ((isa(Cast) && Cast->getType()->isPointerTy()) || (isa(Cast) && Cast->getType() == IntPtrType) || (isa(Cast) && Src->getType() == IntPtrType)) { Val = Src; continue; } } return Val; } } Value *FunctionConverter::convert(Value *Val, bool BypassPlaceholder) { Val = stripNoopCasts(Val); if (!Val->getType()->isPointerTy()) return Val; if (Constant *C = dyn_cast(Val)) return ConstantExpr::getPtrToInt(C, IntPtrType); RewrittenVal *RV = &RewriteMap[Val]; if (BypassPlaceholder) { assert(RV->NewIntVal); return RV->NewIntVal; } if (!RV->Placeholder) RV->Placeholder = new Argument(convertType(Val->getType())); return RV->Placeholder; } Value *FunctionConverter::convertBackToPtr(Value *Val, Instruction *InsertPt) { Type *NewTy = convertType(Val->getType()->getPointerElementType())->getPointerTo(); return new IntToPtrInst(convert(Val), NewTy, "", InsertPt); } Value *FunctionConverter::convertFunctionPtr(Value *Callee, Instruction *InsertPt) { FunctionType *FuncType = cast( Callee->getType()->getPointerElementType()); return new IntToPtrInst(convert(Callee), convertFuncType(FuncType)->getPointerTo(), "", InsertPt); } static bool ShouldLeaveAlone(Value *V) { if (Function *F = dyn_cast(V)) return F->isIntrinsic(); if (isa(V)) return true; return false; } void FunctionConverter::convertInPlace(Instruction *Inst) { // Convert operands. for (unsigned I = 0; I < Inst->getNumOperands(); ++I) { Value *Arg = Inst->getOperand(I); if (Arg->getType()->isPointerTy() && !ShouldLeaveAlone(Arg)) { Value *Conv = convert(Arg); Inst->setOperand(I, new IntToPtrInst(Conv, Arg->getType(), "", Inst)); } } // Convert result. 
if (Inst->getType()->isPointerTy()) { Instruction *Cast = new PtrToIntInst( Inst, convertType(Inst->getType()), Inst->getName() + ".asint"); Cast->insertAfter(Inst); recordConverted(Inst, Cast); } } void FunctionConverter::eraseReplacedInstructions() { bool Error = false; for (DenseMap::iterator I = RewriteMap.begin(), E = RewriteMap.end(); I != E; ++I) { if (I->second.Placeholder) { if (I->second.NewIntVal) { I->second.Placeholder->replaceAllUsesWith(I->second.NewIntVal); } else { errs() << "Not converted: " << *I->first << "\n"; Error = true; } } } if (Error) report_fatal_error("Case not handled in ReplacePtrsWithInts"); // Delete the placeholders in a separate pass. This means that if // one placeholder is accidentally rewritten to another, we will get // a useful error message rather than accessing a dangling pointer. for (DenseMap::iterator I = RewriteMap.begin(), E = RewriteMap.end(); I != E; ++I) { delete I->second.Placeholder; } // We must do dropAllReferences() before doing eraseFromParent(), // otherwise we will try to erase instructions that are still // referenced. for (SmallVectorImpl::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) { (*I)->dropAllReferences(); } for (SmallVectorImpl::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) { (*I)->eraseFromParent(); } } static void ConvertMetadataOperand(FunctionConverter *FC, IntrinsicInst *Call, int Index) { MDNode *MD = cast(Call->getArgOperand(Index)); if (MD->getNumOperands() != 1) return; Value *MDArg = MD->getOperand(0); if (MDArg && (isa(MDArg) || isa(MDArg))) { MDArg = FC->convert(MDArg, /* BypassPlaceholder= */ true); if (PtrToIntInst *Cast = dyn_cast(MDArg)) { // Unwrapping this is necessary for llvm.dbg.declare to work. MDArg = Cast->getPointerOperand(); } SmallVector Args; Args.push_back(MDArg); Call->setArgOperand(Index, MDNode::get(Call->getContext(), Args)); } } // Remove attributes that only apply to pointer arguments. Returns // the updated AttributeSet. 
static AttributeSet RemovePointerAttrs(LLVMContext &Context, AttributeSet Attrs) { SmallVector AttrList; for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { unsigned Index = Attrs.getSlotIndex(Slot); AttrBuilder AB; for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); Attr != E; ++Attr) { if (!Attr->isEnumAttribute()) { continue; } switch (Attr->getKindAsEnum()) { // ByVal and StructRet should already have been removed by the // ExpandByVal pass. case Attribute::ByVal: case Attribute::StructRet: case Attribute::Nest: Attrs.dump(); report_fatal_error("ReplacePtrsWithInts cannot handle " "byval, sret or nest attrs"); break; // Strip NoCapture and NoAlias because they are only allowed // on arguments of pointer type, and we are removing the // pointer types. case Attribute::NoCapture: case Attribute::NoAlias: break; default: AB.addAttribute(*Attr); } } AttrList.push_back(AttributeSet::get(Context, Index, AB)); } return AttributeSet::get(Context, AttrList); } static void ConvertInstruction(DataLayout *DL, Type *IntPtrType, FunctionConverter *FC, Instruction *Inst) { if (ReturnInst *Ret = dyn_cast(Inst)) { Value *Result = Ret->getReturnValue(); if (Result) Result = FC->convert(Result); CopyDebug(ReturnInst::Create(Ret->getContext(), Result, Ret), Inst); Ret->eraseFromParent(); } else if (PHINode *Phi = dyn_cast(Inst)) { PHINode *Phi2 = PHINode::Create(FC->convertType(Phi->getType()), Phi->getNumIncomingValues(), "", Phi); CopyDebug(Phi2, Phi); for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { Phi2->addIncoming(FC->convert(Phi->getIncomingValue(I)), Phi->getIncomingBlock(I)); } Phi2->takeName(Phi); FC->recordConvertedAndErase(Phi, Phi2); } else if (SelectInst *Op = dyn_cast(Inst)) { Instruction *Op2 = SelectInst::Create(Op->getCondition(), FC->convert(Op->getTrueValue()), FC->convert(Op->getFalseValue()), "", Op); CopyDebug(Op2, Op); Op2->takeName(Op); FC->recordConvertedAndErase(Op, Op2); } else if (isa(Inst) || isa(Inst)) { 
Value *Arg = FC->convert(Inst->getOperand(0)); Type *ResultTy = FC->convertType(Inst->getType()); unsigned ArgSize = Arg->getType()->getIntegerBitWidth(); unsigned ResultSize = ResultTy->getIntegerBitWidth(); Value *Result; // We avoid using IRBuilder's CreateZExtOrTrunc() here because it // constant-folds ptrtoint ConstantExprs. This leads to creating // ptrtoints of non-IntPtrType type, which is not what we want, // because we want truncation/extension to be done explicitly by // separate instructions. if (ArgSize == ResultSize) { Result = Arg; } else { Instruction::CastOps CastType = ArgSize > ResultSize ? Instruction::Trunc : Instruction::ZExt; Result = CopyDebug(CastInst::Create(CastType, Arg, ResultTy, "", Inst), Inst); } if (Result != Arg) Result->takeName(Inst); FC->recordConvertedAndErase(Inst, Result); } else if (isa(Inst)) { if (Inst->getType()->isPointerTy()) { FC->ToErase.push_back(Inst); } } else if (ICmpInst *Cmp = dyn_cast(Inst)) { Value *Cmp2 = CopyDebug(new ICmpInst(Inst, Cmp->getPredicate(), FC->convert(Cmp->getOperand(0)), FC->convert(Cmp->getOperand(1)), ""), Inst); Cmp2->takeName(Cmp); Cmp->replaceAllUsesWith(Cmp2); Cmp->eraseFromParent(); } else if (LoadInst *Load = dyn_cast(Inst)) { Value *Ptr = FC->convertBackToPtr(Load->getPointerOperand(), Inst); LoadInst *Result = new LoadInst(Ptr, "", Inst); Result->takeName(Inst); CopyDebug(Result, Inst); CopyLoadOrStoreAttrs(Result, Load); FC->recordConvertedAndErase(Inst, Result); } else if (StoreInst *Store = dyn_cast(Inst)) { Value *Ptr = FC->convertBackToPtr(Store->getPointerOperand(), Inst); StoreInst *Result = new StoreInst(FC->convert(Store->getValueOperand()), Ptr, Inst); CopyDebug(Result, Inst); CopyLoadOrStoreAttrs(Result, Store); Inst->eraseFromParent(); } else if (CallInst *Call = dyn_cast(Inst)) { if (IntrinsicInst *ICall = dyn_cast(Inst)) { if (ICall->getIntrinsicID() == Intrinsic::lifetime_start || ICall->getIntrinsicID() == Intrinsic::lifetime_end || ICall->getIntrinsicID() == 
Intrinsic::invariant_start || ICall->getIntrinsicID() == Intrinsic::invariant_end) { // Remove alloca lifetime markers for now. This is because // the GVN pass can introduce lifetime markers taking PHI // nodes as arguments. If ReplacePtrsWithInts converts the // PHI node to int type, we will render those lifetime markers // ineffective. But dropping a subset of lifetime markers is // not safe in general. So, until LLVM better defines the // semantics of lifetime markers, we drop them all. See: // https://code.google.com/p/nativeclient/issues/detail?id=3443 // We do the same for invariant.start/end because they work in // a similar way. Inst->eraseFromParent(); } else { FC->convertInPlace(Inst); } } else if (isa(Call->getCalledValue())) { FC->convertInPlace(Inst); } else { SmallVector Args; for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) Args.push_back(FC->convert(Call->getArgOperand(I))); CallInst *NewCall = CallInst::Create( FC->convertFunctionPtr(Call->getCalledValue(), Call), Args, "", Inst); CopyDebug(NewCall, Call); NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), Call->getAttributes())); NewCall->setCallingConv(Call->getCallingConv()); NewCall->takeName(Call); FC->recordConvertedAndErase(Call, NewCall); } } else if (InvokeInst *Call = dyn_cast(Inst)) { SmallVector Args; for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) Args.push_back(FC->convert(Call->getArgOperand(I))); InvokeInst *NewCall = InvokeInst::Create( FC->convertFunctionPtr(Call->getCalledValue(), Call), Call->getNormalDest(), Call->getUnwindDest(), Args, "", Inst); CopyDebug(NewCall, Call); NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), Call->getAttributes())); NewCall->setCallingConv(Call->getCallingConv()); NewCall->takeName(Call); FC->recordConvertedAndErase(Call, NewCall); } else if (AllocaInst *Alloca = dyn_cast(Inst)) { Type *ElementTy = Inst->getType()->getPointerElementType(); Constant *ElementSize = ConstantInt::get(IntPtrType, 
DL->getTypeAllocSize(ElementTy)); // Expand out alloca's built-in multiplication. Value *MulSize; if (ConstantInt *C = dyn_cast(Alloca->getArraySize())) { MulSize = ConstantExpr::getMul(ElementSize, C); } else { MulSize = BinaryOperator::Create( Instruction::Mul, ElementSize, Alloca->getArraySize(), Alloca->getName() + ".alloca_mul", Alloca); } unsigned Alignment = Alloca->getAlignment(); if (Alignment == 0) Alignment = DL->getPrefTypeAlignment(ElementTy); Value *Tmp = CopyDebug(new AllocaInst(Type::getInt8Ty(Inst->getContext()), MulSize, Alignment, "", Inst), Inst); Tmp->takeName(Alloca); Value *Alloca2 = new PtrToIntInst(Tmp, IntPtrType, Tmp->getName() + ".asint", Inst); FC->recordConvertedAndErase(Alloca, Alloca2); } else if (// Handle these instructions as a convenience to allow // the pass to be used in more situations, even though we // don't expect them in PNaCl's stable ABI. isa(Inst) || isa(Inst) || isa(Inst) || isa(Inst) || isa(Inst) || // These atomics only operate on integer pointers, not // other pointers, so we don't need to recreate the // instruction. 
isa(Inst) || isa(Inst)) { FC->convertInPlace(Inst); } } // Convert ptrtoint+inttoptr to a bitcast because it's shorter and // because some intrinsics work on bitcasts but not on // ptrtoint+inttoptr, in particular: // * llvm.lifetime.start/end (although we strip these out) // * llvm.eh.typeid.for static void SimplifyCasts(Instruction *Inst, Type *IntPtrType) { if (IntToPtrInst *Cast1 = dyn_cast(Inst)) { if (PtrToIntInst *Cast2 = dyn_cast(Cast1->getOperand(0))) { assert(Cast2->getType() == IntPtrType); Value *V = Cast2->getPointerOperand(); if (V->getType() != Cast1->getType()) V = new BitCastInst(V, Cast1->getType(), V->getName() + ".bc", Cast1); Cast1->replaceAllUsesWith(V); if (Cast1->use_empty()) Cast1->eraseFromParent(); if (Cast2->use_empty()) Cast2->eraseFromParent(); } } } static void CleanUpFunction(Function *Func, Type *IntPtrType) { // Remove the ptrtoint/bitcast ConstantExprs we introduced for // referencing globals. FunctionPass *Pass = createExpandConstantExprPass(); Pass->runOnFunction(*Func); delete Pass; for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { SimplifyCasts(Iter++, IntPtrType); } } // Cleanup pass. for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { Instruction *Inst = Iter++; // Add names to inttoptrs to make the output more readable. The // placeholder values get in the way of doing this earlier when // the inttoptrs are created. if (isa(Inst)) Inst->setName(Inst->getOperand(0)->getName() + ".asptr"); // Remove ptrtoints that were introduced for allocas but not used. 
if (isa(Inst) && Inst->use_empty()) Inst->eraseFromParent(); } } } char ReplacePtrsWithInts::ID = 0; INITIALIZE_PASS(ReplacePtrsWithInts, "replace-ptrs-with-ints", "Convert pointer values to integer values", false, false) bool ReplacePtrsWithInts::runOnModule(Module &M) { DataLayout DL(&M); Type *IntPtrType = DL.getIntPtrType(M.getContext()); for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { Function *OldFunc = Iter++; // Intrinsics' types must be left alone. if (OldFunc->isIntrinsic()) continue; FunctionConverter FC(IntPtrType); FunctionType *NFTy = FC.convertFuncType(OldFunc->getFunctionType()); OldFunc->setAttributes(RemovePointerAttrs(M.getContext(), OldFunc->getAttributes())); Function *NewFunc = RecreateFunction(OldFunc, NFTy); // Move the arguments across to the new function. for (Function::arg_iterator Arg = OldFunc->arg_begin(), E = OldFunc->arg_end(), NewArg = NewFunc->arg_begin(); Arg != E; ++Arg, ++NewArg) { FC.recordConverted(Arg, NewArg); NewArg->takeName(Arg); } // Convert the function body. for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); BB != E; ++BB) { for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { ConvertInstruction(&DL, IntPtrType, &FC, Iter++); } } // Now that all the replacement instructions have been created, we // can update the debug intrinsic calls. for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); BB != E; ++BB) { for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); Inst != E; ++Inst) { if (IntrinsicInst *Call = dyn_cast(Inst)) { if (Call->getIntrinsicID() == Intrinsic::dbg_declare) { ConvertMetadataOperand(&FC, Call, 0); } } } } FC.eraseReplacedInstructions(); OldFunc->eraseFromParent(); } // Now that all functions have their normalized types, we can remove // various casts. 
for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { CleanUpFunction(Func, IntPtrType); // Delete the now-unused bitcast ConstantExprs that we created so // that they don't interfere with StripDeadPrototypes. Func->removeDeadConstantUsers(); } return true; } ModulePass *llvm::createReplacePtrsWithIntsPass() { return new ReplacePtrsWithInts(); }