diff options
Diffstat (limited to 'lib/Bytecode/Reader/Reader.cpp')
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 2343 |
1 files changed, 2343 insertions, 0 deletions
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp new file mode 100644 index 0000000000..daf7577cf0 --- /dev/null +++ b/lib/Bytecode/Reader/Reader.cpp @@ -0,0 +1,2343 @@ +//===- Reader.cpp - Code to read bytecode files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Allow passing in an option to ignore the symbol table +// +//===----------------------------------------------------------------------===// + +#include "Reader.h" +#include "llvm/Bytecode/BytecodeHandler.h" +#include "llvm/BasicBlock.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/SymbolTable.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Config/alloca.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/Compressor.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/StringExtras.h" +#include <sstream> +#include <algorithm> +using namespace llvm; + +namespace { + /// @brief A class for maintaining the slot number definition + /// as a placeholder for the actual definition for forward constants defs. + class ConstantPlaceHolder : public ConstantExpr { + ConstantPlaceHolder(); // DO NOT IMPLEMENT + void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT + public: + Use Op; + ConstantPlaceHolder(const Type *Ty) + : ConstantExpr(Ty, Instruction::UserOp1, &Op, 1), + Op(UndefValue::get(Type::IntTy), this) { + } + }; +} + +// Provide some details on error +inline void BytecodeReader::error(std::string err) { + err += " (Vers=" ; + err += itostr(RevisionNum) ; + err += ", Pos=" ; + err += itostr(At-MemStart); + err += ")"; + throw err; +} + +//===----------------------------------------------------------------------===// +// Bytecode Reading Methods +//===----------------------------------------------------------------------===// + +/// Determine if the current block being read contains any more data. +inline bool BytecodeReader::moreInBlock() { + return At < BlockEnd; +} + +/// Throw an error if we've read past the end of the current block +inline void BytecodeReader::checkPastBlockEnd(const char * block_name) { + if (At > BlockEnd) + error(std::string("Attempt to read past the end of ") + block_name + + " block."); +} + +/// Align the buffer position to a 32 bit boundary +inline void BytecodeReader::align32() { + if (hasAlignment) { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if (At > Save) + if (Handler) Handler->handleAlignment(At - Save); + if (At > BlockEnd) + error("Ran out of data while aligning!"); + } +} + +/// Read a whole unsigned integer +inline unsigned BytecodeReader::read_uint() { + if (At+4 > BlockEnd) + error("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +/// Read a variable-bit-rate encoded unsigned integer +inline unsigned BytecodeReader::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + error("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (Handler) Handler->handleVBR32(At-Save); + return Result; +} + +/// Read a variable-bit-rate encoded unsigned 64-bit integer. +inline uint64_t BytecodeReader::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + error("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (Handler) Handler->handleVBR64(At-Save); + return Result; +} + +/// Read a variable-bit-rate encoded signed 64-bit integer. +inline int64_t BytecodeReader::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} + +/// Read a pascal-style string (length followed by text) +inline std::string BytecodeReader::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + error("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +/// Read an arbitrary block of data +inline void BytecodeReader::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + error("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} + +/// Read a float value in little-endian order +inline void BytecodeReader::read_float(float& FloatVal) { + /// FIXME: This isn't optimal, it has size problems on some platforms + /// where FP is not IEEE. + FloatVal = BitsToFloat(At[0] | (At[1] << 8) | (At[2] << 16) | (At[3] << 24)); + At+=sizeof(uint32_t); +} + +/// Read a double value in little-endian order +inline void BytecodeReader::read_double(double& DoubleVal) { + /// FIXME: This isn't optimal, it has size problems on some platforms + /// where FP is not IEEE. + DoubleVal = BitsToDouble((uint64_t(At[0]) << 0) | (uint64_t(At[1]) << 8) | + (uint64_t(At[2]) << 16) | (uint64_t(At[3]) << 24) | + (uint64_t(At[4]) << 32) | (uint64_t(At[5]) << 40) | + (uint64_t(At[6]) << 48) | (uint64_t(At[7]) << 56)); + At+=sizeof(uint64_t); +} + +/// Read a block header and obtain its type and size +inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { + if ( hasLongBlockHeaders ) { + Type = read_uint(); + Size = read_uint(); + switch (Type) { + case BytecodeFormat::Reserved_DoNotUse : + error("Reserved_DoNotUse used as Module Type?"); + Type = BytecodeFormat::ModuleBlockID; break; + case BytecodeFormat::Module: + Type = BytecodeFormat::ModuleBlockID; break; + case BytecodeFormat::Function: + Type = BytecodeFormat::FunctionBlockID; break; + case BytecodeFormat::ConstantPool: + Type = BytecodeFormat::ConstantPoolBlockID; break; + case BytecodeFormat::SymbolTable: + Type = BytecodeFormat::SymbolTableBlockID; break; + case BytecodeFormat::ModuleGlobalInfo: + Type = BytecodeFormat::ModuleGlobalInfoBlockID; break; + case BytecodeFormat::GlobalTypePlane: + Type = BytecodeFormat::GlobalTypePlaneBlockID; break; + case BytecodeFormat::InstructionList: + Type = BytecodeFormat::InstructionListBlockID; break; + case BytecodeFormat::CompactionTable: + Type = BytecodeFormat::CompactionTableBlockID; break; + case BytecodeFormat::BasicBlock: + /// This block type isn't used after version 1.1. However, we have to + /// still allow the value in case this is an old bc format file. + /// We just let its value creep thru. + break; + default: + error("Invalid block id found: " + utostr(Type)); + break; + } + } else { + Size = read_uint(); + Type = Size & 0x1F; // mask low order five bits + Size >>= 5; // get rid of five low order bits, leaving high 27 + } + BlockStart = At; + if (At + Size > BlockEnd) + error("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + if (Handler) Handler->handleBlock(Type, BlockStart, Size); +} + + +/// In LLVM 1.2 and before, Types were derived from Value and so they were +/// written as part of the type planes along with any other Value. In LLVM +/// 1.3 this changed so that Type does not derive from Value. Consequently, +/// the BytecodeReader's containers for Values can't contain Types because +/// there's no inheritance relationship. This means that the "Type Type" +/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3 +/// whenever a bytecode construct must have both types and values together, +/// the types are always read/written first and then the Values. Furthermore +/// since Type::TypeTyID no longer exists, its value (12) now corresponds to +/// Type::LabelTyID. In order to overcome this we must "sanitize" all the +/// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change. +/// For LLVM 1.2 and before, this function will decrement the type id by +/// one to account for the missing Type::TypeTyID enumerator if the value is +/// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this +/// function returns true, otherwise false. This helps detect situations +/// where the pre 1.3 bytecode is indicating that what follows is a type. +/// @returns true iff type id corresponds to pre 1.3 "type type" +inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) { + if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later + if (TypeId == Type::LabelTyID) { + TypeId = Type::VoidTyID; // sanitize it + return true; // indicate we got TypeTyID in pre 1.3 bytecode + } else if (TypeId > Type::LabelTyID) + --TypeId; // shift all planes down because type type plane is missing + } + return false; +} + +/// Reads a vbr uint to read in a type id and does the necessary +/// conversion on it by calling sanitizeTypeId. +/// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type" +/// @see sanitizeTypeId +inline bool BytecodeReader::read_typeid(unsigned &TypeId) { + TypeId = read_vbr_uint(); + if ( !has32BitTypes ) + if ( TypeId == 0x00FFFFFF ) + TypeId = read_vbr_uint(); + return sanitizeTypeId(TypeId); +} + +//===----------------------------------------------------------------------===// +// IR Lookup Methods +//===----------------------------------------------------------------------===// + +/// Determine if a type id has an implicit null value +inline bool BytecodeReader::hasImplicitNull(unsigned TyID) { + if (!hasExplicitPrimitiveZeros) + return TyID != Type::LabelTyID && TyID != Type::VoidTyID; + return TyID >= Type::FirstDerivedTyID; +} + +/// Obtain a type given a typeid and account for things like compaction tables, +/// function level vs module level, and the offsetting for the primitive types. +const Type *BytecodeReader::getType(unsigned ID) { + if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID)) + return T; // Asked for a primitive type... + + // Otherwise, derived types need offset... + ID -= Type::FirstDerivedTyID; + + if (!CompactionTypes.empty()) { + if (ID >= CompactionTypes.size()) + error("Type ID out of range for compaction table!"); + return CompactionTypes[ID].first; + } + + // Is it a module-level type? + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); + + // Nope, is it a function-level type? + ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); + + error("Illegal type reference!"); + return Type::VoidTy; +} + +/// Get a sanitized type id. This just makes sure that the \p ID +/// is both sanitized and not the "type type" of pre-1.3 bytecode. +/// @see sanitizeTypeId +inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) { + if (sanitizeTypeId(ID)) + error("Invalid type id encountered"); + return getType(ID); +} + +/// This method just saves some coding. It uses read_typeid to read +/// in a sanitized type id, errors that its not the type type, and +/// then calls getType to return the type value. +inline const Type* BytecodeReader::readSanitizedType() { + unsigned ID; + if (read_typeid(ID)) + error("Invalid type id encountered"); + return getType(ID); +} + +/// Get the slot number associated with a type accounting for primitive +/// types, compaction tables, and function level vs module level. +unsigned BytecodeReader::getTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); + + // Scan the compaction table for the type if needed. + if (!CompactionTypes.empty()) { + for (unsigned i = 0, e = CompactionTypes.size(); i != e; ++i) + if (CompactionTypes[i].first == Ty) + return Type::FirstDerivedTyID + i; + + error("Couldn't find type specified in compaction table!"); + } + + // Check the function level types first... + TypeListTy::iterator I = std::find(FunctionTypes.begin(), + FunctionTypes.end(), Ty); + + if (I != FunctionTypes.end()) + return Type::FirstDerivedTyID + ModuleTypes.size() + + (&*I - &FunctionTypes[0]); + + // If we don't have our cache yet, build it now. + if (ModuleTypeIDCache.empty()) { + unsigned N = 0; + ModuleTypeIDCache.reserve(ModuleTypes.size()); + for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end(); + I != E; ++I, ++N) + ModuleTypeIDCache.push_back(std::make_pair(*I, N)); + + std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end()); + } + + // Binary search the cache for the entry. + std::vector<std::pair<const Type*, unsigned> >::iterator IT = + std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(), + std::make_pair(Ty, 0U)); + if (IT == ModuleTypeIDCache.end() || IT->first != Ty) + error("Didn't find type in ModuleTypes."); + + return Type::FirstDerivedTyID + IT->second; +} + +/// This is just like getType, but when a compaction table is in use, it is +/// ignored. It also ignores function level types. +/// @see getType +const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot); + if (!Ty) + error("Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypes.size()) + error("Illegal compaction table type reference!"); + return ModuleTypes[Slot]; +} + +/// This is just like getTypeSlot, but when a compaction table is in use, it +/// is ignored. It also ignores function level types. +unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); + + // If we don't have our cache yet, build it now. + if (ModuleTypeIDCache.empty()) { + unsigned N = 0; + ModuleTypeIDCache.reserve(ModuleTypes.size()); + for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end(); + I != E; ++I, ++N) + ModuleTypeIDCache.push_back(std::make_pair(*I, N)); + + std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end()); + } + + // Binary search the cache for the entry. + std::vector<std::pair<const Type*, unsigned> >::iterator IT = + std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(), + std::make_pair(Ty, 0U)); + if (IT == ModuleTypeIDCache.end() || IT->first != Ty) + error("Didn't find type in ModuleTypes."); + + return Type::FirstDerivedTyID + IT->second; +} + +/// Retrieve a value of a given type and slot number, possibly creating +/// it if it doesn't already exist. +Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { + assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); + unsigned Num = oNum; + + // If there is a compaction table active, it defines the low-level numbers. + // If not, the module values define the low-level numbers. + if (CompactionValues.size() > type && !CompactionValues[type].empty()) { + if (Num < CompactionValues[type].size()) + return CompactionValues[type][Num]; + Num -= CompactionValues[type].size(); + } else { + // By default, the global type id is the type id passed in + unsigned GlobalTyID = type; + + // If the type plane was compactified, figure out the global type ID by + // adding the derived type ids and the distance. + if (!CompactionTypes.empty() && type >= Type::FirstDerivedTyID) + GlobalTyID = CompactionTypes[type-Type::FirstDerivedTyID].second; + + if (hasImplicitNull(GlobalTyID)) { + const Type *Ty = getType(type); + if (!isa<OpaqueType>(Ty)) { + if (Num == 0) + return Constant::getNullValue(Ty); + --Num; + } + } + + if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) { + if (Num < ModuleValues[GlobalTyID]->size()) + return ModuleValues[GlobalTyID]->getOperand(Num); + Num -= ModuleValues[GlobalTyID]->size(); + } + } + + if (FunctionValues.size() > type && + FunctionValues[type] && + Num < FunctionValues[type]->size()) + return FunctionValues[type]->getOperand(Num); + + if (!Create) return 0; // Do not create a placeholder? + + // Did we already create a place holder? + std::pair<unsigned,unsigned> KeyValue(type, oNum); + ForwardReferenceMap::iterator I = ForwardReferences.lower_bound(KeyValue); + if (I != ForwardReferences.end() && I->first == KeyValue) + return I->second; // We have already created this placeholder + + // If the type exists (it should) + if (const Type* Ty = getType(type)) { + // Create the place holder + Value *Val = new Argument(Ty); + ForwardReferences.insert(I, std::make_pair(KeyValue, Val)); + return Val; + } + throw "Can't create placeholder for value of type slot #" + utostr(type); +} + +/// This is just like getValue, but when a compaction table is in use, it +/// is ignored. Also, no forward references or other fancy features are +/// supported. +Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) { + if (SlotNo == 0) + return Constant::getNullValue(getType(TyID)); + + if (!CompactionTypes.empty() && TyID >= Type::FirstDerivedTyID) { + TyID -= Type::FirstDerivedTyID; + if (TyID >= CompactionTypes.size()) + error("Type ID out of range for compaction table!"); + TyID = CompactionTypes[TyID].second; + } + + --SlotNo; + + if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 || + SlotNo >= ModuleValues[TyID]->size()) { + if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0) + error("Corrupt compaction table entry!" + + utostr(TyID) + ", " + utostr(SlotNo) + ": " + + utostr(ModuleValues.size())); + else + error("Corrupt compaction table entry!" + + utostr(TyID) + ", " + utostr(SlotNo) + ": " + + utostr(ModuleValues.size()) + ", " + + utohexstr(reinterpret_cast<uint64_t>(((void*)ModuleValues[TyID]))) + + ", " + + utostr(ModuleValues[TyID]->size())); + } + return ModuleValues[TyID]->getOperand(SlotNo); +} + +/// Just like getValue, except that it returns a null pointer +/// only on error. It always returns a constant (meaning that if the value is +/// defined, but is not a constant, that is an error). If the specified +/// constant hasn't been parsed yet, a placeholder is defined and used. +/// Later, after the real value is parsed, the placeholder is eliminated. +Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { + if (Value *V = getValue(TypeSlot, Slot, false)) + if (Constant *C = dyn_cast<Constant>(V)) + return C; // If we already have the value parsed, just return it + else + error("Value for slot " + utostr(Slot) + + " is expected to be a constant!"); + + std::pair<unsigned, unsigned> Key(TypeSlot, Slot); + ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key); + + if (I != ConstantFwdRefs.end() && I->first == Key) { + return I->second; + } else { + // Create a placeholder for the constant reference and + // keep track of the fact that we have a forward ref to recycle it + Constant *C = new ConstantPlaceHolder(getType(TypeSlot)); + + // Keep track of the fact that we have a forward ref to recycle it + ConstantFwdRefs.insert(I, std::make_pair(Key, C)); + return C; + } +} + +//===----------------------------------------------------------------------===// +// IR Construction Methods +//===----------------------------------------------------------------------===// + +/// As values are created, they are inserted into the appropriate place +/// with this method. The ValueTable argument must be one of ModuleValues +/// or FunctionValues data members of this class. +unsigned BytecodeReader::insertValue(Value *Val, unsigned type, + ValueTable &ValueTab) { + assert((!isa<Constant>(Val) || !cast<Constant>(Val)->isNullValue()) || + !hasImplicitNull(type) && + "Cannot read null values from bytecode!"); + + if (ValueTab.size() <= type) + ValueTab.resize(type+1); + + if (!ValueTab[type]) ValueTab[type] = new ValueList(); + + ValueTab[type]->push_back(Val); + + bool HasOffset = hasImplicitNull(type) && !isa<OpaqueType>(Val->getType()); + return ValueTab[type]->size()-1 + HasOffset; +} + +/// Insert the arguments of a function as new values in the reader. +void BytecodeReader::insertArguments(Function* F) { + const FunctionType *FT = F->getFunctionType(); + Function::arg_iterator AI = F->arg_begin(); + for (FunctionType::param_iterator It = FT->param_begin(); + It != FT->param_end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), FunctionValues); +} + +//===----------------------------------------------------------------------===// +// Bytecode Parsing Methods +//===----------------------------------------------------------------------===// + +/// This method parses a single instruction. The instruction is +/// inserted at the end of the \p BB provided. The arguments of +/// the instruction are provided in the \p Oprnds vector. +void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, + BasicBlock* BB) { + BufPtr SaveAt = At; + + // Clear instruction data + Oprnds.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read_uint(); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Oprnds.resize((Op >> 0) & 03); + + // Extract the operands + switch (Oprnds.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Oprnds[0] = (Op >> 20) & 4095; + if (Oprnds[0] == 4095) // Handle special encoding for 0 operands... + Oprnds.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Oprnds[0] = (Op >> 16) & 255; + Oprnds[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Oprnds[0] = (Op >> 14) & 63; + Oprnds[1] = (Op >> 20) & 63; + Oprnds[2] = (Op >> 26) & 63; + break; + case 0: + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); + Opcode >>= 2; + iType = read_vbr_uint(); + + unsigned NumOprnds = read_vbr_uint(); + Oprnds.resize(NumOprnds); + + if (NumOprnds == 0) + error("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOprnds; ++i) + Oprnds[i] = read_vbr_uint(); + align32(); + break; + } + + const Type *InstTy = getSanitizedType(iType); + + // We have enough info to inform the handler now. + if (Handler) Handler->handleInstruction(Opcode, InstTy, Oprnds, At-SaveAt); + + // Declare the resulting instruction we'll build. + Instruction *Result = 0; + + // If this is a bytecode format that did not include the unreachable + // instruction, bump up all opcodes numbers to make space. + if (hasNoUnreachableInst) { + if (Opcode >= Instruction::Unreachable && + Opcode < 62) { + ++Opcode; + } + } + + // Handle binary operators + if (Opcode >= Instruction::BinaryOpsBegin && + Opcode < Instruction::BinaryOpsEnd && Oprnds.size() == 2) + Result = BinaryOperator::create((Instruction::BinaryOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(iType, Oprnds[1])); + + switch (Opcode) { + default: + if (Result == 0) + error("Illegal instruction read!"); + break; + case Instruction::VAArg: + Result = new VAArgInst(getValue(iType, Oprnds[0]), + getSanitizedType(Oprnds[1])); + break; + case 32: { //VANext_old + const Type* ArgTy = getValue(iType, Oprnds[0])->getType(); + Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy, + (Type *)0); + + //b = vanext a, t -> + //foo = alloca 1 of t + //bar = vacopy a + //store bar -> foo + //tmp = vaarg foo, t + //b = load foo + AllocaInst* foo = new AllocaInst(ArgTy, 0, "vanext.fix"); + BB->getInstList().push_back(foo); + CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0])); + BB->getInstList().push_back(bar); + BB->getInstList().push_back(new StoreInst(bar, foo)); + Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1])); + BB->getInstList().push_back(tmp); + Result = new LoadInst(foo); + break; + } + case 33: { //VAArg_old + const Type* ArgTy = getValue(iType, Oprnds[0])->getType(); + Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy, + (Type *)0); + + //b = vaarg a, t -> + //foo = alloca 1 of t + //bar = vacopy a + //store bar -> foo + //b = vaarg foo, t + AllocaInst* foo = new AllocaInst(ArgTy, 0, "vaarg.fix"); + BB->getInstList().push_back(foo); + CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0])); + BB->getInstList().push_back(bar); + BB->getInstList().push_back(new StoreInst(bar, foo)); + Result = new VAArgInst(foo, getSanitizedType(Oprnds[1])); + break; + } + case Instruction::Cast: + Result = new CastInst(getValue(iType, Oprnds[0]), + getSanitizedType(Oprnds[1])); + break; + case Instruction::Select: + Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]), + getValue(iType, Oprnds[1]), + getValue(iType, Oprnds[2])); + break; + case Instruction::PHI: { + if (Oprnds.size() == 0 || (Oprnds.size() & 1)) + error("Invalid phi node encountered!"); + + PHINode *PN = new PHINode(InstTy); + PN->reserveOperandSpace(Oprnds.size()); + for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2) + PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1])); + Result = PN; + break; + } + + case Instruction::Shl: + case Instruction::Shr: + Result = new ShiftInst((Instruction::OtherOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(Type::UByteTyID, Oprnds[1])); + break; + case Instruction::Ret: + if (Oprnds.size() == 0) + Result = new ReturnInst(); + else if (Oprnds.size() == 1) + Result = new ReturnInst(getValue(iType, Oprnds[0])); + else + error("Unrecognized instruction!"); + break; + + case Instruction::Br: + if (Oprnds.size() == 1) + Result = new BranchInst(getBasicBlock(Oprnds[0])); + else if (Oprnds.size() == 3) + Result = new BranchInst(getBasicBlock(Oprnds[0]), + getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); + else + error("Invalid number of operands for a 'br' instruction!"); + break; + case Instruction::Switch: { + if (Oprnds.size() & 1) + error("Switch statement with odd number of arguments!"); + + SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]), + getBasicBlock(Oprnds[1]), + Oprnds.size()/2-1); + for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2) + I->addCase(cast<ConstantInt>(getValue(iType, Oprnds[i])), + getBasicBlock(Oprnds[i+1])); + Result = I; + break; + } + + case 58: // Call with extra operand for calling conv + case 59: // tail call, Fast CC + case 60: // normal call, Fast CC + case 61: // tail call, C Calling Conv + case Instruction::Call: { // Normal Call, C Calling Convention + if (Oprnds.size() == 0) + error("Invalid call instruction encountered!"); + + Value *F = getValue(iType, Oprnds[0]); + + unsigned CallingConv = CallingConv::C; + bool isTailCall = false; + + if (Opcode == 61 || Opcode == 59) + isTailCall = true; + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast<PointerType>(F->getType()); + if (PTy == 0) error("Call to non function pointer value!"); + const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); + if (FTy == 0) error("Call to non function pointer value!"); + + std::vector<Value *> Params; + if (!FTy->isVarArg()) { + FunctionType::param_iterator It = FTy->param_begin(); + + if (Opcode == 58) { + isTailCall = Oprnds.back() & 1; + CallingConv = Oprnds.back() >> 1; + Oprnds.pop_back(); + } else if (Opcode == 59 || Opcode == 60) + CallingConv = CallingConv::Fast; + + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + error("Invalid call instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + error("Invalid call instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + unsigned FirstVariableOperand; + if (Oprnds.size() < FTy->getNumParams()) + error("Call instruction missing operands!"); + + // Read all of the fixed arguments + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i])); + + FirstVariableOperand = FTy->getNumParams(); + + if ((Oprnds.size()-FirstVariableOperand) & 1) + error("Invalid call instruction!"); // Must be pairs of type/value + + for (unsigned i = FirstVariableOperand, e = Oprnds.size(); + i != e; i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new CallInst(F, Params); + if (isTailCall) cast<CallInst>(Result)->setTailCall(); + if (CallingConv) cast<CallInst>(Result)->setCallingConv(CallingConv); + break; + } + case 56: // Invoke with encoded CC + case 57: // Invoke Fast CC + case Instruction::Invoke: { // Invoke C CC + if (Oprnds.size() < 3) + error("Invalid invoke instruction!"); + Value *F = getValue(iType, Oprnds[0]); + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast<PointerType>(F->getType()); + if (PTy == 0) + error("Invoke to non function pointer value!"); + const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); + if (FTy == 0) + error("Invoke to non function pointer value!"); + + std::vector<Value *> Params; + BasicBlock *Normal, *Except; + unsigned CallingConv = CallingConv::C; + + if (Opcode == 57) + CallingConv = CallingConv::Fast; + else if (Opcode == 56) { + CallingConv = Oprnds.back(); + Oprnds.pop_back(); + } + + if (!FTy->isVarArg()) { + Normal = getBasicBlock(Oprnds[1]); + Except = getBasicBlock(Oprnds[2]); + + FunctionType::param_iterator It = FTy->param_begin(); + for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + error("Invalid invoke instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + error("Invalid invoke instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + Normal = getBasicBlock(Oprnds[0]); + Except = getBasicBlock(Oprnds[1]); + + unsigned FirstVariableArgument = FTy->getNumParams()+2; + for (unsigned i = 2; i != FirstVariableArgument; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)), + Oprnds[i])); + + if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs + error("Invalid invoke instruction!"); + + for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new InvokeInst(F, Normal, Except, Params); + if (CallingConv) cast<InvokeInst>(Result)->setCallingConv(CallingConv); + break; + } + case Instruction::Malloc: + if (Oprnds.size() > 2) + error("Invalid malloc instruction!"); + if (!isa<PointerType>(InstTy)) + error("Invalid malloc instruction!"); + + Result = new MallocInst(cast<PointerType>(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) : 0); + break; + + case Instruction::Alloca: + if (Oprnds.size() > 2) + error("Invalid alloca instruction!"); + if (!isa<PointerType>(InstTy)) + error("Invalid alloca instruction!"); + + Result = new AllocaInst(cast<PointerType>(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) :0); + break; + case Instruction::Free: + if (!isa<PointerType>(InstTy)) + error("Invalid free instruction!"); + Result = new FreeInst(getValue(iType, Oprnds[0])); + break; + case Instruction::GetElementPtr: { + if (Oprnds.size() == 0 || !isa<PointerType>(InstTy)) + error("Invalid getelementptr instruction!"); + + std::vector<Value*> Idx; + + const Type *NextTy = InstTy; + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy); + if (!TopTy) + error("Invalid getelementptr instruction!"); + + unsigned ValIdx = Oprnds[i]; + unsigned IdxTy = 0; + if (!hasRestrictedGEPTypes) { + // Struct indices are always uints, sequential type indices can be any + // of the 32 or 64-bit integer types. The actual choice of type is + // encoded in the low two bits of the slot number. + if (isa<StructType>(TopTy)) + IdxTy = Type::UIntTyID; + else { + switch (ValIdx & 3) { + default: + case 0: IdxTy = Type::UIntTyID; break; + case 1: IdxTy = Type::IntTyID; break; + case 2: IdxTy = Type::ULongTyID; break; + case 3: IdxTy = Type::LongTyID; break; + } + ValIdx >>= 2; + } + } else { + IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID; + } + + Idx.push_back(getValue(IdxTy, ValIdx)); + + // Convert ubyte struct indices into uint struct indices. + if (isa<StructType>(TopTy) && hasRestrictedGEPTypes) + if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back())) + Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy); + + NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true); + } + + Result = new GetElementPtrInst(getValue(iType, Oprnds[0]), Idx); + break; + } + + case 62: // volatile load + case Instruction::Load: + if (Oprnds.size() != 1 || !isa<PointerType>(InstTy)) + error("Invalid load instruction!"); + Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62); + break; + + case 63: // volatile store + case Instruction::Store: { + if (!isa<PointerType>(InstTy) || Oprnds.size() != 2) + error("Invalid store instruction!"); + + Value *Ptr = getValue(iType, Oprnds[1]); + const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType(); + Result = new StoreInst(getValue(getTypeSlot(ValTy), Oprnds[0]), Ptr, + Opcode == 63); + break; + } + case Instruction::Unwind: + if (Oprnds.size() != 0) error("Invalid unwind instruction!"); + Result = new UnwindInst(); + break; + case Instruction::Unreachable: + if (Oprnds.size() != 0) error("Invalid unreachable instruction!"); + Result = new UnreachableInst(); + break; + } // end switch(Opcode) + + unsigned TypeSlot; + if (Result->getType() == InstTy) + TypeSlot = iType; + else + TypeSlot = getTypeSlot(Result->getType()); + + insertValue(Result, TypeSlot, FunctionValues); + BB->getInstList().push_back(Result); +} + +/// Get a particular numbered basic block, which might be a forward reference. +/// This works together with ParseBasicBlock to handle these forward references +/// in a clean manner. This function is used when constructing phi, br, switch, +/// and other instructions that reference basic blocks. Blocks are numbered +/// sequentially as they appear in the function. +BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { + // Make sure there is room in the table... + if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); + + // First check to see if this is a backwards reference, i.e., ParseBasicBlock + // has already created this block, or if the forward reference has already + // been created. + if (ParsedBasicBlocks[ID]) + return ParsedBasicBlocks[ID]; + + // Otherwise, the basic block has not yet been created. Do so and add it to + // the ParsedBasicBlocks list. + return ParsedBasicBlocks[ID] = new BasicBlock(); +} + +/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time. +/// This method reads in one of the basicblock packets. This method is not used +/// for bytecode files after LLVM 1.0 +/// @returns The basic block constructed. +BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) { + if (Handler) Handler->handleBasicBlockBegin(BlockNo); + + BasicBlock *BB = 0; + + if (ParsedBasicBlocks.size() == BlockNo) + ParsedBasicBlocks.push_back(BB = new BasicBlock()); + else if (ParsedBasicBlocks[BlockNo] == 0) + BB = ParsedBasicBlocks[BlockNo] = new BasicBlock(); + else + BB = ParsedBasicBlocks[BlockNo]; + + std::vector<unsigned> Operands; + while (moreInBlock()) + ParseInstruction(Operands, BB); + + if (Handler) Handler->handleBasicBlockEnd(BlockNo); + return BB; +} + +/// Parse all of the BasicBlock's & Instruction's in the body of a function. +/// In post 1.0 bytecode files, we no longer emit basic block individually, +/// in order to avoid per-basic-block overhead. +/// @returns Rhe number of basic blocks encountered. +unsigned BytecodeReader::ParseInstructionList(Function* F) { + unsigned BlockNo = 0; + std::vector<unsigned> Args; + + while (moreInBlock()) { + if (Handler) Handler->handleBasicBlockBegin(BlockNo); + BasicBlock *BB; + if (ParsedBasicBlocks.size() == BlockNo) + ParsedBasicBlocks.push_back(BB = new BasicBlock()); + else if (ParsedBasicBlocks[BlockNo] == 0) + BB = ParsedBasicBlocks[BlockNo] = new BasicBlock(); + else + BB = ParsedBasicBlocks[BlockNo]; + ++BlockNo; + F->getBasicBlockList().push_back(BB); + + // Read instructions into this basic block until we get to a terminator + while (moreInBlock() && !BB->getTerminator()) + ParseInstruction(Args, BB); + + if (!BB->getTerminator()) + error("Non-terminated basic block found!"); + + if (Handler) Handler->handleBasicBlockEnd(BlockNo-1); + } + + return BlockNo; +} + +/// Parse a symbol table. This works for both module level and function +/// level symbol tables. For function level symbol tables, the CurrentFunction +/// parameter must be non-zero and the ST parameter must correspond to +/// CurrentFunction's symbol table. For Module level symbol tables, the +/// CurrentFunction argument must be zero. +void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, + SymbolTable *ST) { + if (Handler) Handler->handleSymbolTableBegin(CurrentFunction,ST); + + // Allow efficient basic block lookup by number. + std::vector<BasicBlock*> BBMap; + if (CurrentFunction) + for (Function::iterator I = CurrentFunction->begin(), + E = CurrentFunction->end(); I != E; ++I) + BBMap.push_back(I); + + /// In LLVM 1.3 we write types separately from values so + /// The types are always first in the symbol table. This is + /// because Type no longer derives from Value. + if (!hasTypeDerivedFromValue) { + // Symtab block header: [num entries] + unsigned NumEntries = read_vbr_uint(); + for (unsigned i = 0; i < NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); + const Type* T = getType(slot); + ST->insert(Name, T); + } + } + + while (moreInBlock()) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = 0; + bool isTypeType = read_typeid(Typ); + const Type *Ty = getType(Typ); + + for (unsigned i = 0; i != NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); + + // if we're reading a pre 1.3 bytecode file and the type plane + // is the "type type", handle it here + if (isTypeType) { + const Type* T = getType(slot); + if (T == 0) + error("Failed type look-up for name '" + Name + "'"); + ST->insert(Name, T); + continue; // code below must be short circuited + } else { + Value *V = 0; + if (Typ == Type::LabelTyID) { + if (slot < BBMap.size()) + V = BBMap[slot]; + } else { + V = getValue(Typ, slot, false); // Find mapping... + } + if (V == 0) + error("Failed value look-up for name '" + Name + "'"); + V->setName(Name); + } + } + } + checkPastBlockEnd("Symbol Table"); + if (Handler) Handler->handleSymbolTableEnd(); +} + +/// Read in the types portion of a compaction table. +void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = 0; + if (read_typeid(TypeSlot)) + error("Invalid type in compaction table: type type"); + const Type *Typ = getGlobalTableType(TypeSlot); + CompactionTypes.push_back(std::make_pair(Typ, TypeSlot)); + if (Handler) Handler->handleCompactionTableType(i, TypeSlot, Typ); + } +} + +/// Parse a compaction table. +void BytecodeReader::ParseCompactionTable() { + + // Notify handler that we're beginning a compaction table. + if (Handler) Handler->handleCompactionTableBegin(); + + // In LLVM 1.3 Type no longer derives from Value. So, + // we always write them first in the compaction table + // because they can't occupy a "type plane" where the + // Values reside. + if (! hasTypeDerivedFromValue) { + unsigned NumEntries = read_vbr_uint(); + ParseCompactionTypes(NumEntries); + } + + // Compaction tables live in separate blocks so we have to loop + // until we've read the whole thing. + while (moreInBlock()) { + // Read the number of Value* entries in the compaction table + unsigned NumEntries = read_vbr_uint(); + unsigned Ty = 0; + unsigned isTypeType = false; + + // Decode the type from value read in. Most compaction table + // planes will have one or two entries in them. If that's the + // case then the length is encoded in the bottom two bits and + // the higher bits encode the type. This saves another VBR value. + if ((NumEntries & 3) == 3) { + // In this case, both low-order bits are set (value 3). This + // is a signal that the typeid follows. + NumEntries >>= 2; + isTypeType = read_typeid(Ty); + } else { + // In this case, the low-order bits specify the number of entries + // and the high order bits specify the type. + Ty = NumEntries >> 2; + isTypeType = sanitizeTypeId(Ty); + NumEntries &= 3; + } + + // if we're reading a pre 1.3 bytecode file and the type plane + // is the "type type", handle it here + if (isTypeType) { + ParseCompactionTypes(NumEntries); + } else { + // Make sure we have enough room for the plane. + if (Ty >= CompactionValues.size()) + CompactionValues.resize(Ty+1); + + // Make sure the plane is empty or we have some kind of error. + if (!CompactionValues[Ty].empty()) + error("Compaction table plane contains multiple entries!"); + + // Notify handler about the plane. + if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries); + + // Push the implicit zero. + CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty))); + + // Read in each of the entries, put them in the compaction table + // and notify the handler that we have a new compaction table value. + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned ValSlot = read_vbr_uint(); + Value *V = getGlobalTableValue(Ty, ValSlot); + CompactionValues[Ty].push_back(V); + if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot); + } + } + } + // Notify handler that the compaction table is done. + if (Handler) Handler->handleCompactionTableEnd(); +} + +// Parse a single type. The typeid is read in first. If its a primitive type +// then nothing else needs to be read, we know how to instantiate it. If its +// a derived type, then additional data is read to fill out the type +// definition. +const Type *BytecodeReader::ParseType() { + unsigned PrimType = 0; + if (read_typeid(PrimType)) + error("Invalid type (type type) in type constants!"); + + const Type *Result = 0; + if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType))) + return Result; + + switch (PrimType) { + case Type::FunctionTyID: { + const Type *RetType = readSanitizedType(); + + unsigned NumParams = read_vbr_uint(); + + std::vector<const Type*> Params; + while (NumParams--) + Params.push_back(readSanitizedType()); + + bool isVarArg = Params.size() && Params.back() == Type::VoidTy; + if (isVarArg) Params.pop_back(); + + Result = FunctionType::get(RetType, Params, isVarArg); + break; + } + case Type::ArrayTyID: { + const Type *ElementType = readSanitizedType(); + unsigned NumElements = read_vbr_uint(); + Result = ArrayType::get(ElementType, NumElements); + break; + } + case Type::PackedTyID: { + const Type *ElementType = readSanitizedType(); + unsigned NumElements = read_vbr_uint(); + Result = PackedType::get(ElementType, NumElements); + break; + } + case Type::StructTyID: { + std::vector<const Type*> Elements; + unsigned Typ = 0; + if (read_typeid(Typ)) + error("Invalid element type (type type) for structure!"); + + while (Typ) { // List is terminated by void/0 typeid + Elements.push_back(getType(Typ)); + if (read_typeid(Typ)) + error("Invalid element type (type type) for structure!"); + } + + Result = StructType::get(Elements); + break; + } + case Type::PointerTyID: { + Result = PointerType::get(readSanitizedType()); + break; + } + + case Type::OpaqueTyID: { + Result = OpaqueType::get(); + break; + } + + default: + error("Don't know how to deserialize primitive type " + utostr(PrimType)); + break; + } + if (Handler) Handler->handleType(Result); + return Result; +} + +// ParseTypes - We have to use this weird code to handle recursive +// types. We know that recursive types will only reference the current slab of +// values in the type plane, but they can forward reference types before they +// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might +// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix +// this ugly problem, we pessimistically insert an opaque type for each type we +// are about to read. This means that forward references will resolve to +// something and when we reread the type later, we can replace the opaque type +// with a new resolved concrete type. +// +void BytecodeReader::ParseTypes(TypeListTy &Tab, unsigned NumEntries){ + assert(Tab.size() == 0 && "should not have read type constants in before!"); + + // Insert a bunch of opaque types to be resolved later... + Tab.reserve(NumEntries); + for (unsigned i = 0; i != NumEntries; ++i) + Tab.push_back(OpaqueType::get()); + + if (Handler) + Handler->handleTypeList(NumEntries); + + // If we are about to resolve types, make sure the type cache is clear. + if (NumEntries) + ModuleTypeIDCache.clear(); + + // Loop through reading all of the types. Forward types will make use of the + // opaque types just inserted. + // + for (unsigned i = 0; i != NumEntries; ++i) { + const Type* NewTy = ParseType(); + const Type* OldTy = Tab[i].get(); + if (NewTy == 0) + error("Couldn't parse type!"); + + // Don't directly push the new type on the Tab. Instead we want to replace + // the opaque type we previously inserted with the new concrete value. This + // approach helps with forward references to types. The refinement from the + // abstract (opaque) type to the new type causes all uses of the abstract + // type to use the concrete type (NewTy). This will also cause the opaque + // type to be deleted. + cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy); + + // This should have replaced the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the system. + // Let's just make sure it did. + assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); + } +} + +/// Parse a single constant value +Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { + // We must check for a ConstantExpr before switching by type because + // a ConstantExpr can be of any type, and has no explicit value. + // + // 0 if not expr; numArgs if is expr + unsigned isExprNumArgs = read_vbr_uint(); + + if (isExprNumArgs) { + // 'undef' is encoded with 'exprnumargs' == 1. + if (!hasNoUndefValue) + if (--isExprNumArgs == 0) + return UndefValue::get(getType(TypeID)); + + // FIXME: Encoding of constant exprs could be much more compact! + std::vector<Constant*> ArgVec; + ArgVec.reserve(isExprNumArgs); + unsigned Opcode = read_vbr_uint(); + + // Bytecode files before LLVM 1.4 need have a missing terminator inst. + if (hasNoUnreachableInst) Opcode++; + + // Read the slot number and types of each of the arguments + for (unsigned i = 0; i != isExprNumArgs; ++i) { + unsigned ArgValSlot = read_vbr_uint(); + unsigned ArgTypeSlot = 0; + if (read_typeid(ArgTypeSlot)) + error("Invalid argument type (type type) for constant value"); + + // Get the arg value from its slot if it exists, otherwise a placeholder + ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot)); + } + + // Construct a ConstantExpr of the appropriate kind + if (isExprNumArgs == 1) { // All one-operand expressions + if (Opcode != Instruction::Cast) + error("Only cast instruction has one argument for ConstantExpr"); + + Constant* Result = ConstantExpr::getCast(ArgVec[0], getType(TypeID)); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr + std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end()); + + if (hasRestrictedGEPTypes) { + const Type *BaseTy = ArgVec[0]->getType(); + generic_gep_type_iterator<std::vector<Constant*>::iterator> + GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()), + E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end()); + for (unsigned i = 0; GTI != E; ++GTI, ++i) + if (isa<StructType>(*GTI)) { + if (IdxList[i]->getType() != Type::UByteTy) + error("Invalid index for getelementptr!"); + IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy); + } + } + + Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } else if (Opcode == Instruction::Select) { + if (ArgVec.size() != 3) + error("Select instruction must have three arguments."); + Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1], + ArgVec[2]); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } else { // All other 2-operand expressions + Constant* Result = ConstantExpr::get(Opcode, ArgVec[0], ArgVec[1]); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } + } + + // Ok, not an ConstantExpr. We now know how to read the given type... + const Type *Ty = getType(TypeID); + switch (Ty->getTypeID()) { + case Type::BoolTyID: { + unsigned Val = read_vbr_uint(); + if (Val != 0 && Val != 1) + error("Invalid boolean value read."); + Constant* Result = ConstantBool::get(Val == 1); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val = read_vbr_uint(); + if (!ConstantUInt::isValueValidForType(Ty, Val)) + error("Invalid unsigned byte/short/int read."); + Constant* Result = ConstantUInt::get(Ty, Val); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::ULongTyID: { + Constant* Result = ConstantUInt::get(Ty, read_vbr_uint64()); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::SByteTyID: // Signed integer types... + case Type::ShortTyID: + case Type::IntTyID: { + case Type::LongTyID: + int64_t Val = read_vbr_int64(); + if (!ConstantSInt::isValueValidForType(Ty, Val)) + error("Invalid signed byte/short/int/long read."); + Constant* Result = ConstantSInt::get(Ty, Val); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::FloatTyID: { + float Val; + read_float(Val); + Constant* Result = ConstantFP::get(Ty, Val); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::DoubleTyID: { + double Val; + read_double(Val); + Constant* Result = ConstantFP::get(Ty, Val); + if (Handler) Handler->handleConstantValue(Result); + return Result; + } + + case Type::ArrayTyID: { + const ArrayType *AT = cast<ArrayType>(Ty); + unsigned NumElements = AT->getNumElements(); + unsigned TypeSlot = getTypeSlot(AT->getElementType()); + std::vector<Constant*> Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(getConstantValue(TypeSlot, + read_vbr_uint())); + Constant* Result = ConstantArray::get(AT, Elements); + if (Handler) Handler->handleConstantArray(AT, Elements, TypeSlot, Result); + return Result; + } + + case Type::StructTyID: { + const StructType *ST = cast<StructType>(Ty); + + std::vector<Constant *> Elements; + Elements.reserve(ST->getNumElements()); + for (unsigned i = 0; i != ST->getNumElements(); ++i) + Elements.push_back(getConstantValue(ST->getElementType(i), + read_vbr_uint())); + + Constant* Result = ConstantStruct::get(ST, Elements); + if (Handler) Handler->handleConstantStruct(ST, Elements, Result); + return Result; + } + + case Type::PackedTyID: { + const PackedType *PT = cast<PackedType>(Ty); + unsigned NumElements = PT->getNumElements(); + unsigned TypeSlot = getTypeSlot(PT->getElementType()); + std::vector<Constant*> Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(getConstantValue(TypeSlot, + read_vbr_uint())); + Constant* Result = ConstantPacked::get(PT, Elements); + if (Handler) Handler->handleConstantPacked(PT, Elements, TypeSlot, Result); + return Result; + } + + case Type::PointerTyID: { // ConstantPointerRef value (backwards compat). + const PointerType *PT = cast<PointerType>(Ty); + unsigned Slot = read_vbr_uint(); + + // Check to see if we have already read this global variable... + Value *Val = getValue(TypeID, Slot, false); + if (Val) { + GlobalValue *GV = dyn_cast<GlobalValue>(Val); + if (!GV) error("GlobalValue not in ValueTable!"); + if (Handler) Handler->handleConstantPointer(PT, Slot, GV); + return GV; + } else { + error("Forward references are not allowed here."); + } + } + + default: + error("Don't know how to deserialize constant value of type '" + + Ty->getDescription()); + break; + } + return 0; +} + +/// Resolve references for constants. This function resolves the forward +/// referenced constants in the ConstantFwdRefs map. It uses the +/// replaceAllUsesWith method of Value class to substitute the placeholder +/// instance with the actual instance. +void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Typ, + unsigned Slot) { + ConstantRefsType::iterator I = + ConstantFwdRefs.find(std::make_pair(Typ, Slot)); + if (I == ConstantFwdRefs.end()) return; // Never forward referenced? + + Value *PH = I->second; // Get the placeholder... + PH->replaceAllUsesWith(NewV); + delete PH; // Delete the old placeholder + ConstantFwdRefs.erase(I); // Remove the map entry for it +} + +/// Parse the constant strings section. +void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){ + for (; NumEntries; --NumEntries) { + unsigned Typ = 0; + if (read_typeid(Typ)) + error("Invalid type (type type) for string constant"); + const Type *Ty = getType(Typ); + if (!isa<ArrayType>(Ty)) + error("String constant data invalid!"); + + const ArrayType *ATy = cast<ArrayType>(Ty); + if (ATy->getElementType() != Type::SByteTy && + ATy->getElementType() != Type::UByteTy) + error("String constant data invalid!"); + + // Read character data. The type tells us how long the string is. + char *Data = reinterpret_cast<char *>(alloca(ATy->getNumElements())); + read_data(Data, Data+ATy->getNumElements()); + + std::vector<Constant*> Elements(ATy->getNumElements()); + if (ATy->getElementType() == Type::SByteTy) + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); + else + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); + + // Create the constant, inserting it as needed. + Constant *C = ConstantArray::get(ATy, Elements); + unsigned Slot = insertValue(C, Typ, Tab); + ResolveReferencesToConstant(C, Typ, Slot); + if (Handler) Handler->handleConstantString(cast<ConstantArray>(C)); + } +} + +/// Parse the constant pool. +void BytecodeReader::ParseConstantPool(ValueTable &Tab, + TypeListTy &TypeTab, + bool isFunction) { + if (Handler) Handler->handleGlobalConstantsBegin(); + + /// In LLVM 1.3 Type does not derive from Value so the types + /// do not occupy a plane. Consequently, we read the types + /// first in the constant pool. + if (isFunction && !hasTypeDerivedFromValue) { + unsigned NumEntries = read_vbr_uint(); + ParseTypes(TypeTab, NumEntries); + } + + while (moreInBlock()) { + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = 0; + bool isTypeType = read_typeid(Typ); + + /// In LLVM 1.2 and before, Types were written to the + /// bytecode file in the "Type Type" plane (#12). + /// In 1.3 plane 12 is now the label plane. Handle this here. + if (isTypeType) { + ParseTypes(TypeTab, NumEntries); + } else if (Typ == Type::VoidTyID) { + /// Use of Type::VoidTyID is a misnomer. It actually means + /// that the following plane is constant strings + assert(&Tab == &ModuleValues && "Cannot read strings in functions!"); + ParseStringConstants(NumEntries, Tab); + } else { + for (unsigned i = 0; i < NumEntries; ++i) { + Constant *C = ParseConstantValue(Typ); + assert(C && "ParseConstantValue returned NULL!"); + unsigned Slot = insertValue(C, Typ, Tab); + + // If we are reading a function constant table, make sure that we adjust + // the slot number to be the real global constant number. + // + if (&Tab != &ModuleValues && Typ < ModuleValues.size() && + ModuleValues[Typ]) + Slot += ModuleValues[Typ]->size(); + ResolveReferencesToConstant(C, Typ, Slot); + } + } + } + + // After we have finished parsing the constant pool, we had better not have + // any dangling references left. + if (!ConstantFwdRefs.empty()) { + ConstantRefsType::const_iterator I = ConstantFwdRefs.begin(); + Constant* missingConst = I->second; + error(utostr(ConstantFwdRefs.size()) + + " unresolved constant reference exist. First one is '" + + missingConst->getName() + "' of type '" + + missingConst->getType()->getDescription() + "'."); + } + + checkPastBlockEnd("Constant Pool"); + if (Handler) Handler->handleGlobalConstantsEnd(); +} + +/// Parse the contents of a function. Note that this function can be +/// called lazily by materializeFunction +/// @see materializeFunction +void BytecodeReader::ParseFunctionBody(Function* F) { + + unsigned FuncSize = BlockEnd - At; + GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; + + unsigned LinkageType = read_vbr_uint(); + switch (LinkageType) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + error("Invalid linkage type for Function."); + Linkage = GlobalValue::InternalLinkage; + break; + } + + F->setLinkage(Linkage); + if (Handler) Handler->handleFunctionBegin(F,FuncSize); + + // Keep track of how many basic blocks we have read in... + unsigned BlockNum = 0; + bool InsertedArguments = false; + + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + unsigned Type, Size; + BufPtr OldAt = At; + read_block(Type, Size); + + switch (Type) { + case BytecodeFormat::ConstantPoolBlockID: + if (!InsertedArguments) { + // Insert arguments into the value table before we parse the first basic + // block in the function, but after we potentially read in the + // compaction table. + insertArguments(F); + InsertedArguments = true; + } + + ParseConstantPool(FunctionValues, FunctionTypes, true); + break; + + case BytecodeFormat::CompactionTableBlockID: + ParseCompactionTable(); + break; + + case BytecodeFormat::BasicBlock: { + if (!InsertedArguments) { + // Insert arguments into the value table before we parse the first basic + // block in the function, but after we potentially read in the + // compaction table. + insertArguments(F); + InsertedArguments = true; + } + + BasicBlock *BB = ParseBasicBlock(BlockNum++); + F->getBasicBlockList().push_back(BB); + break; + } + + case BytecodeFormat::InstructionListBlockID: { + // Insert arguments into the value table before we parse the instruction + // list for the function, but after we potentially read in the compaction + // table. + if (!InsertedArguments) { + insertArguments(F); + InsertedArguments = true; + } + + if (BlockNum) + error("Already parsed basic blocks!"); + BlockNum = ParseInstructionList(F); + break; + } + + case BytecodeFormat::SymbolTableBlockID: + ParseSymbolTable(F, &F->getSymbolTable()); + break; + + default: + At += Size; + if (OldAt > At) + error("Wrapped around reading bytecode."); + break; + } + BlockEnd = MyEnd; + + // Malformed bc file if read past end of block. + align32(); + } + + // Make sure there were no references to non-existant basic blocks. + if (BlockNum != ParsedBasicBlocks.size()) + error("Illegal basic block operand reference"); + + ParsedBasicBlocks.clear(); + + // Resolve forward references. Replace any uses of a forward reference value + // with the real value. + while (!ForwardReferences.empty()) { + std::map<std::pair<unsigned,unsigned>, Value*>::iterator + I = ForwardReferences.begin(); + Value *V = getValue(I->first.first, I->first.second, false); + Value *PlaceHolder = I->second; + PlaceHolder->replaceAllUsesWith(V); + ForwardReferences.erase(I); + delete PlaceHolder; + } + + // Clear out function-level types... + FunctionTypes.clear(); + CompactionTypes.clear(); + CompactionValues.clear(); + freeTable(FunctionValues); + + if (Handler) Handler->handleFunctionEnd(F); +} + +/// This function parses LLVM functions lazily. It obtains the type of the +/// function and records where the body of the function is in the bytecode +/// buffer. The caller can then use the ParseNextFunction and +/// ParseAllFunctionBodies to get handler events for the functions. +void BytecodeReader::ParseFunctionLazily() { + if (FunctionSignatureList.empty()) + error("FunctionSignatureList empty!"); + + Function *Func = FunctionSignatureList.back(); + FunctionSignatureList.pop_back(); + + // Save the information for future reading of the function + LazyFunctionLoadMap[Func] = LazyFunctionInfo(BlockStart, BlockEnd); + + // This function has a body but it's not loaded so it appears `External'. + // Mark it as a `Ghost' instead to notify the users that it has a body. + Func->setLinkage(GlobalValue::GhostLinkage); + + // Pretend we've `parsed' this function + At = BlockEnd; +} + +/// The ParserFunction method lazily parses one function. Use this method to +/// casue the parser to parse a specific function in the module. Note that +/// this will remove the function from what is to be included by +/// ParseAllFunctionBodies. +/// @see ParseAllFunctionBodies +/// @see ParseBytecode +void BytecodeReader::ParseFunction(Function* Func) { + // Find {start, end} pointers and slot in the map. If not there, we're done. + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(Func); + + // Make sure we found it + if (Fi == LazyFunctionLoadMap.end()) { + error("Unrecognized function of type " + Func->getType()->getDescription()); + return; + } + + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + assert(Fi->first == Func && "Found wrong function?"); + + LazyFunctionLoadMap.erase(Fi); + + this->ParseFunctionBody(Func); +} + +/// The ParseAllFunctionBodies method parses through all the previously +/// unparsed functions in the bytecode file. If you want to completely parse +/// a bytecode file, this method should be called after Parsebytecode because +/// Parsebytecode only records the locations in the bytecode file of where +/// the function definitions are located. This function uses that information +/// to materialize the functions. +/// @see ParseBytecode +void BytecodeReader::ParseAllFunctionBodies() { + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); + LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); + + while (Fi != Fe) { + Function* Func = Fi->first; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + ParseFunctionBody(Func); + ++Fi; + } + LazyFunctionLoadMap.clear(); +} + +/// Parse the global type list +void BytecodeReader::ParseGlobalTypes() { + // Read the number of types + unsigned NumEntries = read_vbr_uint(); + + // Ignore the type plane identifier for types if the bc file is pre 1.3 + if (hasTypeDerivedFromValue) + read_vbr_uint(); + + ParseTypes(ModuleTypes, NumEntries); +} + +/// Parse the Global info (types, global vars, constants) +void BytecodeReader::ParseModuleGlobalInfo() { + + if (Handler) Handler->handleModuleGlobalsBegin(); + + // Read global variables... + unsigned VarType = read_vbr_uint(); + while (VarType != Type::VoidTyID) { // List is terminated by Void + // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = + // Linkage, bit4+ = slot# + unsigned SlotNo = VarType >> 5; + if (sanitizeTypeId(SlotNo)) + error("Invalid type (type type) for global var!"); + unsigned LinkageID = (VarType >> 2) & 7; + bool isConstant = VarType & 1; + bool hasInitializer = VarType & 2; + GlobalValue::LinkageTypes Linkage; + + switch (LinkageID) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + error("Unknown linkage type: " + utostr(LinkageID)); + Linkage = GlobalValue::InternalLinkage; + break; + } + + const Type *Ty = getType(SlotNo); + if (!Ty) { + error("Global has no type! SlotNo=" + utostr(SlotNo)); + } + + if (!isa<PointerType>(Ty)) { + error("Global not a pointer type! Ty= " + Ty->getDescription()); + } + + const Type *ElTy = cast<PointerType>(Ty)->getElementType(); + + // Create the global variable... + GlobalVariable *GV = new GlobalVariable(ElTy, isConstant, Linkage, + 0, "", TheModule); + insertValue(GV, SlotNo, ModuleValues); + + unsigned initSlot = 0; + if (hasInitializer) { + initSlot = read_vbr_uint(); + GlobalInits.push_back(std::make_pair(GV, initSlot)); + } + + // Notify handler about the global value. + if (Handler) + Handler->handleGlobalVariable(ElTy, isConstant, Linkage, SlotNo,initSlot); + + // Get next item + VarType = read_vbr_uint(); + } + + // Read the function objects for all of the functions that are coming + unsigned FnSignature = read_vbr_uint(); + + if (hasNoFlagsForFunctions) + FnSignature = (FnSignature << 5) + 1; + + // List is terminated by VoidTy. + while ((FnSignature >> 5) != Type::VoidTyID) { + const Type *Ty = getType(FnSignature >> 5); + if (!isa<PointerType>(Ty) || + !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) { + error("Function not a pointer to function type! Ty = " + + Ty->getDescription()); + } + + // We create functions by passing the underlying FunctionType to create... + const FunctionType* FTy = + cast<FunctionType>(cast<PointerType>(Ty)->getElementType()); + + + // Insert the place holder. + Function* Func = new Function(FTy, GlobalValue::ExternalLinkage, + "", TheModule); + insertValue(Func, FnSignature >> 5, ModuleValues); + + // Flags are not used yet. + unsigned Flags = FnSignature & 31; + + // Save this for later so we know type of lazily instantiated functions. + // Note that known-external functions do not have FunctionInfo blocks, so we + // do not add them to the FunctionSignatureList. + if ((Flags & (1 << 4)) == 0) + FunctionSignatureList.push_back(Func); + + // Look at the low bits. If there is a calling conv here, apply it, + // read it as a vbr. + Flags &= 15; + if (Flags) + Func->setCallingConv(Flags-1); + else + Func->setCallingConv(read_vbr_uint()); + + if (Handler) Handler->handleFunctionDeclaration(Func); + + // Get the next function signature. + FnSignature = read_vbr_uint(); + if (hasNoFlagsForFunctions) + FnSignature = (FnSignature << 5) + 1; + } + + // Now that the function signature list is set up, reverse it so that we can + // remove elements efficiently from the back of the vector. + std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end()); + + // If this bytecode format has dependent library information in it .. + if (!hasNoDependentLibraries) { + // Read in the number of dependent library items that follow + unsigned num_dep_libs = read_vbr_uint(); + std::string dep_lib; + while( num_dep_libs-- ) { + dep_lib = read_str(); + TheModule->addLibrary(dep_lib); + if (Handler) + Handler->handleDependentLibrary(dep_lib); + } + + + // Read target triple and place into the module + std::string triple = read_str(); + TheModule->setTargetTriple(triple); + if (Handler) + Handler->handleTargetTriple(triple); + } + + if (hasInconsistentModuleGlobalInfo) + align32(); + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + At = BlockEnd; + + if (Handler) Handler->handleModuleGlobalsEnd(); +} + +/// Parse the version information and decode it by setting flags on the +/// Reader that enable backward compatibility of the reader. +void BytecodeReader::ParseVersionInfo() { + unsigned Version = read_vbr_uint(); + + // Unpack version number: low four bits are for flags, top bits = version + Module::Endianness Endianness; + Module::PointerSize PointerSize; + Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian; + PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32; + + bool hasNoEndianness = Version & 4; + bool hasNoPointerSize = Version & 8; + + RevisionNum = Version >> 4; + + // Default values for the current bytecode version + hasInconsistentModuleGlobalInfo = false; + hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; + hasTypeDerivedFromValue = false; + hasLongBlockHeaders = false; + has32BitTypes = false; + hasNoDependentLibraries = false; + hasAlignment = false; + hasNoUndefValue = false; + hasNoFlagsForFunctions = false; + hasNoUnreachableInst = false; + + switch (RevisionNum) { + case 0: // LLVM 1.0, 1.1 (Released) + // Base LLVM 1.0 bytecode format. + hasInconsistentModuleGlobalInfo = true; + hasExplicitPrimitiveZeros = true; + + // FALL THROUGH + + case 1: // LLVM 1.2 (Released) + // LLVM 1.2 added explicit support for emitting strings efficiently. + + // Also, it fixed the problem where the size of the ModuleGlobalInfo block + // included the size for the alignment at the end, where the rest of the + // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // LLVM 1.2 and before had the Type class derive from Value class. This + // changed in release 1.3 and consequently LLVM 1.3 bytecode files are + // written differently because Types can no longer be part of the + // type planes for Values. + hasTypeDerivedFromValue = true; + + // FALL THROUGH + + case 2: // 1.2.5 (Not Released) + + // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful, + // especially for small files where the 8 bytes per block is a large + // fraction of the total block size. In LLVM 1.3, the block type and length + // are compressed into a single 32-bit unsigned integer. 27 bits for length, + // 5 bits for block type. + hasLongBlockHeaders = true; + + // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3 + // this has been reduced to vbr_uint24. It shouldn't make much difference + // since we haven't run into a module with > 24 million types, but for + // safety the 24-bit restriction has been enforced in 1.3 to free some bits + // in various places and to ensure consistency. + has32BitTypes = true; + + // LLVM 1.2 and earlier did not provide a target triple nor a list of + // libraries on which the bytecode is dependent. LLVM 1.3 provides these + // features, for use in future versions of LLVM. + hasNoDependentLibraries = true; + + // FALL THROUGH + + case 3: // LLVM 1.3 (Released) + // LLVM 1.3 and earlier caused alignment bytes to be written on some block + // boundaries and at the end of some strings. In extreme cases (e.g. lots + // of GEP references to a constant array), this can increase the file size + // by 30% or more. In version 1.4 alignment is done away with completely. + hasAlignment = true; + + // FALL THROUGH + + case 4: // 1.3.1 (Not Released) + // In version 4, we did not support the 'undef' constant. + hasNoUndefValue = true; + + // In version 4 and above, we did not include space for flags for functions + // in the module info block. + hasNoFlagsForFunctions = true; + + // In version 4 and above, we did not include the 'unreachable' instruction + // in the opcode numbering in the bytecode file. + hasNoUnreachableInst = true; + break; + + // FALL THROUGH + + case 5: // 1.4 (Released) + break; + + default: + error("Unknown bytecode version number: " + itostr(RevisionNum)); + } + + if (hasNoEndianness) Endianness = Module::AnyEndianness; + if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; + + TheModule->setEndianness(Endianness); + TheModule->setPointerSize(PointerSize); + + if (Handler) Handler->handleVersionInfo(RevisionNum, Endianness, PointerSize); +} + +/// Parse a whole module. +void BytecodeReader::ParseModule() { + unsigned Type, Size; + + FunctionSignatureList.clear(); // Just in case... + + // Read into instance variables... + ParseVersionInfo(); + align32(); + + bool SeenModuleGlobalInfo = false; + bool SeenGlobalTypePlane = false; + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + BufPtr OldAt = At; + read_block(Type, Size); + + switch (Type) { + + case BytecodeFormat::GlobalTypePlaneBlockID: + if (SeenGlobalTypePlane) + error("Two GlobalTypePlane Blocks Encountered!"); + + if (Size > 0) + ParseGlobalTypes(); + SeenGlobalTypePlane = true; + break; + + case BytecodeFormat::ModuleGlobalInfoBlockID: + if (SeenModuleGlobalInfo) + error("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(); + SeenModuleGlobalInfo = true; + break; + + case BytecodeFormat::ConstantPoolBlockID: + ParseConstantPool(ModuleValues, ModuleTypes,false); + break; + + case BytecodeFormat::FunctionBlockID: + ParseFunctionLazily(); + break; + + case BytecodeFormat::SymbolTableBlockID: + ParseSymbolTable(0, &TheModule->getSymbolTable()); + break; + + default: + At += Size; + if (OldAt > At) { + error("Unexpected Block of Type #" + utostr(Type) + " encountered!"); + } + break; + } + BlockEnd = MyEnd; + align32(); + } + + // After the module constant pool has been read, we can safely initialize + // global variables... + while (!GlobalInits.empty()) { + GlobalVariable *GV = GlobalInits.back().first; + unsigned Slot = GlobalInits.back().second; + GlobalInits.pop_back(); + + // Look up the initializer value... + // FIXME: Preserve this type ID! + + const llvm::PointerType* GVType = GV->getType(); + unsigned TypeSlot = getTypeSlot(GVType->getElementType()); + if (Constant *CV = getConstantValue(TypeSlot, Slot)) { + if (GV->hasInitializer()) + error("Global *already* has an initializer?!"); + if (Handler) Handler->handleGlobalInitializer(GV,CV); + GV->setInitializer(CV); + } else + error("Cannot find initializer value."); + } + + if (!ConstantFwdRefs.empty()) + error("Use of undefined constants in a module"); + + /// Make sure we pulled them all out. If we didn't then there's a declaration + /// but a missing body. That's not allowed. + if (!FunctionSignatureList.empty()) + error("Function declared, but bytecode stream ended before definition"); +} + +/// This function completely parses a bytecode buffer given by the \p Buf +/// and \p Length parameters. +void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, + const std::string &ModuleID) { + + try { + RevisionNum = 0; + At = MemStart = BlockStart = Buf; + MemEnd = BlockEnd = Buf + Length; + + // Create the module + TheModule = new Module(ModuleID); + + if (Handler) Handler->handleStart(TheModule, Length); + + // Read the four bytes of the signature. + unsigned Sig = read_uint(); + + // If this is a compressed file + if (Sig == ('l' | ('l' << 8) | ('v' << 16) | ('c' << 24))) { + + // Invoke the decompression of the bytecode. Note that we have to skip the + // file's magic number which is not part of the compressed block. Hence, + // the Buf+4 and Length-4. The result goes into decompressedBlock, a data + // member for retention until BytecodeReader is destructed. + unsigned decompressedLength = Compressor::decompressToNewBuffer( + (char*)Buf+4,Length-4,decompressedBlock); + + // We must adjust the buffer pointers used by the bytecode reader to point + // into the new decompressed block. After decompression, the + // decompressedBlock will point to a contiguous memory area that has + // the decompressed data. + At = MemStart = BlockStart = Buf = (BufPtr) decompressedBlock; + MemEnd = BlockEnd = Buf + decompressedLength; + + // else if this isn't a regular (uncompressed) bytecode file, then its + // and error, generate that now. + } else if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + error("Invalid bytecode signature: " + utohexstr(Sig)); + } + + // Tell the handler we're starting a module + if (Handler) Handler->handleModuleBegin(ModuleID); + + // Get the module block and size and verify. This is handled specially + // because the module block/size is always written in long format. Other + // blocks are written in short format so the read_block method is used. + unsigned Type, Size; + Type = read_uint(); + Size = read_uint(); + if (Type != BytecodeFormat::ModuleBlockID) { + error("Expected Module Block! Type:" + utostr(Type) + ", Size:" + + utostr(Size)); + } + + // It looks like the darwin ranlib program is broken, and adds trailing + // garbage to the end of some bytecode files. This hack allows the bc + // reader to ignore trailing garbage on bytecode files. + if (At + Size < MemEnd) + MemEnd = BlockEnd = At+Size; + + if (At + Size != MemEnd) + error("Invalid Top Level Block Length! Type:" + utostr(Type) + + ", Size:" + utostr(Size)); + + // Parse the module contents + this->ParseModule(); + + // Check for missing functions + if (hasFunctions()) + error("Function expected, but bytecode stream ended!"); + + // Tell the handler we're done with the module + if (Handler) + Handler->handleModuleEnd(ModuleID); + + // Tell the handler we're finished the parse + if (Handler) Handler->handleFinish(); + + } catch (std::string& errstr) { + if (Handler) Handler->handleError(errstr); + freeState(); + delete TheModule; + TheModule = 0; + if (decompressedBlock != 0 ) { + ::free(decompressedBlock); + decompressedBlock = 0; + } + throw; + } catch (...) { + std::string msg("Unknown Exception Occurred"); + if (Handler) Handler->handleError(msg); + freeState(); + delete TheModule; + TheModule = 0; + if (decompressedBlock != 0) { + ::free(decompressedBlock); + decompressedBlock = 0; + } + throw msg; + } +} + +//===----------------------------------------------------------------------===// +//=== Default Implementations of Handler Methods +//===----------------------------------------------------------------------===// + +BytecodeHandler::~BytecodeHandler() {} + |