diff options
Diffstat (limited to 'lib/Bytecode')
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 712 |
1 files changed, 434 insertions, 278 deletions
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 353f29c4b2..8d0df020ec 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -20,10 +20,8 @@ #include "llvm/Bytecode/BytecodeHandler.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" -#include "llvm/iMemory.h" -#include "llvm/iOther.h" -#include "llvm/iPHINode.h" -#include "llvm/iTerminators.h" +#include "llvm/Instructions.h" +#include "llvm/SymbolTable.h" #include "llvm/Bytecode/Format.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "Support/StringExtras.h" @@ -31,11 +29,6 @@ using namespace llvm; -/// A convenience macro for calling the handler. Makes maintenance easier in -/// case the interface to handler methods changes. -#define HANDLE(method) \ - if ( Handler ) Handler->handle ## method - /// A convenience macro for handling parsing errors. #define PARSE_ERROR(inserters) { \ std::ostringstream errormsg; \ @@ -66,31 +59,36 @@ typedef PlaceholderDef<ConstantPlaceHolderHelper> ConstPHolder; // Bytecode Reading Methods //===----------------------------------------------------------------------===// +/// Determine if the current block being read contains any more data. inline bool BytecodeReader::moreInBlock() { return At < BlockEnd; } +/// Throw an error if we've read past the end of the current block inline void BytecodeReader::checkPastBlockEnd(const char * block_name) { if ( At > BlockEnd ) PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); } +/// Align the buffer position to a 32 bit boundary inline void BytecodeReader::align32() { BufPtr Save = At; At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); if ( At > Save ) - HANDLE(Alignment( At - Save )); + if (Handler) Handler->handleAlignment( At - Save ); if (At > BlockEnd) - PARSE_ERROR("Ran out of data while aligning!"); + throw std::string("Ran out of data while aligning!"); } +/// Read a whole unsigned integer inline unsigned BytecodeReader::read_uint() { if (At+4 > BlockEnd) - PARSE_ERROR("Ran out of data reading uint!"); + throw std::string("Ran out of data reading uint!"); At += 4; return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); } +/// Read a variable-bit-rate encoded unsigned integer inline unsigned BytecodeReader::read_vbr_uint() { unsigned Shift = 0; unsigned Result = 0; @@ -98,14 +96,15 @@ inline unsigned BytecodeReader::read_vbr_uint() { do { if (At == BlockEnd) - PARSE_ERROR("Ran out of data reading vbr_uint!"); + throw std::string("Ran out of data reading vbr_uint!"); Result |= (unsigned)((*At++) & 0x7F) << Shift; Shift += 7; } while (At[-1] & 0x80); - HANDLE(VBR32(At-Save)); + if (Handler) Handler->handleVBR32(At-Save); return Result; } +/// Read a variable-bit-rate encoded unsigned 64-bit integer. inline uint64_t BytecodeReader::read_vbr_uint64() { unsigned Shift = 0; uint64_t Result = 0; @@ -113,14 +112,15 @@ inline uint64_t BytecodeReader::read_vbr_uint64() { do { if (At == BlockEnd) - PARSE_ERROR("Ran out of data reading vbr_uint64!"); + throw std::string("Ran out of data reading vbr_uint64!"); Result |= (uint64_t)((*At++) & 0x7F) << Shift; Shift += 7; } while (At[-1] & 0x80); - HANDLE(VBR64(At-Save)); + if (Handler) Handler->handleVBR64(At-Save); return Result; } +/// Read a variable-bit-rate encoded signed 64-bit integer. inline int64_t BytecodeReader::read_vbr_int64() { uint64_t R = read_vbr_uint64(); if (R & 1) { @@ -133,45 +133,88 @@ inline int64_t BytecodeReader::read_vbr_int64() { return (int64_t)(R >> 1); } +/// Read a pascal-style string (length followed by text) inline std::string BytecodeReader::read_str() { unsigned Size = read_vbr_uint(); const unsigned char *OldAt = At; At += Size; if (At > BlockEnd) // Size invalid? - PARSE_ERROR("Ran out of data reading a string!"); + throw std::string("Ran out of data reading a string!"); return std::string((char*)OldAt, Size); } +/// Read an arbitrary block of data inline void BytecodeReader::read_data(void *Ptr, void *End) { unsigned char *Start = (unsigned char *)Ptr; unsigned Amount = (unsigned char *)End - Start; if (At+Amount > BlockEnd) - PARSE_ERROR("Ran out of data!"); + throw std::string("Ran out of data!"); std::copy(At, At+Amount, Start); At += Amount; } +/// Read a block header and obtain its type and size inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { Type = read_uint(); Size = read_uint(); BlockStart = At; if ( At + Size > BlockEnd ) - PARSE_ERROR("Attempt to size a block past end of memory"); + throw std::string("Attempt to size a block past end of memory"); BlockEnd = At + Size; - HANDLE(Block( Type, BlockStart, Size )); + if (Handler) Handler->handleBlock( Type, BlockStart, Size ); +} + + +/// In LLVM 1.2 and before, Types were derived from Value and so they were +/// written as part of the type planes along with any other Value. In LLVM +/// 1.3 this changed so that Type does not derive from Value. Consequently, +/// the BytecodeReader's containers for Values can't contain Types because +/// there's no inheritance relationship. This means that the "Type Type" +/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3 +/// whenever a bytecode construct must have both types and values together, +/// the types are always read/written first and then the Values. Furthermore +/// since Type::TypeTyID no longer exists, its value (12) now corresponds to +/// Type::LabelTyID. In order to overcome this we must "sanitize" all the +/// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change. +/// For LLVM 1.2 and before, this function will decrement the type id by +/// one to account for the missing Type::TypeTyID enumerator if the value is +/// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this +/// function returns true, otherwise false. This helps detect situations +/// where the pre 1.3 bytecode is indicating that what follows is a type. +/// @returns true iff type id corresponds to pre 1.3 "type type" +inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId ) { + if ( hasTypeDerivedFromValue ) { /// do nothing if 1.3 or later + if ( TypeId == Type::LabelTyID ) { + TypeId = Type::VoidTyID; // sanitize it + return true; // indicate we got TypeTyID in pre 1.3 bytecode + } else if ( TypeId > Type::LabelTyID ) + --TypeId; // shift all planes down because type type plane is missing + } + return false; +} + +/// Reads a vbr uint to read in a type id and does the necessary +/// conversion on it by calling sanitizeTypeId. +/// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type" +/// @see sanitizeTypeId +inline bool BytecodeReader::read_typeid(unsigned &TypeId) { + TypeId = read_vbr_uint(); + return sanitizeTypeId(TypeId); } //===----------------------------------------------------------------------===// // IR Lookup Methods //===----------------------------------------------------------------------===// +/// Determine if a type id has an implicit null value inline bool BytecodeReader::hasImplicitNull(unsigned TyID ) { if (!hasExplicitPrimitiveZeros) - return TyID != Type::LabelTyID && TyID != Type::TypeTyID && - TyID != Type::VoidTyID; + return TyID != Type::LabelTyID && TyID != Type::VoidTyID; return TyID >= Type::FirstDerivedTyID; } +/// Obtain a type given a typeid and account for things like compaction tables, +/// function level vs module level, and the offsetting for the primitive types. const Type *BytecodeReader::getType(unsigned ID) { if (ID < Type::FirstDerivedTyID) if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID)) @@ -182,7 +225,7 @@ const Type *BytecodeReader::getType(unsigned ID) { if (!CompactionTypes.empty()) { if (ID >= CompactionTypes.size()) - PARSE_ERROR("Type ID out of range for compaction table!"); + throw std::string("Type ID out of range for compaction table!"); return CompactionTypes[ID]; } @@ -195,10 +238,31 @@ const Type *BytecodeReader::getType(unsigned ID) { if (ID < FunctionTypes.size()) return FunctionTypes[ID].get(); - PARSE_ERROR("Illegal type reference!"); + throw std::string("Illegal type reference!"); return Type::VoidTy; } +/// Get a sanitized type id. This just makes sure that the \p ID +/// is both sanitized and not the "type type" of pre-1.3 bytecode. +/// @see sanitizeTypeId +inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) { + bool isTypeType = sanitizeTypeId(ID); + assert(!isTypeType && "Invalid type id occurred"); + return getType(ID); +} + +/// This method just saves some coding. It uses read_typeid to read +/// in a sanitized type id, asserts that its not the type type, and +/// then calls getType to return the type value. +inline const Type* BytecodeReader::readSanitizedType() { + unsigned ID; + bool isTypeType = read_typeid(ID); + assert(!isTypeType && "Invalid type id occurred"); + return getType(ID); +} + +/// Get the slot number associated with a type accounting for primitive +/// types, compaction tables, and function level vs module level. unsigned BytecodeReader::getTypeSlot(const Type *Ty) { if (Ty->isPrimitiveType()) return Ty->getTypeID(); @@ -206,10 +270,10 @@ unsigned BytecodeReader::getTypeSlot(const Type *Ty) { // Scan the compaction table for the type if needed. if (!CompactionTypes.empty()) { std::vector<const Type*>::const_iterator I = - find(CompactionTypes.begin(), CompactionTypes.end(), Ty); + find(CompactionTypes.begin(), CompactionTypes.end(), Ty); if (I == CompactionTypes.end()) - PARSE_ERROR("Couldn't find type specified in compaction table!"); + throw std::string("Couldn't find type specified in compaction table!"); return Type::FirstDerivedTyID + (&*I - &CompactionTypes[0]); } @@ -218,15 +282,18 @@ unsigned BytecodeReader::getTypeSlot(const Type *Ty) { if (I != FunctionTypes.end()) return Type::FirstDerivedTyID + ModuleTypes.size() + - (&*I - &FunctionTypes[0]); + (&*I - &FunctionTypes[0]); // Check the module level types now... I = find(ModuleTypes.begin(), ModuleTypes.end(), Ty); if (I == ModuleTypes.end()) - PARSE_ERROR("Didn't find type in ModuleTypes."); + throw std::string("Didn't find type in ModuleTypes."); return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } +/// This is just like getType, but when a compaction table is in use, it is +/// ignored. It also ignores function level types. +/// @see getType const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { if (Slot < Type::FirstDerivedTyID) { const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot); @@ -235,22 +302,25 @@ const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { } Slot -= Type::FirstDerivedTyID; if (Slot >= ModuleTypes.size()) - PARSE_ERROR("Illegal compaction table type reference!"); + throw std::string("Illegal compaction table type reference!"); return ModuleTypes[Slot]; } +/// This is just like getTypeSlot, but when a compaction table is in use, it +/// is ignored. It also ignores function level types. unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) { if (Ty->isPrimitiveType()) return Ty->getTypeID(); TypeListTy::iterator I = find(ModuleTypes.begin(), - ModuleTypes.end(), Ty); + ModuleTypes.end(), Ty); if (I == ModuleTypes.end()) - PARSE_ERROR("Didn't find type in ModuleTypes."); + throw std::string("Didn't find type in ModuleTypes."); return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } +/// Retrieve a value of a given type and slot number, possibly creating +/// it if it doesn't already exist. Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { - assert(type != Type::TypeTyID && "getValue() cannot get types!"); assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); unsigned Num = oNum; @@ -266,25 +336,23 @@ Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { // If the type plane was compactified, figure out the global type ID // by adding the derived type ids and the distance. - if (CompactionValues.size() > Type::TypeTyID && - !CompactionTypes.empty() && - type >= Type::FirstDerivedTyID) { + if (!CompactionTypes.empty() && type >= Type::FirstDerivedTyID) { const Type *Ty = CompactionTypes[type-Type::FirstDerivedTyID]; TypeListTy::iterator I = - find(ModuleTypes.begin(), ModuleTypes.end(), Ty); + find(ModuleTypes.begin(), ModuleTypes.end(), Ty); assert(I != ModuleTypes.end()); GlobalTyID = Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } if (hasImplicitNull(GlobalTyID)) { if (Num == 0) - return Constant::getNullValue(getType(type)); + return Constant::getNullValue(getType(type)); --Num; } if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) { if (Num < ModuleValues[GlobalTyID]->size()) - return ModuleValues[GlobalTyID]->getOperand(Num); + return ModuleValues[GlobalTyID]->getOperand(Num); Num -= ModuleValues[GlobalTyID]->size(); } } @@ -306,6 +374,9 @@ Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { return Val; } +/// This is just like getValue, but when a compaction table is in use, it +/// is ignored. Also, no forward references or other fancy features are +/// supported. Value* BytecodeReader::getGlobalTableValue(const Type *Ty, unsigned SlotNo) { // FIXME: getTypeSlot is inefficient! unsigned TyID = getGlobalTableTypeSlot(Ty); @@ -319,13 +390,18 @@ Value* BytecodeReader::getGlobalTableValue(const Type *Ty, unsigned SlotNo) { if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 || SlotNo >= ModuleValues[TyID]->size()) { PARSE_ERROR("Corrupt compaction table entry!" - << TyID << ", " << SlotNo << ": " << ModuleValues.size() << ", " - << (void*)ModuleValues[TyID] << ", " - << ModuleValues[TyID]->size() << "\n"); + << TyID << ", " << SlotNo << ": " << ModuleValues.size() << ", " + << (void*)ModuleValues[TyID] << ", " + << ModuleValues[TyID]->size() << "\n"); } return ModuleValues[TyID]->getOperand(SlotNo); } +/// Just like getValue, except that it returns a null pointer +/// only on error. It always returns a constant (meaning that if the value is +/// defined, but is not a constant, that is an error). If the specified +/// constant hasn't been parsed yet, a placeholder is defined and used. +/// Later, after the real value is parsed, the placeholder is eliminated. Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { if (Value *V = getValue(TypeSlot, Slot, false)) if (Constant *C = dyn_cast<Constant>(V)) @@ -335,7 +411,7 @@ Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { // to infest bytecode files. return ConstantPointerRef::get(GV); else - PARSE_ERROR("Reference of a value is expected to be a constant!"); + throw std::string("Reference of a value is expected to be a constant!"); const Type *Ty = getType(TypeSlot); std::pair<const Type*, unsigned> Key(Ty, Slot); @@ -358,12 +434,14 @@ Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { // IR Construction Methods //===----------------------------------------------------------------------===// +/// As values are created, they are inserted into the appropriate place +/// with this method. The ValueTable argument must be one of ModuleValues +/// or FunctionValues data members of this class. unsigned BytecodeReader::insertValue( Value *Val, unsigned type, ValueTable &ValueTab) { assert((!isa<Constant>(Val) || !cast<Constant>(Val)->isNullValue()) || - !hasImplicitNull(type) && - "Cannot read null values from bytecode!"); - assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); + !hasImplicitNull(type) && + "Cannot read null values from bytecode!"); if (ValueTab.size() <= type) ValueTab.resize(type+1); @@ -376,6 +454,7 @@ unsigned BytecodeReader::insertValue( return ValueTab[type]->size()-1 + HasOffset; } +/// Insert the arguments of a function as new values in the reader. void BytecodeReader::insertArguments(Function* F ) { const FunctionType *FT = F->getFunctionType(); Function::aiterator AI = F->abegin(); @@ -388,6 +467,9 @@ void BytecodeReader::insertArguments(Function* F ) { // Bytecode Parsing Methods //===----------------------------------------------------------------------===// +/// This method parses a single instruction. The instruction is +/// inserted at the end of the \p BB provided. The arguments of +/// the instruction are provided in the \p Args vector. void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, BasicBlock* BB) { BufPtr SaveAt = At; @@ -452,7 +534,7 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, Oprnds.resize(NumOprnds); if (NumOprnds == 0) - PARSE_ERROR("Zero-argument instruction found; this is invalid."); + throw std::string("Zero-argument instruction found; this is invalid."); for (unsigned i = 0; i != NumOprnds; ++i) Oprnds[i] = read_vbr_uint(); @@ -460,11 +542,10 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; } - // Get the type of the instruction - const Type *InstTy = getType(iType); + const Type *InstTy = getSanitizedType(iType); // Hae enough to inform the handler now - HANDLE(Instruction(Opcode, InstTy, Oprnds, At-SaveAt)); + if (Handler) Handler->handleInstruction(Opcode, InstTy, Oprnds, At-SaveAt); // Declare the resulting instruction we'll build. Instruction *Result = 0; @@ -478,16 +559,20 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, switch (Opcode) { default: - if (Result == 0) PARSE_ERROR("Illegal instruction read!"); + if (Result == 0) + throw std::string("Illegal instruction read!"); break; case Instruction::VAArg: - Result = new VAArgInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + Result = new VAArgInst(getValue(iType, Oprnds[0]), + getSanitizedType(Oprnds[1])); break; case Instruction::VANext: - Result = new VANextInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + Result = new VANextInst(getValue(iType, Oprnds[0]), + getSanitizedType(Oprnds[1])); break; case Instruction::Cast: - Result = new CastInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + Result = new CastInst(getValue(iType, Oprnds[0]), + getSanitizedType(Oprnds[1])); break; case Instruction::Select: Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]), @@ -496,7 +581,7 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; case Instruction::PHI: { if (Oprnds.size() == 0 || (Oprnds.size() & 1)) - PARSE_ERROR("Invalid phi node encountered!\n"); + throw std::string("Invalid phi node encountered!"); PHINode *PN = new PHINode(InstTy); PN->op_reserve(Oprnds.size()); @@ -518,7 +603,7 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, else if (Oprnds.size() == 1) Result = new ReturnInst(getValue(iType, Oprnds[0])); else - PARSE_ERROR("Unrecognized instruction!"); + throw std::string("Unrecognized instruction!"); break; case Instruction::Br: @@ -526,13 +611,13 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, Result = new BranchInst(getBasicBlock(Oprnds[0])); else if (Oprnds.size() == 3) Result = new BranchInst(getBasicBlock(Oprnds[0]), - getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); + getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); else - PARSE_ERROR("Invalid number of operands for a 'br' instruction!"); + throw std::string("Invalid number of operands for a 'br' instruction!"); break; case Instruction::Switch: { if (Oprnds.size() & 1) - PARSE_ERROR("Switch statement with odd number of arguments!"); + throw std::string("Switch statement with odd number of arguments!"); SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]), getBasicBlock(Oprnds[1])); @@ -545,15 +630,15 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, case Instruction::Call: { if (Oprnds.size() == 0) - PARSE_ERROR("Invalid call instruction encountered!"); + throw std::string("Invalid call instruction encountered!"); Value *F = getValue(iType, Oprnds[0]); // Check to make sure we have a pointer to function type const PointerType *PTy = dyn_cast<PointerType>(F->getType()); - if (PTy == 0) PARSE_ERROR("Call to non function pointer value!"); + if (PTy == 0) throw std::string("Call to non function pointer value!"); const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); - if (FTy == 0) PARSE_ERROR("Call to non function pointer value!"); + if (FTy == 0) throw std::string("Call to non function pointer value!"); std::vector<Value *> Params; if (!FTy->isVarArg()) { @@ -561,17 +646,17 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { if (It == FTy->param_end()) - PARSE_ERROR("Invalid call instruction!"); + throw std::string("Invalid call instruction!"); Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); } if (It != FTy->param_end()) - PARSE_ERROR("Invalid call instruction!"); + throw std::string("Invalid call instruction!"); } else { Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); unsigned FirstVariableOperand; if (Oprnds.size() < FTy->getNumParams()) - PARSE_ERROR("Call instruction missing operands!"); + throw std::string("Call instruction missing operands!"); // Read all of the fixed arguments for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) @@ -580,10 +665,10 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, FirstVariableOperand = FTy->getNumParams(); if ((Oprnds.size()-FirstVariableOperand) & 1) // Must be pairs of type/value - PARSE_ERROR("Invalid call instruction!"); + throw std::string("Invalid call instruction!"); for (unsigned i = FirstVariableOperand, e = Oprnds.size(); - i != e; i += 2) + i != e; i += 2) Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); } @@ -591,14 +676,17 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; } case Instruction::Invoke: { - if (Oprnds.size() < 3) PARSE_ERROR("Invalid invoke instruction!"); + if (Oprnds.size() < 3) + throw std::string("Invalid invoke instruction!"); Value *F = getValue(iType, Oprnds[0]); // Check to make sure we have a pointer to function type const PointerType *PTy = dyn_cast<PointerType>(F->getType()); - if (PTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + if (PTy == 0) + throw std::string("Invoke to non function pointer value!"); const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); - if (FTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + if (FTy == 0) + throw std::string("Invoke to non function pointer value!"); std::vector<Value *> Params; BasicBlock *Normal, *Except; @@ -610,11 +698,11 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, FunctionType::param_iterator It = FTy->param_begin(); for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) { if (It == FTy->param_end()) - PARSE_ERROR("Invalid invoke instruction!"); + throw std::string("Invalid invoke instruction!"); Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); } if (It != FTy->param_end()) - PARSE_ERROR("Invalid invoke instruction!"); + throw std::string("Invalid invoke instruction!"); } else { Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); @@ -627,7 +715,7 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, Oprnds[i])); if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs - PARSE_ERROR("Invalid invoke instruction!"); + throw std::string("Invalid invoke instruction!"); for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2) Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); @@ -637,9 +725,10 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; } case Instruction::Malloc: - if (Oprnds.size() > 2) PARSE_ERROR("Invalid malloc instruction!"); + if (Oprnds.size() > 2) + throw std::string("Invalid malloc instruction!"); if (!isa<PointerType>(InstTy)) - PARSE_ERROR("Invalid malloc instruction!"); + throw std::string("Invalid malloc instruction!"); Result = new MallocInst(cast<PointerType>(InstTy)->getElementType(), Oprnds.size() ? getValue(Type::UIntTyID, @@ -647,29 +736,31 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; case Instruction::Alloca: - if (Oprnds.size() > 2) PARSE_ERROR("Invalid alloca instruction!"); + if (Oprnds.size() > 2) + throw std::string("Invalid alloca instruction!"); if (!isa<PointerType>(InstTy)) - PARSE_ERROR("Invalid alloca instruction!"); + throw std::string("Invalid alloca instruction!"); Result = new AllocaInst(cast<PointerType>(InstTy)->getElementType(), Oprnds.size() ? getValue(Type::UIntTyID, - Oprnds[0]) :0); + Oprnds[0]) :0); break; case Instruction::Free: if (!isa<PointerType>(InstTy)) - PARSE_ERROR("Invalid free instruction!"); + throw std::string("Invalid free instruction!"); Result = new FreeInst(getValue(iType, Oprnds[0])); break; case Instruction::GetElementPtr: { if (Oprnds.size() == 0 || !isa<PointerType>(InstTy)) - PARSE_ERROR("Invalid getelementptr instruction!"); + throw std::string("Invalid getelementptr instruction!"); std::vector<Value*> Idx; const Type *NextTy = InstTy; for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy); - if (!TopTy) PARSE_ERROR("Invalid getelementptr instruction!"); + if (!TopTy) + throw std::string("Invalid getelementptr instruction!"); unsigned ValIdx = Oprnds[i]; unsigned IdxTy = 0; @@ -710,14 +801,14 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, case 62: // volatile load case Instruction::Load: if (Oprnds.size() != 1 || !isa<PointerType>(InstTy)) - PARSE_ERROR("Invalid load instruction!"); + throw std::string("Invalid load instruction!"); Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62); break; case 63: // volatile store case Instruction::Store: { if (!isa<PointerType>(InstTy) || Oprnds.size() != 2) - PARSE_ERROR("Invalid store instruction!"); + throw std::string("Invalid store instruction!"); Value *Ptr = getValue(iType, Oprnds[1]); const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType(); @@ -726,7 +817,8 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, break; } case Instruction::Unwind: - if (Oprnds.size() != 0) PARSE_ERROR("Invalid unwind instruction!"); + if (Oprnds.size() != 0) + throw std::string("Invalid unwind instruction!"); Result = new UnwindInst(); break; } // end switch(Opcode) @@ -741,9 +833,11 @@ void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, BB->getInstList().push_back(Result); } -/// getBasicBlock - Get a particular numbered basic block, which might be a -/// forward reference. This works together with ParseBasicBlock to handle these -/// forward references in a clean manner. +/// Get a particular numbered basic block, which might be a forward reference. +/// This works together with ParseBasicBlock to handle these forward references +/// in a clean manner. This function is used when constructing phi, br, switch, +/// and other instructions that reference basic blocks. Blocks are numbered +/// sequentially as they appear in the function. BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { // Make sure there is room in the table... if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); @@ -759,10 +853,12 @@ BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { return ParsedBasicBlocks[ID] = new BasicBlock(); } -/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one -/// basicblock at a time. This method reads in one of the basicblock packets. +/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time. +/// This method reads in one of the basicblock packets. This method is not used +/// for bytecode files after LLVM 1.0 +/// @returns The basic block constructed. BasicBlock *BytecodeReader::ParseBasicBlock( unsigned BlockNo) { - HANDLE(BasicBlockBegin( BlockNo )); + if (Handler) Handler->handleBasicBlockBegin( BlockNo ); BasicBlock *BB = 0; @@ -777,19 +873,20 @@ BasicBlock *BytecodeReader::ParseBasicBlock( unsigned BlockNo) { while ( moreInBlock() ) ParseInstruction(Operands, BB); - HANDLE(BasicBlockEnd( BlockNo )); + if (Handler) Handler->handleBasicBlockEnd( BlockNo ); return BB; } -/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the -/// body of a function. In post 1.0 bytecode files, we no longer emit basic -/// block individually, in order to avoid per-basic-block overhead. +/// Parse all of the BasicBlock's & Instruction's in the body of a function. +/// In post 1.0 bytecode files, we no longer emit basic block individually, +/// in order to avoid per-basic-block overhead. +/// @returns Rhe number of basic blocks encountered. unsigned BytecodeReader::ParseInstructionList(Function* F) { unsigned BlockNo = 0; std::vector<unsigned> Args; while ( moreInBlock() ) { - HANDLE(BasicBlockBegin( BlockNo )); + if (Handler) Handler->handleBasicBlockBegin( BlockNo ); BasicBlock *BB; if (ParsedBasicBlocks.size() == BlockNo) ParsedBasicBlocks.push_back(BB = new BasicBlock()); @@ -797,7 +894,7 @@ unsigned BytecodeReader::ParseInstructionList(Function* F) { BB = ParsedBasicBlocks[BlockNo] = new BasicBlock(); else BB = ParsedBasicBlocks[BlockNo]; - HANDLE(BasicBlockEnd( BlockNo )); + if (Handler) Handler->handleBasicBlockEnd( BlockNo ); ++BlockNo; F->getBasicBlockList().push_back(BB); @@ -806,15 +903,20 @@ unsigned BytecodeReader::ParseInstructionList(Function* F) { ParseInstruction(Args, BB); if (!BB->getTerminator()) - PARSE_ERROR("Non-terminated basic block found!"); + throw std::string("Non-terminated basic block found!"); } return BlockNo; } +/// Parse a symbol table. This works for both module level and function +/// level symbol tables. For function level symbol tables, the CurrentFunction +/// parameter must be non-zero and the ST parameter must correspond to +/// CurrentFunction's symbol table. For Module level symbol tables, the +/// CurrentFunction argument must be zero. void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, - SymbolTable *ST) { - HANDLE(SymbolTableBegin(CurrentFunction,ST)); + SymbolTable *ST) { + if (Handler) Handler->handleSymbolTableBegin(CurrentFunction,ST); // Allow efficient basic block lookup by number. std::vector<BasicBlock*> BBMap; @@ -823,10 +925,26 @@ void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, E = CurrentFunction->end(); I != E; ++I) BBMap.push_back(I); + /// In LLVM 1.3 we write types separately from values so + /// The types are always first in the symbol table. This is + /// because Type no longer derives from Value. + if ( ! hasTypeDerivedFromValue ) { + // Symtab block header: [num entries] + unsigned NumEntries = read_vbr_uint(); + for ( unsigned i = 0; i < NumEntries; ++i ) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); + const Type* T = getType(slot); + ST->insert(Name, T); + } + } + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] unsigned NumEntries = read_vbr_uint(); - unsigned Typ = read_vbr_uint(); + unsigned Typ = 0; + bool isTypeType = read_typeid(Typ); const Type *Ty = getType(Typ); for (unsigned i = 0; i != NumEntries; ++i) { @@ -834,75 +952,104 @@ void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, unsigned slot = read_vbr_uint(); std::string Name = read_str(); - Value *V = 0; - if (Typ == Type::TypeTyID) - V = (Value*)getType(slot); - else if (Typ == Type::LabelTyID) { - if (slot < BBMap.size()) - V = BBMap[slot]; + // if we're reading a pre 1.3 bytecode file and the type plane + // is the "type type", handle it here + if ( isTypeType ) { + const Type* T = getType(slot); + if ( T == 0 ) + PARSE_ERROR("Failed type look-up for name '" << Name << "'"); + ST->insert(Name, T); + continue; // code below must be short circuited } else { - V = getValue(Typ, slot, false); // Find mapping... + Value *V = 0; + if (Typ == Type::LabelTyID) { + if (slot < BBMap.size()) + V = BBMap[slot]; + } else { + V = getValue(Typ, slot, false); // Find mapping... + } + if (V == 0) + PARSE_ERROR("Failed value look-up for name '" << Name << "'"); + V->setName(Name, ST); } - if (V == 0) - PARSE_ERROR("Failed value look-up for name '" << Name << "'"); - - V->setName(Name, ST); } } checkPastBlockEnd("Symbol Table"); - HANDLE(SymbolTableEnd()); + if (Handler) Handler->handleSymbolTableEnd(); +} + +/// Read in the types portion of a compaction table. +void BytecodeReader::ParseCompactionTypes( unsigned NumEntries ) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = 0; + bool isTypeType = read_typeid(TypeSlot); + assert(!isTypeType && "Invalid type in compaction table: type type"); + const Type *Typ = getGlobalTableType(TypeSlot); + CompactionTypes.push_back(Typ); + if (Handler) Handler->handleCompactionTableType( i, TypeSlot, Typ ); + } } +/// Parse a compaction table. void BytecodeReader::ParseCompactionTable() { - HANDLE(CompactionTableBegin()); + if (Handler) Handler->handleCompactionTableBegin(); + + /// In LLVM 1.3 Type no longer derives from Value. So, + /// we always write them first in the compaction table + /// because they can't occupy a "type plane" where the + /// Values reside. + if ( ! hasTypeDerivedFromValue ) { + unsigned NumEntries = read_vbr_uint(); + ParseCompactionTypes( NumEntries ); + } while ( moreInBlock() ) { unsigned NumEntries = read_vbr_uint(); - unsigned Ty; + unsigned Ty = 0; + unsigned isTypeType = false; if ((NumEntries & 3) == 3) { NumEntries >>= 2; - Ty = read_vbr_uint(); + isTypeType = read_typeid(Ty); } else { Ty = NumEntries >> 2; + isTypeType = sanitizeTypeId(Ty); NumEntries &= 3; } - if (Ty >= CompactionValues.size()) - CompactionValues.resize(Ty+1); + // if we're reading a pre 1.3 bytecode file and the type plane + // is the "type type", handle it here + if ( isTypeType ) { + ParseCompactionTypes(NumEntries); + } else { + if (Ty >= CompactionValues.size()) + CompactionValues.resize(Ty+1); - if (!CompactionValues[Ty].empty()) - PARSE_ERROR("Compaction table plane contains multiple entries!"); + if (!CompactionValues[Ty].empty()) + throw std::string("Compaction table plane contains multiple entries!"); - HANDLE(CompactionTablePlane( Ty, NumEntries )); + if (Handler) Handler->handleCompactionTablePlane( Ty, NumEntries ); - if (Ty == Type::TypeTyID) { - for (unsigned i = 0; i != NumEntries; ++i) { - unsigned TypeSlot = read_vbr_uint(); - const Type *Typ = getGlobalTableType(TypeSlot); - CompactionTypes.push_back(Typ); - HANDLE(CompactionTableType( i, TypeSlot, Typ )); - } - CompactionTypes.resize(NumEntries+Type::FirstDerivedTyID); - } else { const Type *Typ = getType(Ty); // Push the implicit zero CompactionValues[Ty].push_back(Constant::getNullValue(Typ)); for (unsigned i = 0; i != NumEntries; ++i) { - unsigned ValSlot = read_vbr_uint(); - Value *V = getGlobalTableValue(Typ, ValSlot); - CompactionValues[Ty].push_back(V); - HANDLE(CompactionTableValue( i, Ty, ValSlot, Typ )); + unsigned ValSlot = read_vbr_uint(); |