diff options
Diffstat (limited to 'lib/Bytecode/Reader/Reader.cpp')
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 1681 |
1 files changed, 1355 insertions, 326 deletions
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 2ca8a99f4c..353f29c4b2 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -16,45 +16,163 @@ // //===----------------------------------------------------------------------===// -#include "ReaderInternals.h" -#include "llvm/Bytecode/Reader.h" +#include "Reader.h" +#include "llvm/Bytecode/BytecodeHandler.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/iMemory.h" +#include "llvm/iOther.h" +#include "llvm/iPHINode.h" +#include "llvm/iTerminators.h" #include "llvm/Bytecode/Format.h" -#include "llvm/Module.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "Support/StringExtras.h" +#include <sstream> + using namespace llvm; -unsigned BytecodeParser::getTypeSlot(const Type *Ty) { - if (Ty->isPrimitiveType()) - return Ty->getTypeID(); +/// A convenience macro for calling the handler. Makes maintenance easier in +/// case the interface to handler methods changes. +#define HANDLE(method) \ + if ( Handler ) Handler->handle ## method - // Scan the compaction table for the type if needed. - if (CompactionTable.size() > Type::TypeTyID) { - std::vector<Value*> &Plane = CompactionTable[Type::TypeTyID]; - if (!Plane.empty()) { - std::vector<Value*>::iterator I = find(Plane.begin(), Plane.end(), - const_cast<Type*>(Ty)); - if (I == Plane.end()) - throw std::string("Couldn't find type specified in compaction table!"); - return Type::FirstDerivedTyID + (&*I - &Plane[0]); - } +/// A convenience macro for handling parsing errors. +#define PARSE_ERROR(inserters) { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + throw std::string(errormsg.str()); \ } - // Check the function level types first... 
- TypeValuesListTy::iterator I = find(FunctionTypeValues.begin(), - FunctionTypeValues.end(), Ty); - if (I != FunctionTypeValues.end()) - return Type::FirstDerivedTyID + ModuleTypeValues.size() + - (&*I - &FunctionTypeValues[0]); +/// @brief A class for maintaining the slot number definition +/// as a placeholder for the actual definition. +template<class SuperType> +class PlaceholderDef : public SuperType { + unsigned ID; + PlaceholderDef(); // DO NOT IMPLEMENT + void operator=(const PlaceholderDef &); // DO NOT IMPLEMENT +public: + PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {} + unsigned getID() { return ID; } +}; + +struct ConstantPlaceHolderHelper : public ConstantExpr { + ConstantPlaceHolderHelper(const Type *Ty) + : ConstantExpr(Instruction::UserOp1, Constant::getNullValue(Ty), Ty) {} +}; + +typedef PlaceholderDef<ConstantPlaceHolderHelper> ConstPHolder; - I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty); - if (I == ModuleTypeValues.end()) - throw std::string("Didn't find type in ModuleTypeValues."); - return Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); +//===----------------------------------------------------------------------===// +// Bytecode Reading Methods +//===----------------------------------------------------------------------===// + +inline bool BytecodeReader::moreInBlock() { + return At < BlockEnd; +} + +inline void BytecodeReader::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); } -const Type *BytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +inline void BytecodeReader::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( At > Save ) + HANDLE(Alignment( At - Save )); + if (At > BlockEnd) + PARSE_ERROR("Ran out of data while aligning!"); +} +inline unsigned BytecodeReader::read_uint() { + if (At+4 > BlockEnd) + 
PARSE_ERROR("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned BytecodeReader::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + PARSE_ERROR("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + HANDLE(VBR32(At-Save)); + return Result; +} + +inline uint64_t BytecodeReader::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + PARSE_ERROR("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + HANDLE(VBR64(At-Save)); + return Result; +} + +inline int64_t BytecodeReader::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} + +inline std::string BytecodeReader::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? 
+ PARSE_ERROR("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void BytecodeReader::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + PARSE_ERROR("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} + +inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + PARSE_ERROR("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + HANDLE(Block( Type, BlockStart, Size )); +} + +//===----------------------------------------------------------------------===// +// IR Lookup Methods +//===----------------------------------------------------------------------===// + +inline bool BytecodeReader::hasImplicitNull(unsigned TyID ) { + if (!hasExplicitPrimitiveZeros) + return TyID != Type::LabelTyID && TyID != Type::TypeTyID && + TyID != Type::VoidTyID; + return TyID >= Type::FirstDerivedTyID; +} + +const Type *BytecodeReader::getType(unsigned ID) { if (ID < Type::FirstDerivedTyID) if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID)) return T; // Asked for a primitive type... @@ -62,98 +180,124 @@ const Type *BytecodeParser::getType(unsigned ID) { // Otherwise, derived types need offset... ID -= Type::FirstDerivedTyID; - if (CompactionTable.size() > Type::TypeTyID && - !CompactionTable[Type::TypeTyID].empty()) { - if (ID >= CompactionTable[Type::TypeTyID].size()) - throw std::string("Type ID out of range for compaction table!"); - return cast<Type>(CompactionTable[Type::TypeTyID][ID]); + if (!CompactionTypes.empty()) { + if (ID >= CompactionTypes.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypes[ID]; } // Is it a module-level type? 
- if (ID < ModuleTypeValues.size()) - return ModuleTypeValues[ID].get(); + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); - // Nope, is it a function-level type? - ID -= ModuleTypeValues.size(); - if (ID < FunctionTypeValues.size()) - return FunctionTypeValues[ID].get(); + // Nope, is it a function-level type? + ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); - throw std::string("Illegal type reference!"); + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; } -static inline bool hasImplicitNull(unsigned TyID, bool EncodesPrimitiveZeros) { - if (!EncodesPrimitiveZeros) - return TyID != Type::LabelTyID && TyID != Type::TypeTyID && - TyID != Type::VoidTyID; - return TyID >= Type::FirstDerivedTyID; -} +unsigned BytecodeReader::getTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); -unsigned BytecodeParser::insertValue(Value *Val, unsigned type, - ValueTable &ValueTab) { - assert((!isa<Constant>(Val) || !cast<Constant>(Val)->isNullValue()) || - !hasImplicitNull(type, hasExplicitPrimitiveZeros) && - "Cannot read null values from bytecode!"); - assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); + // Scan the compaction table for the type if needed. + if (!CompactionTypes.empty()) { + std::vector<const Type*>::const_iterator I = + find(CompactionTypes.begin(), CompactionTypes.end(), Ty); - if (ValueTab.size() <= type) - ValueTab.resize(type+1); + if (I == CompactionTypes.end()) + PARSE_ERROR("Couldn't find type specified in compaction table!"); + return Type::FirstDerivedTyID + (&*I - &CompactionTypes[0]); + } - if (!ValueTab[type]) ValueTab[type] = new ValueList(); + // Check the function level types first... 
+ TypeListTy::iterator I = find(FunctionTypes.begin(), FunctionTypes.end(), Ty); - //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() - // << "] = " << Val << "\n"; - ValueTab[type]->push_back(Val); + if (I != FunctionTypes.end()) + return Type::FirstDerivedTyID + ModuleTypes.size() + + (&*I - &FunctionTypes[0]); - bool HasOffset = hasImplicitNull(type, hasExplicitPrimitiveZeros); - return ValueTab[type]->size()-1 + HasOffset; + // Check the module level types now... + I = find(ModuleTypes.begin(), ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + PARSE_ERROR("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } -Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) { +const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot); + assert(Ty && "Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypes.size()) + PARSE_ERROR("Illegal compaction table type reference!"); + return ModuleTypes[Slot]; +} + +unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getTypeID(); + TypeListTy::iterator I = find(ModuleTypes.begin(), + ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + PARSE_ERROR("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); +} + +Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { assert(type != Type::TypeTyID && "getValue() cannot get types!"); assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); unsigned Num = oNum; // If there is a compaction table active, it defines the low-level numbers. // If not, the module values define the low-level numbers. 
- if (CompactionTable.size() > type && !CompactionTable[type].empty()) { - if (Num < CompactionTable[type].size()) - return CompactionTable[type][Num]; - Num -= CompactionTable[type].size(); + if (CompactionValues.size() > type && !CompactionValues[type].empty()) { + if (Num < CompactionValues[type].size()) + return CompactionValues[type][Num]; + Num -= CompactionValues[type].size(); } else { - // If the type plane was compactified, figure out the global type ID. + // By default, the global type id is the type id passed in unsigned GlobalTyID = type; - if (CompactionTable.size() > Type::TypeTyID && - !CompactionTable[Type::TypeTyID].empty() && - type >= Type::FirstDerivedTyID) { - std::vector<Value*> &TypePlane = CompactionTable[Type::TypeTyID]; - const Type *Ty = cast<Type>(TypePlane[type-Type::FirstDerivedTyID]); - TypeValuesListTy::iterator I = - find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty); - assert(I != ModuleTypeValues.end()); - GlobalTyID = Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); + + // If the type plane was compactified, figure out the global type ID + // by adding the derived type ids and the distance. 
+ if (CompactionValues.size() > Type::TypeTyID && + !CompactionTypes.empty() && + type >= Type::FirstDerivedTyID) { + const Type *Ty = CompactionTypes[type-Type::FirstDerivedTyID]; + TypeListTy::iterator I = + find(ModuleTypes.begin(), ModuleTypes.end(), Ty); + assert(I != ModuleTypes.end()); + GlobalTyID = Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } - if (hasImplicitNull(GlobalTyID, hasExplicitPrimitiveZeros)) { + if (hasImplicitNull(GlobalTyID)) { if (Num == 0) - return Constant::getNullValue(getType(type)); + return Constant::getNullValue(getType(type)); --Num; } if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) { if (Num < ModuleValues[GlobalTyID]->size()) - return ModuleValues[GlobalTyID]->getOperand(Num); + return ModuleValues[GlobalTyID]->getOperand(Num); Num -= ModuleValues[GlobalTyID]->size(); } } - if (Values.size() > type && Values[type] && Num < Values[type]->size()) - return Values[type]->getOperand(Num); + if (FunctionValues.size() > type && + FunctionValues[type] && + Num < FunctionValues[type]->size()) + return FunctionValues[type]->getOperand(Num); if (!Create) return 0; // Do not create a placeholder? std::pair<unsigned,unsigned> KeyValue(type, oNum); - std::map<std::pair<unsigned,unsigned>, Value*>::iterator I = - ForwardReferences.lower_bound(KeyValue); + ForwardReferenceMap::iterator I = ForwardReferences.lower_bound(KeyValue); if (I != ForwardReferences.end() && I->first == KeyValue) return I->second; // We have already created this placeholder @@ -162,32 +306,27 @@ Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) { return Val; } -/// getBasicBlock - Get a particular numbered basic block, which might be a -/// forward reference. This works together with ParseBasicBlock to handle these -/// forward references in a clean manner. -/// -BasicBlock *BytecodeParser::getBasicBlock(unsigned ID) { - // Make sure there is room in the table... 
- if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); - - // First check to see if this is a backwards reference, i.e., ParseBasicBlock - // has already created this block, or if the forward reference has already - // been created. - if (ParsedBasicBlocks[ID]) - return ParsedBasicBlocks[ID]; +Value* BytecodeReader::getGlobalTableValue(const Type *Ty, unsigned SlotNo) { + // FIXME: getTypeSlot is inefficient! + unsigned TyID = getGlobalTableTypeSlot(Ty); + + if (TyID != Type::LabelTyID) { + if (SlotNo == 0) + return Constant::getNullValue(Ty); + --SlotNo; + } - // Otherwise, the basic block has not yet been created. Do so and add it to - // the ParsedBasicBlocks list. - return ParsedBasicBlocks[ID] = new BasicBlock(); + if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 || + SlotNo >= ModuleValues[TyID]->size()) { + PARSE_ERROR("Corrupt compaction table entry!" + << TyID << ", " << SlotNo << ": " << ModuleValues.size() << ", " + << (void*)ModuleValues[TyID] << ", " + << ModuleValues[TyID]->size() << "\n"); + } + return ModuleValues[TyID]->getOperand(SlotNo); } -/// getConstantValue - Just like getValue, except that it returns a null pointer -/// only on error. It always returns a constant (meaning that if the value is -/// defined, but is not a constant, that is an error). If the specified -/// constant hasn't been parsed yet, a placeholder is defined and used. Later, -/// after the real value is parsed, the placeholder is eliminated. -/// -Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { +Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { if (Value *V = getValue(TypeSlot, Slot, false)) if (Constant *C = dyn_cast<Constant>(V)) return C; // If we already have the value parsed, just return it @@ -196,19 +335,17 @@ Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { // to infest bytecode files. 
return ConstantPointerRef::get(GV); else - throw std::string("Reference of a value is expected to be a constant!"); + PARSE_ERROR("Reference of a value is expected to be a constant!"); const Type *Ty = getType(TypeSlot); std::pair<const Type*, unsigned> Key(Ty, Slot); ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key); if (I != ConstantFwdRefs.end() && I->first == Key) { - BCR_TRACE(5, "Previous forward ref found!\n"); return I->second; } else { // Create a placeholder for the constant reference and // keep track of the fact that we have a forward ref to recycle it - BCR_TRACE(5, "Creating new forward ref to a constant!\n"); Constant *C = new ConstPHolder(Ty, Slot); // Keep track of the fact that we have a forward ref to recycle it @@ -217,12 +354,418 @@ Constant *BytecodeParser::getConstantValue(unsigned TypeSlot, unsigned Slot) { } } +//===----------------------------------------------------------------------===// +// IR Construction Methods +//===----------------------------------------------------------------------===// + +unsigned BytecodeReader::insertValue( + Value *Val, unsigned type, ValueTable &ValueTab) { + assert((!isa<Constant>(Val) || !cast<Constant>(Val)->isNullValue()) || + !hasImplicitNull(type) && + "Cannot read null values from bytecode!"); + assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); + + if (ValueTab.size() <= type) + ValueTab.resize(type+1); + + if (!ValueTab[type]) ValueTab[type] = new ValueList(); + + ValueTab[type]->push_back(Val); + + bool HasOffset = hasImplicitNull(type); + return ValueTab[type]->size()-1 + HasOffset; +} + +void BytecodeReader::insertArguments(Function* F ) { + const FunctionType *FT = F->getFunctionType(); + Function::aiterator AI = F->abegin(); + for (FunctionType::param_iterator It = FT->param_begin(); + It != FT->param_end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), FunctionValues); +} + 
+//===----------------------------------------------------------------------===// +// Bytecode Parsing Methods +//===----------------------------------------------------------------------===// + +void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds, + BasicBlock* BB) { + BufPtr SaveAt = At; + + // Clear instruction data + Oprnds.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read_uint(); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Oprnds.resize((Op >> 0) & 03); + + // Extract the operands + switch (Oprnds.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Oprnds[0] = (Op >> 20) & 4095; + if (Oprnds[0] == 4095) // Handle special encoding for 0 operands... + Oprnds.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Oprnds[0] = (Op >> 16) & 255; + Oprnds[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Oprnds[0] = (Op >> 14) & 63; + Oprnds[1] = (Op >> 20) & 63; + Oprnds[2] = (Op >> 26) & 63; + break; + case 0: + At -= 4; // Hrm, try this again... 
+ Opcode = read_vbr_uint(); + Opcode >>= 2; + iType = read_vbr_uint(); + + unsigned NumOprnds = read_vbr_uint(); + Oprnds.resize(NumOprnds); + + if (NumOprnds == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOprnds; ++i) + Oprnds[i] = read_vbr_uint(); + align32(); + break; + } + + // Get the type of the instruction + const Type *InstTy = getType(iType); + + // Have enough to inform the handler now + HANDLE(Instruction(Opcode, InstTy, Oprnds, At-SaveAt)); + + // Declare the resulting instruction we'll build. + Instruction *Result = 0; + + // Handle binary operators + if (Opcode >= Instruction::BinaryOpsBegin && + Opcode < Instruction::BinaryOpsEnd && Oprnds.size() == 2) + Result = BinaryOperator::create((Instruction::BinaryOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(iType, Oprnds[1])); + + switch (Opcode) { + default: + if (Result == 0) PARSE_ERROR("Illegal instruction read!"); + break; + case Instruction::VAArg: + Result = new VAArgInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::VANext: + Result = new VANextInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::Cast: + Result = new CastInst(getValue(iType, Oprnds[0]), getType(Oprnds[1])); + break; + case Instruction::Select: + Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]), + getValue(iType, Oprnds[1]), + getValue(iType, Oprnds[2])); + break; + case Instruction::PHI: { + if (Oprnds.size() == 0 || (Oprnds.size() & 1)) + PARSE_ERROR("Invalid phi node encountered!\n"); + + PHINode *PN = new PHINode(InstTy); + PN->op_reserve(Oprnds.size()); + for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2) + PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1])); + Result = PN; + break; + } + + case Instruction::Shl: + case Instruction::Shr: + Result = new ShiftInst((Instruction::OtherOps)Opcode, + getValue(iType, Oprnds[0]), + getValue(Type::UByteTyID, Oprnds[1])); + 
break; + case Instruction::Ret: + if (Oprnds.size() == 0) + Result = new ReturnInst(); + else if (Oprnds.size() == 1) + Result = new ReturnInst(getValue(iType, Oprnds[0])); + else + PARSE_ERROR("Unrecognized instruction!"); + break; + + case Instruction::Br: + if (Oprnds.size() == 1) + Result = new BranchInst(getBasicBlock(Oprnds[0])); + else if (Oprnds.size() == 3) + Result = new BranchInst(getBasicBlock(Oprnds[0]), + getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); + else + PARSE_ERROR("Invalid number of operands for a 'br' instruction!"); + break; + case Instruction::Switch: { + if (Oprnds.size() & 1) + PARSE_ERROR("Switch statement with odd number of arguments!"); + + SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]), + getBasicBlock(Oprnds[1])); + for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2) + I->addCase(cast<Constant>(getValue(iType, Oprnds[i])), + getBasicBlock(Oprnds[i+1])); + Result = I; + break; + } + + case Instruction::Call: { + if (Oprnds.size() == 0) + PARSE_ERROR("Invalid call instruction encountered!"); + + Value *F = getValue(iType, Oprnds[0]); + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast<PointerType>(F->getType()); + if (PTy == 0) PARSE_ERROR("Call to non function pointer value!"); + const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); + if (FTy == 0) PARSE_ERROR("Call to non function pointer value!"); + + std::vector<Value *> Params; + if (!FTy->isVarArg()) { + FunctionType::param_iterator It = FTy->param_begin(); + + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + PARSE_ERROR("Invalid call instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + PARSE_ERROR("Invalid call instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + unsigned FirstVariableOperand; + if (Oprnds.size() < FTy->getNumParams()) + PARSE_ERROR("Call 
instruction missing operands!"); + + // Read all of the fixed arguments + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i])); + + FirstVariableOperand = FTy->getNumParams(); + + if ((Oprnds.size()-FirstVariableOperand) & 1) // Must be pairs of type/value + PARSE_ERROR("Invalid call instruction!"); + + for (unsigned i = FirstVariableOperand, e = Oprnds.size(); + i != e; i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new CallInst(F, Params); + break; + } + case Instruction::Invoke: { + if (Oprnds.size() < 3) PARSE_ERROR("Invalid invoke instruction!"); + Value *F = getValue(iType, Oprnds[0]); + + // Check to make sure we have a pointer to function type + const PointerType *PTy = dyn_cast<PointerType>(F->getType()); + if (PTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); + if (FTy == 0) PARSE_ERROR("Invoke to non function pointer value!"); + + std::vector<Value *> Params; + BasicBlock *Normal, *Except; + + if (!FTy->isVarArg()) { + Normal = getBasicBlock(Oprnds[1]); + Except = getBasicBlock(Oprnds[2]); + + FunctionType::param_iterator It = FTy->param_begin(); + for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) { + if (It == FTy->param_end()) + PARSE_ERROR("Invalid invoke instruction!"); + Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i])); + } + if (It != FTy->param_end()) + PARSE_ERROR("Invalid invoke instruction!"); + } else { + Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1); + + Normal = getBasicBlock(Oprnds[0]); + Except = getBasicBlock(Oprnds[1]); + + unsigned FirstVariableArgument = FTy->getNumParams()+2; + for (unsigned i = 2; i != FirstVariableArgument; ++i) + Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)), + Oprnds[i])); + + if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs + PARSE_ERROR("Invalid invoke 
instruction!"); + + for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2) + Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); + } + + Result = new InvokeInst(F, Normal, Except, Params); + break; + } + case Instruction::Malloc: + if (Oprnds.size() > 2) PARSE_ERROR("Invalid malloc instruction!"); + if (!isa<PointerType>(InstTy)) + PARSE_ERROR("Invalid malloc instruction!"); + + Result = new MallocInst(cast<PointerType>(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) : 0); + break; + + case Instruction::Alloca: + if (Oprnds.size() > 2) PARSE_ERROR("Invalid alloca instruction!"); + if (!isa<PointerType>(InstTy)) + PARSE_ERROR("Invalid alloca instruction!"); + + Result = new AllocaInst(cast<PointerType>(InstTy)->getElementType(), + Oprnds.size() ? getValue(Type::UIntTyID, + Oprnds[0]) :0); + break; + case Instruction::Free: + if (!isa<PointerType>(InstTy)) + PARSE_ERROR("Invalid free instruction!"); + Result = new FreeInst(getValue(iType, Oprnds[0])); + break; + case Instruction::GetElementPtr: { + if (Oprnds.size() == 0 || !isa<PointerType>(InstTy)) + PARSE_ERROR("Invalid getelementptr instruction!"); + + std::vector<Value*> Idx; + + const Type *NextTy = InstTy; + for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { + const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy); + if (!TopTy) PARSE_ERROR("Invalid getelementptr instruction!"); + + unsigned ValIdx = Oprnds[i]; + unsigned IdxTy = 0; + if (!hasRestrictedGEPTypes) { + // Struct indices are always uints, sequential type indices can be any + // of the 32 or 64-bit integer types. The actual choice of type is + // encoded in the low two bits of the slot number. 
+ if (isa<StructType>(TopTy)) + IdxTy = Type::UIntTyID; + else { + switch (ValIdx & 3) { + default: + case 0: IdxTy = Type::UIntTyID; break; + case 1: IdxTy = Type::IntTyID; break; + case 2: IdxTy = Type::ULongTyID; break; + case 3: IdxTy = Type::LongTyID; break; + } + ValIdx >>= 2; + } + } else { + IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID; + } + + Idx.push_back(getValue(IdxTy, ValIdx)); + + // Convert ubyte struct indices into uint struct indices. + if (isa<StructType>(TopTy) && hasRestrictedGEPTypes) + if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back())) + Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy); + + NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true); + } + + Result = new GetElementPtrInst(getValue(iType, Oprnds[0]), Idx); + break; + } + + case 62: // volatile load + case Instruction::Load: + if (Oprnds.size() != 1 || !isa<PointerType>(InstTy)) + PARSE_ERROR("Invalid load instruction!"); + Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62); + break; + + case 63: // volatile store + case Instruction::Store: { + if (!isa<PointerType>(InstTy) || Oprnds.size() != 2) + PARSE_ERROR("Invalid store instruction!"); + + Value *Ptr = getValue(iType, Oprnds[1]); + const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType(); + Result = new StoreInst(getValue(getTypeSlot(ValTy), Oprnds[0]), Ptr, + Opcode == 63); + break; + } + case Instruction::Unwind: + if (Oprnds.size() != 0) PARSE_ERROR("Invalid unwind instruction!"); + Result = new UnwindInst(); + break; + } // end switch(Opcode) + + unsigned TypeSlot; + if (Result->getType() == InstTy) + TypeSlot = iType; + else + TypeSlot = getTypeSlot(Result->getType()); + + insertValue(Result, TypeSlot, FunctionValues); + BB->getInstList().push_back(Result); +} + +/// getBasicBlock - Get a particular numbered basic block, which might be a +/// forward reference. 
This works together with ParseBasicBlock to handle these +/// forward references in a clean manner. +BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { + // Make sure there is room in the table... + if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1); + + // First check to see if this is a backwards reference, i.e., ParseBasicBlock + // has already created this block, or if the forward reference has already + // been created. + if (ParsedBasicBlocks[ID]) + return ParsedBasicBlocks[ID]; + + // Otherwise, the basic block has not yet been created. Do so and add it to + // the ParsedBasicBlocks list. + return ParsedBasicBlocks[ID] = new BasicBlock(); +} + /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned BlockNo) { - BasicBlock *BB; +BasicBlock *BytecodeReader::ParseBasicBlock( unsigned BlockNo) { + HANDLE(BasicBlockBegin( BlockNo )); + + BasicBlock *BB = 0; + if (ParsedBasicBlocks.size() == BlockNo) ParsedBasicBlocks.push_back(BB = new BasicBlock()); else if (ParsedBasicBlocks[BlockNo] == 0) @@ -230,24 +773,23 @@ BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf, else BB = ParsedBasicBlocks[BlockNo]; - std::vector<unsigned> Args; - while (Buf < EndBuf) - ParseInstruction(Buf, EndBuf, Args, BB); + std::vector<unsigned> Operands; + while ( moreInBlock() ) + ParseInstruction(Operands, BB); + HANDLE(BasicBlockEnd( BlockNo )); return BB; } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. 
-unsigned BytecodeParser::ParseInstructionList(Function *F, - const unsigned char *&Buf, - const unsigned char *EndBuf) { +unsigned BytecodeReader::ParseInstructionList(Function* F) { unsigned BlockNo = 0; std::vector<unsigned> Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { + HANDLE(BasicBlockBegin( BlockNo )); BasicBlock *BB; if (ParsedBasicBlocks.size() == BlockNo) ParsedBasicBlocks.push_back(BB = new BasicBlock()); @@ -255,24 +797,25 @@ unsigned BytecodeParser::ParseInstructionList(Function *F, BB = ParsedBasicBlocks[BlockNo] = new BasicBlock(); else BB = ParsedBasicBlocks[BlockNo]; + HANDLE(BasicBlockEnd( BlockNo )); ++BlockNo; F->getBasicBlockList().push_back(BB); // Read instructions into this basic block until we get to a terminator - while (Buf < EndBuf && !BB->getTerminator()) - ParseInstruction(Buf, EndBuf, Args, BB); + while ( moreInBlock() && !BB->getTerminator()) + ParseInstruction(Args, BB); if (!BB->getTerminator()) - throw std::string("Non-terminated basic block found!"); + PARSE_ERROR("Non-terminated basic block found!"); } return BlockNo; } -void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, - const unsigned char *EndBuf, - SymbolTable *ST, - Function *CurrentFunction) { +void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, + SymbolTable *ST) { + HANDLE(SymbolTableBegin(CurrentFunction,ST)); + // Allow efficient basic block lookup by number. 
std::vector<BasicBlock*> BBMap; if (CurrentFunction) @@ -280,18 +823,16 @@ void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, E = CurrentFunction->end(); I != E; ++I) BBMap.push_back(I); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); - BCR_TRACE(3, "Plane Type: '" << *Ty << "' with " << NumEntries << - " entries\n"); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); Value *V = 0; if (Typ == Type::TypeTyID) @@ -303,76 +844,444 @@ void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf, V = getValue(Typ, slot, false); // Find mapping... } if (V == 0) - throw "Failed value look-up for name '" + Name + "'"; - BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << *V; - if (!isa<Instruction>(V)) std::cerr << "\n"); + PARSE_ERROR("Failed value look-up for name '" << Name << "'"); V->setName(Name, ST); } } + checkPastBlockEnd("Symbol Table"); + HANDLE(SymbolTableEnd()); +} + +void BytecodeReader::ParseCompactionTable() { + + HANDLE(CompactionTableBegin()); + + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Ty; + + if ((NumEntries & 3) == 3) { + NumEntries >>= 2; |