-rw-r--r--   lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp   | 20
-rw-r--r--   lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp | 89
-rw-r--r--   lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h   | 12
-rw-r--r--   test/NaCl/Bitcode/lit.local.cfg                 |  1
-rw-r--r--   test/NaCl/Bitcode/struct-types.ll               | 78
5 files changed, 188 insertions, 12 deletions
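
Note on the encoding change in NaClBitcodeWriter.cpp below: type IDs in the type-table abbreviations are written as a fixed-width field when the whole type table fits in at most 6 bits, and as VBR6 otherwise, so that the small IDs handed out to frequently referenced types stay cheap. The following sketch is not part of the patch; Encoding, TypeIdAbbrevChoice, BitsToHold, and ChooseTypeIdAbbrev are illustrative stand-ins for NaClBitCodeAbbrevOp::Fixed / ::VBR, Log2_32_Ceil, and the selection logic added to WriteTypeTable.

    // Sketch only, assuming the cutoff behavior described in the patch comments.
    #include <cstdint>

    enum class Encoding { Fixed, VBR };

    struct TypeIdAbbrevChoice {
      Encoding Enc;
      uint64_t NumBits;
    };

    // Number of bits needed to hold any type ID in [0, NumTypes]; this matches
    // Log2_32_Ceil(NumTypes + 1) as used in the patch.
    inline uint64_t BitsToHold(uint64_t NumTypes) {
      uint64_t Bits = 0;
      while (NumTypes > 0) {
        ++Bits;
        NumTypes >>= 1;
      }
      return Bits;
    }

    // Small type tables get an exact fixed-width field; large ones switch to
    // VBR6, which stays cheap because OptimizeTypes gives the most frequently
    // referenced types the smallest IDs.
    inline TypeIdAbbrevChoice ChooseTypeIdAbbrev(uint64_t NumTypes) {
      const uint64_t TypeVBRCutoff = 6;
      const uint64_t NumBits = BitsToHold(NumTypes);
      if (NumBits <= TypeVBRCutoff)
        return {Encoding::Fixed, NumBits};
      return {Encoding::VBR, TypeVBRCutoff};
    }

With VBR6 (5 value bits plus a continuation bit per chunk), any type ID below 32 still costs a single 6-bit chunk, which is why handing the hot types the smallest IDs matters.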
diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
index 1d7d8dc23c..2e318020ad 100644
--- a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
@@ -233,12 +233,22 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
                            4 /*count from # abbrevs */);
   SmallVector<uint64_t, 64> TypeVals;
 
+  // Note: Use a fixed-width field of NumBits bits when under the cutoff.
+  // Otherwise, use VBR to take advantage of the fact that frequently
+  // referenced types have small IDs.
+  //
+  // Note: Cutoff chosen based on experiments on pnacl-translate.pexe.
   uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+  static const uint64_t TypeVBRCutoff = 6;
+  uint64_t TypeIdNumBits = (NumBits <= TypeVBRCutoff ? NumBits : TypeVBRCutoff);
+  NaClBitCodeAbbrevOp::Encoding TypeIdEncoding =
+      (NumBits <= TypeVBRCutoff
+           ? NaClBitCodeAbbrevOp::Fixed : NaClBitCodeAbbrevOp::VBR);
 
   // Abbrev for TYPE_CODE_POINTER.
   NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
   Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_POINTER));
-  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
   Abbv->Add(NaClBitCodeAbbrevOp(0));  // Addrspace = 0
   unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
 
@@ -247,7 +257,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
   Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1));  // isvararg
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
-  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
 
   unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
 
@@ -256,7 +266,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
   Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_ANON));
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1));  // ispacked
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
-  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
 
   unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
 
@@ -272,7 +282,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
   Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_NAMED));
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1));  // ispacked
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
-  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
 
   unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
 
@@ -280,7 +290,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
   Abbv = new NaClBitCodeAbbrev();
   Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_ARRAY));
   Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8));  // size
-  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+  Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
 
   unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
 
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 4ac34daa96..e5554b206a 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -23,6 +23,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <set>
+
 using namespace llvm;
 
 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
@@ -31,6 +33,15 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
 
 /// NaClValueEnumerator - Enumerate module-level information.
 NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
+  // Create a map for counting the frequency of types, and point the
+  // TypeCountMap field at it. Note: The pointer field TypeCountMap is
+  // used to deal with the fact that types are added through various
+  // method calls in this routine. Rather than pass the map as an
+  // argument, we use a field. The field is a pointer so that the
+  // memory footprint of count_map can be reclaimed when this
+  // constructor completes.
+  TypeCountMapType count_map;
+  TypeCountMap = &count_map;
   // Enumerate the global variables.
   for (Module::const_global_iterator I = M->global_begin(),
          E = M->global_end(); I != E; ++I)
@@ -61,7 +72,7 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
        I != E; ++I)
     EnumerateValue(I->getAliasee());
 
-  // Insert constants and metadata that are named at module level into the slot 
+  // Insert constants and metadata that are named at module level into the slot
   // pool so that the module symbol table can refer to them...
   EnumerateValueSymbolTable(M->getValueSymbolTable());
   EnumerateNamedMetadata(M);
@@ -106,10 +117,47 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
     }
   }
 
+  // Optimize type indices so that "common" expected types are given
+  // small indices.
+  OptimizeTypes(M);
+  TypeCountMap = NULL;
+
   // Optimize constant ordering.
   OptimizeConstants(FirstConstant, Values.size());
 }
 
+void NaClValueEnumerator::OptimizeTypes(const Module *M) {
+
+  // Sort types by count, so that we can index them based on
+  // frequency. Use indices of the built TypeMap, so that the order of
+  // construction is repeatable.
+  std::set<unsigned> type_counts;
+  typedef std::set<unsigned> TypeSetType;
+  std::map<unsigned, TypeSetType> usage_count_map;
+  TypeList IdType(Types);
+
+  for (TypeCountMapType::iterator iter = TypeCountMap->begin();
+       iter != TypeCountMap->end(); ++iter) {
+    type_counts.insert(iter->second);
+    usage_count_map[iter->second].insert(TypeMap[iter->first]-1);
+  }
+
+  // Reset the type tracking maps, so that we can re-enter types based
+  // on frequency ordering.
+  TypeCountMap = NULL;
+  Types.clear();
+  TypeMap.clear();
+
+  // Reinsert types, based on frequency.
+  for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();
+       count_iter != type_counts.rend(); ++count_iter) {
+    TypeSetType &count_types = usage_count_map[*count_iter];
+    for (TypeSetType::iterator type_iter = count_types.begin();
+         type_iter != count_types.end(); ++type_iter)
+      EnumerateType((IdType[*type_iter]), true);
+  }
+}
+
 unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {
   InstructionMapType::const_iterator I = InstructionMap.find(Inst);
   assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -351,9 +399,26 @@ void NaClValueEnumerator::EnumerateValue(const Value *V) {
 }
 
 
-void NaClValueEnumerator::EnumerateType(Type *Ty) {
+void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) {
+  // This function is used to enumerate the types referenced by the
+  // given module. It is called in two phases, based on the value of
+  // TypeCountMap. These phases are:
+  //
+  // (1) In this phase, InsideOptimizeTypes=false. We are collecting
+  //     types and all corresponding (implicitly) referenced types.
+  //     In addition, we are keeping track of the number of references
+  //     to each type in TypeCountMap. These reference counts will be
+  //     used by method OptimizeTypes to associate the smallest type
+  //     IDs with the most referenced types.
+  //
+  // (2) In this phase, InsideOptimizeTypes=true. We are registering
+  //     types based on frequency. To minimize type IDs for frequently
+  //     used types, (unlike the first phase) we insert only the
+  //     minimal (implicitly) referenced types needed for each type.
   unsigned *TypeID = &TypeMap[Ty];
+  if (TypeCountMap) ++((*TypeCountMap)[Ty]);
+
   // We've already seen this type.
   if (*TypeID)
     return;
@@ -365,11 +430,24 @@ void NaClValueEnumerator::EnumerateType(Type *Ty) {
     if (!STy->isLiteral())
       *TypeID = ~0U;
 
+  // If in the second phase (i.e. inside OptimizeTypes), don't expand
+  // pointers to structures, since we can just generate a forward
+  // reference to the struct. This way, we don't use up unnecessary
+  // (small) ID values just to define the pointer.
+  bool EnumerateSubtypes = true;
+  if (InsideOptimizeTypes)
+    if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+      if (StructType *STy = dyn_cast<StructType>(PTy->getElementType()))
+        if (!STy->isLiteral())
+          EnumerateSubtypes = false;
+
   // Enumerate all of the subtypes before we enumerate this type.  This ensures
   // that the type will be enumerated in an order that can be directly built.
-  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-       I != E; ++I)
-    EnumerateType(*I);
+  if (EnumerateSubtypes) {
+    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+         I != E; ++I)
+      EnumerateType(*I, InsideOptimizeTypes);
+  }
 
   // Refresh the TypeID pointer in case the table rehashed.
   TypeID = &TypeMap[Ty];
@@ -538,4 +616,3 @@ unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const
   IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
   return getGlobalBasicBlockID(BB);
 }
-
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
index 9e9954883a..0239c34ddc 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
@@ -42,8 +42,17 @@ public:
   // For each value, we remember its Value* and occurrence frequency.
   typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
 private:
+  // Defines unique IDs for each type.
   typedef DenseMap<Type*, unsigned> TypeMapType;
   TypeMapType TypeMap;
+  // Holds the number of references to each type. If non-null, we are
+  // in the first pass of collecting types, and reference counts should
+  // be added to the map. If null, we are in the second pass, which
+  // actually assigns type IDs based on the frequency counts found in
+  // the first pass.
+  typedef TypeMapType TypeCountMapType;
+  TypeCountMapType* TypeCountMap;
+
   TypeList Types;
 
   typedef DenseMap<const Value*, unsigned> ValueMapType;
@@ -152,6 +161,7 @@ public:
   void purgeFunction();
 
 private:
+  void OptimizeTypes(const Module *M);
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
 
   void EnumerateMDNodeOperands(const MDNode *N);
@@ -159,7 +169,7 @@ private:
   void EnumerateFunctionLocalMetadata(const MDNode *N);
   void EnumerateNamedMDNode(const NamedMDNode *NMD);
   void EnumerateValue(const Value *V);
-  void EnumerateType(Type *T);
+  void EnumerateType(Type *T, bool InsideOptimizeTypes=false);
   void EnumerateOperandType(const Value *V);
   void EnumerateAttributes(AttributeSet PAL);
diff --git a/test/NaCl/Bitcode/lit.local.cfg b/test/NaCl/Bitcode/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/NaCl/Bitcode/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/NaCl/Bitcode/struct-types.ll b/test/NaCl/Bitcode/struct-types.ll
new file mode 100644
index 0000000000..a36e6bb0ab
--- /dev/null
+++ b/test/NaCl/Bitcode/struct-types.ll
@@ -0,0 +1,78 @@
+; Checks that llvm bitcode defines a struct type before the pointer type,
+; even if the struct definition appears after the pointer type, while
+; pnacl bitcode moves the pointer before the struct.
+; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=LLVM
+; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump | FileCheck %s -check-prefix=PNACL
+
+%typeB = type { i8, %typeA, i32, %typeA }
+%typeA = type { i16 }
+
+define %typeB* @foo(%typeB* %a) {
+  ret %typeB* %a
+}
+
+define %typeB* @bar(%typeB* %b) {
+  ret %typeB* %b
+}
+
+define i16 @bam(i16 %a) {
+  ret i16 %a
+}
+
+; Show the ordering llvm uses to order types, which is to expand subtypes
+; (including across pointers) before the type. Expands types for functions
+; in order: @foo, @bar, @bam.
+; LLVM: <TYPE_BLOCK_ID {{.*}}>
+; i8
+; LLVM: <INTEGER op0=8/>
+; i16
+; LLVM: <INTEGER op0=16/>
+; %typeA = type { i16 }
+; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/>
+; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/>
+; i32
+; LLVM: <INTEGER op0=32/>
+; %typeB = type { i8, %typeA, i32, %typeA }
+; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/>
+; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=0 op2=2 op3=3 op4=2/>
+; %typeB*
+; LLVM: <POINTER abbrevid=4 op0=4 op1=0/>
+; %typeB* (%typeB*)
+; LLVM: <FUNCTION abbrevid=5 op0=0 op1=5 op2=5/>
+; %typeB* (%typeB*)*
+; LLVM: <POINTER abbrevid=4 op0=6 op1=0/>
+; i16 (i16)
+; LLVM: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/>
+; i16 (i16)*
+; LLVM: <POINTER abbrevid=4 op0=8 op1=0/>
+; type of instruction "RET"
+; LLVM: <VOID/>
+; LLVM: </TYPE_BLOCK_ID>
+
+; Show the ordering pnacl-freeze uses to order types.
+; PNACL: <TYPE_BLOCK_ID {{.*}}>
+; %typeB*
+; PNACL: <POINTER abbrevid=4 op0=8 op1=0/>
+; i16
+; PNACL: <INTEGER op0=16/>
+; type of instruction "RET"
+; PNACL: <VOID/>
+; %typeA = type { i16 }
+; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/>
+; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/>
+; %typeB* (%typeB*)
+; PNACL: <FUNCTION abbrevid=5 op0=0 op1=0 op2=0/>
+; %typeB* (%typeB*)*
+; PNACL: <POINTER abbrevid=4 op0=4 op1=0/>
+; i8
+; PNACL: <INTEGER op0=8/>
+; i32
+; PNACL: <INTEGER op0=32/>
+; %typeB = type { i8, %typeA, i32, %typeA }
+; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/>
+; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=6 op2=3 op3=7 op4=3/>
+; i16 (i16)
+; PNACL: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/>
+; i16 (i16)*
+; PNACL: <POINTER abbrevid=4 op0=9 op1=0/>
+; PNACL: </TYPE_BLOCK_ID>
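
Note on the reordering that produces the PNACL ordering checked above: OptimizeTypes counts type references in a first enumeration pass, then re-enumerates types so that the most referenced ones receive the smallest IDs, breaking ties by the original enumeration index so the output is deterministic. The following sketch is not part of the patch; OrderTypesByFrequency and the use of plain strings in place of llvm::Type* are illustrative assumptions, and the pointer-to-struct forward-reference handling in EnumerateType is not modeled here.

    // Sketch only: models the frequency ordering, not the full OptimizeTypes pass.
    #include <algorithm>
    #include <cstddef>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Given the first-pass enumeration order and per-type reference counts,
    // return the second-pass order: most referenced types first (smallest IDs),
    // ties broken by first-seen position so the result is deterministic.
    std::vector<std::string>
    OrderTypesByFrequency(const std::vector<std::string> &FirstSeen,
                          const std::map<std::string, unsigned> &RefCounts) {
      std::vector<std::pair<unsigned, std::size_t>> Keyed; // (count, first index)
      for (std::size_t I = 0; I != FirstSeen.size(); ++I) {
        const auto It = RefCounts.find(FirstSeen[I]);
        Keyed.emplace_back(It == RefCounts.end() ? 0u : It->second, I);
      }
      std::sort(Keyed.begin(), Keyed.end(),
                [](const std::pair<unsigned, std::size_t> &A,
                   const std::pair<unsigned, std::size_t> &B) {
                  if (A.first != B.first)
                    return A.first > B.first; // higher count gets a smaller new ID
                  return A.second < B.second; // stable tie-break on first use
                });
      std::vector<std::string> Ordered;
      Ordered.reserve(Keyed.size());
      for (const auto &K : Keyed)
        Ordered.push_back(FirstSeen[K.second]);
      return Ordered;
    }

The deterministic tie-break is what lets the test above pin down an exact PNACL record order rather than just checking membership.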