aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Schimpf <kschimpf@google.com>2013-05-20 16:05:30 -0700
committerKarl Schimpf <kschimpf@google.com>2013-05-20 16:05:30 -0700
commit1046c286a37c082562a348eb6521c71ca0b3ff75 (patch)
tree52af4ee1382d692f8bc2c07dc59b772100ee7fa7
parentdf0f355cb3e96368db367b9edb9bb851cffacbcc (diff)
Create type IDs based on reference counts.
Create type IDs based on number of references, rather than first reached. This is done so that fewer bits are used to encode types that are commonly used. Note that this cuts the size of the generate bitcode file by about 1.5%, with no effect on the reader, since it only changes the order type ID's are created. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/14495008
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp20
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp89
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h12
-rw-r--r--test/NaCl/Bitcode/lit.local.cfg1
-rw-r--r--test/NaCl/Bitcode/struct-types.ll78
5 files changed, 188 insertions, 12 deletions
diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
index 1d7d8dc23c..2e318020ad 100644
--- a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
@@ -233,12 +233,22 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ // Note: modify to use maximum number of bits if under cutoff. Otherwise,
+ // use VBR to take advantage that frequently referenced types have
+ // small IDs.
+ //
+ // Note: Cutoff chosen based on experiments on pnacl-translate.pexe.
uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+ static const uint64_t TypeVBRCutoff = 6;
+ uint64_t TypeIdNumBits = (NumBits <= TypeVBRCutoff ? NumBits : TypeVBRCutoff);
+ NaClBitCodeAbbrevOp::Encoding TypeIdEncoding =
+ (NumBits <= TypeVBRCutoff
+ ? NaClBitCodeAbbrevOp::Fixed : NaClBitCodeAbbrevOp::VBR);
// Abbrev for TYPE_CODE_POINTER.
NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_POINTER));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
Abbv->Add(NaClBitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -247,7 +257,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
@@ -256,7 +266,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
@@ -272,7 +282,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
@@ -280,7 +290,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv = new NaClBitCodeAbbrev();
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_ARRAY));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 4ac34daa96..e5554b206a 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -23,6 +23,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <set>
+
using namespace llvm;
static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
@@ -31,6 +33,15 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
/// NaClValueEnumerator - Enumerate module-level information.
NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
+ // Create map for counting frequency of types, and set field
+ // TypeCountMap accordingly. Note: Pointer field TypeCountMap is
+ // used to deal with the fact that types are added through various
+ // method calls in this routine. Rather than pass it as an argument,
+ // we use a field. The field is a pointer so that the memory
+ // footprint of count_map can be garbage collected when this
+ // constructor completes.
+ TypeCountMapType count_map;
+ TypeCountMap = &count_map;
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -61,7 +72,7 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
- // Insert constants and metadata that are named at module level into the slot
+ // Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
EnumerateNamedMetadata(M);
@@ -106,10 +117,47 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
}
}
+ // Optimized type indicies to put "common" expected types in with small
+ // indices.
+ OptimizeTypes(M);
+ TypeCountMap = NULL;
+
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
}
+void NaClValueEnumerator::OptimizeTypes(const Module *M) {
+
+ // Sort types by count, so that we can index them based on
+ // frequency. Use indices of built TypeMap, so that order of
+ // construction is repeatable.
+ std::set<unsigned> type_counts;
+ typedef std::set<unsigned> TypeSetType;
+ std::map<unsigned, TypeSetType> usage_count_map;
+ TypeList IdType(Types);
+
+ for (TypeCountMapType::iterator iter = TypeCountMap->begin();
+ iter != TypeCountMap->end(); ++ iter) {
+ type_counts.insert(iter->second);
+ usage_count_map[iter->second].insert(TypeMap[iter->first]-1);
+ }
+
+ // Reset type tracking maps, so that we can re-enter based
+ // on fequency ordering.
+ TypeCountMap = NULL;
+ Types.clear();
+ TypeMap.clear();
+
+ // Reinsert types, based on frequency.
+ for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();
+ count_iter != type_counts.rend(); ++count_iter) {
+ TypeSetType& count_types = usage_count_map[*count_iter];
+ for (TypeSetType::iterator type_iter = count_types.begin();
+ type_iter != count_types.end(); ++type_iter)
+ EnumerateType((IdType[*type_iter]), true);
+ }
+}
+
unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -351,9 +399,26 @@ void NaClValueEnumerator::EnumerateValue(const Value *V) {
}
-void NaClValueEnumerator::EnumerateType(Type *Ty) {
+void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) {
+ // This function is used to enumerate types referenced by the given
+ // module. This function is called in two phases, based on the value
+ // of TypeCountMap. These phases are:
+ //
+ // (1) In this phase, InsideOptimizeTypes=false. We are collecting types
+ // and all corresponding (implicitly) referenced types. In addition,
+ // we are keeping track of the number of references to each type in
+ // TypeCountMap. These reference counts will be used by method
+ // OptimizeTypes to associate the smallest type ID's with the most
+ // referenced types.
+ //
+ // (2) In this phase, InsideOptimizeTypes=true. We are registering types
+ // based on frequency. To minimize type IDs for frequently used
+ // types, (unlike the other context) we are inserting the minimal
+ // (implicitly) referenced types needed for each type.
unsigned *TypeID = &TypeMap[Ty];
+ if (TypeCountMap) ++((*TypeCountMap)[Ty]);
+
// We've already seen this type.
if (*TypeID)
return;
@@ -365,11 +430,24 @@ void NaClValueEnumerator::EnumerateType(Type *Ty) {
if (!STy->isLiteral())
*TypeID = ~0U;
+ // If in the second phase (i.e. inside optimize types), don't expand
+ // pointers to structures, since we can just generate a forward
+ // reference to it. This way, we don't use up unnecessary (small) ID
+ // values just to define the pointer.
+ bool EnumerateSubtypes = true;
+ if (InsideOptimizeTypes)
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(PTy->getElementType()))
+ if (!STy->isLiteral())
+ EnumerateSubtypes = false;
+
// Enumerate all of the subtypes before we enumerate this type. This ensures
// that the type will be enumerated in an order that can be directly built.
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- EnumerateType(*I);
+ if (EnumerateSubtypes) {
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I, InsideOptimizeTypes);
+ }
// Refresh the TypeID pointer in case the table rehashed.
TypeID = &TypeMap[Ty];
@@ -538,4 +616,3 @@ unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const
IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
return getGlobalBasicBlockID(BB);
}
-
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
index 9e9954883a..0239c34ddc 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
@@ -42,8 +42,17 @@ public:
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
private:
+ // Defines unique ID's for each type.
typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;
+ // Defines the number of references to each type. If defined,
+ // we are in the first pass of collecting types, and reference counts
+ // should be added to the map. If undefined, we are in the second pass
+ // that actually assigns type IDs, based on frequency counts found in
+ // the first pass.
+ typedef TypeMapType TypeCountMapType;
+ TypeCountMapType* TypeCountMap;
+
TypeList Types;
typedef DenseMap<const Value*, unsigned> ValueMapType;
@@ -152,6 +161,7 @@ public:
void purgeFunction();
private:
+ void OptimizeTypes(const Module *M);
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
void EnumerateMDNodeOperands(const MDNode *N);
@@ -159,7 +169,7 @@ private:
void EnumerateFunctionLocalMetadata(const MDNode *N);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
- void EnumerateType(Type *T);
+ void EnumerateType(Type *T, bool InsideOptimizeTypes=false);
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(AttributeSet PAL);
diff --git a/test/NaCl/Bitcode/lit.local.cfg b/test/NaCl/Bitcode/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/NaCl/Bitcode/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/NaCl/Bitcode/struct-types.ll b/test/NaCl/Bitcode/struct-types.ll
new file mode 100644
index 0000000000..a36e6bb0ab
--- /dev/null
+++ b/test/NaCl/Bitcode/struct-types.ll
@@ -0,0 +1,78 @@
+; Checks if llvm bitcode defines a struct type before the pointer type,
+; even if the struct definintion appears after the pointer type, while
+; pnacl bitcode moves the pointer before the struct.
+; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=LLVM
+; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump | FileCheck %s -check-prefix=PNACL
+
+%typeB = type { i8, %typeA, i32, %typeA }
+%typeA = type { i16 }
+
+define %typeB* @foo(%typeB* %a) {
+ ret %typeB* %a
+}
+
+define %typeB* @bar(%typeB* %b) {
+ ret %typeB* %b
+}
+
+define i16 @bam(i16 %a) {
+ ret i16 %a
+}
+
+; Show the ordering llvm uses to order types, which is to expand subtypes
+; (including accross pointers) before the type. Expands types for functions
+; in order: @foo, @bar, @bam.
+; LLVM: <TYPE_BLOCK_ID {{.*}}>
+; i8
+; LLVM: <INTEGER op0=8/>
+; i16
+; LLVM: <INTEGER op0=16/>
+; %typeA = type { i16 }
+; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/>
+; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/>
+; i32
+; LLVM: <INTEGER op0=32/>
+; %typeB = type { i8, %typeA, i32, %typeA }
+; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/>
+; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=0 op2=2 op3=3 op4=2/>
+; %typeB*
+; LLVM: <POINTER abbrevid=4 op0=4 op1=0/>
+; %typeB* (%typeB*)
+; LLVM: <FUNCTION abbrevid=5 op0=0 op1=5 op2=5/>
+; %typeB* (%typeB*)*
+; LLVM: <POINTER abbrevid=4 op0=6 op1=0/>
+; i16 (i16)
+; LLVM: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/>
+; i16 (i16)*
+; LLVM: <POINTER abbrevid=4 op0=8 op1=0/>
+; type of instruction "RET"
+; LLVM: <VOID/>
+; LLVM: </TYPE_BLOCK_ID>
+
+; Show the ordering pnacl-freeze uses to order types.
+; PNACL: <TYPE_BLOCK_ID {{.*}}>
+; %typeB*
+; PNACL: <POINTER abbrevid=4 op0=8 op1=0/>
+; i16
+; PNACL: <INTEGER op0=16/>
+; type of instruction "RET"
+; PNACL: <VOID/>
+; %typeA = type { i16 }
+; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/>
+; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/>
+; %typeB* (%typeB*)
+; PNACL: <FUNCTION abbrevid=5 op0=0 op1=0 op2=0/>
+; %typeB* (%typeB*)*
+; PNACL: <POINTER abbrevid=4 op0=4 op1=0/>
+; i8
+; PNACL: <INTEGER op0=8/>
+; i32
+; PNACL: <INTEGER op0=32/>
+; %typeB = type { i8, %typeA, i32, %typeA }
+; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/>
+; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=6 op2=3 op3=7 op4=3/>
+; i16 (i16)
+; PNACL: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/>
+; i16 (i16)*
+; PNACL: <POINTER abbrevid=4 op0=9 op1=0/>
+; PNACL: </TYPE_BLOCK_ID>