aboutsummaryrefslogtreecommitdiff
path: root/lib/Bitcode
diff options
context:
space:
mode:
authorKarl Schimpf <kschimpf@google.com>2013-05-20 16:05:30 -0700
committerKarl Schimpf <kschimpf@google.com>2013-05-20 16:05:30 -0700
commit1046c286a37c082562a348eb6521c71ca0b3ff75 (patch)
tree52af4ee1382d692f8bc2c07dc59b772100ee7fa7 /lib/Bitcode
parentdf0f355cb3e96368db367b9edb9bb851cffacbcc (diff)
Create type IDs based on reference counts.
Create type IDs based on number of references, rather than first reached. This is done so that fewer bits are used to encode types that are commonly used. Note that this cuts the size of the generate bitcode file by about 1.5%, with no effect on the reader, since it only changes the order type ID's are created. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3405 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/14495008
Diffstat (limited to 'lib/Bitcode')
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp20
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp89
-rw-r--r--lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h12
3 files changed, 109 insertions, 12 deletions
diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
index 1d7d8dc23c..2e318020ad 100644
--- a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp
@@ -233,12 +233,22 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ // Note: modify to use maximum number of bits if under cutoff. Otherwise,
+ // use VBR to take advantage that frequently referenced types have
+ // small IDs.
+ //
+ // Note: Cutoff chosen based on experiments on pnacl-translate.pexe.
uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+ static const uint64_t TypeVBRCutoff = 6;
+ uint64_t TypeIdNumBits = (NumBits <= TypeVBRCutoff ? NumBits : TypeVBRCutoff);
+ NaClBitCodeAbbrevOp::Encoding TypeIdEncoding =
+ (NumBits <= TypeVBRCutoff
+ ? NaClBitCodeAbbrevOp::Fixed : NaClBitCodeAbbrevOp::VBR);
// Abbrev for TYPE_CODE_POINTER.
NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_POINTER));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
Abbv->Add(NaClBitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -247,7 +257,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
@@ -256,7 +266,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
@@ -272,7 +282,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
@@ -280,7 +290,7 @@ static void WriteTypeTable(const NaClValueEnumerator &VE,
Abbv = new NaClBitCodeAbbrev();
Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_ARRAY));
Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits));
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
index 4ac34daa96..e5554b206a 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp
@@ -23,6 +23,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <set>
+
using namespace llvm;
static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
@@ -31,6 +33,15 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
/// NaClValueEnumerator - Enumerate module-level information.
NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
+ // Create map for counting frequency of types, and set field
+ // TypeCountMap accordingly. Note: Pointer field TypeCountMap is
+ // used to deal with the fact that types are added through various
+ // method calls in this routine. Rather than pass it as an argument,
+ // we use a field. The field is a pointer so that the memory
+ // footprint of count_map can be garbage collected when this
+ // constructor completes.
+ TypeCountMapType count_map;
+ TypeCountMap = &count_map;
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -61,7 +72,7 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
- // Insert constants and metadata that are named at module level into the slot
+ // Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
EnumerateNamedMetadata(M);
@@ -106,10 +117,47 @@ NaClValueEnumerator::NaClValueEnumerator(const Module *M) {
}
}
+ // Optimized type indicies to put "common" expected types in with small
+ // indices.
+ OptimizeTypes(M);
+ TypeCountMap = NULL;
+
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
}
+void NaClValueEnumerator::OptimizeTypes(const Module *M) {
+
+ // Sort types by count, so that we can index them based on
+ // frequency. Use indices of built TypeMap, so that order of
+ // construction is repeatable.
+ std::set<unsigned> type_counts;
+ typedef std::set<unsigned> TypeSetType;
+ std::map<unsigned, TypeSetType> usage_count_map;
+ TypeList IdType(Types);
+
+ for (TypeCountMapType::iterator iter = TypeCountMap->begin();
+ iter != TypeCountMap->end(); ++ iter) {
+ type_counts.insert(iter->second);
+ usage_count_map[iter->second].insert(TypeMap[iter->first]-1);
+ }
+
+ // Reset type tracking maps, so that we can re-enter based
+ // on fequency ordering.
+ TypeCountMap = NULL;
+ Types.clear();
+ TypeMap.clear();
+
+ // Reinsert types, based on frequency.
+ for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();
+ count_iter != type_counts.rend(); ++count_iter) {
+ TypeSetType& count_types = usage_count_map[*count_iter];
+ for (TypeSetType::iterator type_iter = count_types.begin();
+ type_iter != count_types.end(); ++type_iter)
+ EnumerateType((IdType[*type_iter]), true);
+ }
+}
+
unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -351,9 +399,26 @@ void NaClValueEnumerator::EnumerateValue(const Value *V) {
}
-void NaClValueEnumerator::EnumerateType(Type *Ty) {
+void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) {
+ // This function is used to enumerate types referenced by the given
+ // module. This function is called in two phases, based on the value
+ // of TypeCountMap. These phases are:
+ //
+ // (1) In this phase, InsideOptimizeTypes=false. We are collecting types
+ // and all corresponding (implicitly) referenced types. In addition,
+ // we are keeping track of the number of references to each type in
+ // TypeCountMap. These reference counts will be used by method
+ // OptimizeTypes to associate the smallest type ID's with the most
+ // referenced types.
+ //
+ // (2) In this phase, InsideOptimizeTypes=true. We are registering types
+ // based on frequency. To minimize type IDs for frequently used
+ // types, (unlike the other context) we are inserting the minimal
+ // (implicitly) referenced types needed for each type.
unsigned *TypeID = &TypeMap[Ty];
+ if (TypeCountMap) ++((*TypeCountMap)[Ty]);
+
// We've already seen this type.
if (*TypeID)
return;
@@ -365,11 +430,24 @@ void NaClValueEnumerator::EnumerateType(Type *Ty) {
if (!STy->isLiteral())
*TypeID = ~0U;
+ // If in the second phase (i.e. inside optimize types), don't expand
+ // pointers to structures, since we can just generate a forward
+ // reference to it. This way, we don't use up unnecessary (small) ID
+ // values just to define the pointer.
+ bool EnumerateSubtypes = true;
+ if (InsideOptimizeTypes)
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(PTy->getElementType()))
+ if (!STy->isLiteral())
+ EnumerateSubtypes = false;
+
// Enumerate all of the subtypes before we enumerate this type. This ensures
// that the type will be enumerated in an order that can be directly built.
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- EnumerateType(*I);
+ if (EnumerateSubtypes) {
+ for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+ I != E; ++I)
+ EnumerateType(*I, InsideOptimizeTypes);
+ }
// Refresh the TypeID pointer in case the table rehashed.
TypeID = &TypeMap[Ty];
@@ -538,4 +616,3 @@ unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const
IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
return getGlobalBasicBlockID(BB);
}
-
diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
index 9e9954883a..0239c34ddc 100644
--- a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
+++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h
@@ -42,8 +42,17 @@ public:
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
private:
+ // Defines unique ID's for each type.
typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;
+ // Defines the number of references to each type. If defined,
+ // we are in the first pass of collecting types, and reference counts
+ // should be added to the map. If undefined, we are in the second pass
+ // that actually assigns type IDs, based on frequency counts found in
+ // the first pass.
+ typedef TypeMapType TypeCountMapType;
+ TypeCountMapType* TypeCountMap;
+
TypeList Types;
typedef DenseMap<const Value*, unsigned> ValueMapType;
@@ -152,6 +161,7 @@ public:
void purgeFunction();
private:
+ void OptimizeTypes(const Module *M);
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
void EnumerateMDNodeOperands(const MDNode *N);
@@ -159,7 +169,7 @@ private:
void EnumerateFunctionLocalMetadata(const MDNode *N);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
- void EnumerateType(Type *T);
+ void EnumerateType(Type *T, bool InsideOptimizeTypes=false);
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(AttributeSet PAL);