aboutsummaryrefslogtreecommitdiff
path: root/tools/shared.py
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2012-11-20 20:50:20 +0100
committerAlon Zakai <alonzakai@gmail.com>2012-11-21 20:47:02 +0100
commit60a11cfaec3ad560cd7fcedf618001bd9c756b1c (patch)
treebd52dfb7481ebe89bb65b7f2a3bed579168d85a4 /tools/shared.py
parenta2b241e70b7f7606c913e916cffb69514d548ade (diff)
improve chunkify to consider previous chunkings when caching
Diffstat (limited to 'tools/shared.py')
-rw-r--r--tools/shared.py54
1 file changed, 51 insertions, 3 deletions
diff --git a/tools/shared.py b/tools/shared.py
index 02d92855..e8767e40 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1208,10 +1208,50 @@ class JCache:
# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
# It is very important to generate similar chunks in incremental builds, in
- # order to maximize the chance of cache hits.
+ # order to maximize the chance of cache hits. To achieve that, we save the
+ # chunking used in the previous compilation of this phase, and we try to
+ # generate the same chunks, barring big differences in function sizes that
+ # violate our chunk size guideline. If caching is not used, chunking_file
+ # should be None
@staticmethod
- def chunkify(funcs, chunk_size):
- chunks = [] # bundles of functions
+ def chunkify(funcs, chunk_size, chunking_file):
+ previous_mapping = None
+ if chunking_file:
+ chunking_file = JCache.get_cachename(chunking_file)
+ if os.path.exists(chunking_file):
+ try:
+ previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+ except:
+ pass
+ chunks = []
+ if previous_mapping:
+ # initialize with previous chunking
+ news = []
+ for func in funcs:
+ ident, data = func
+ if not ident in previous_mapping:
+ news.append(func)
+ else:
+ n = previous_mapping[ident]
+ while n > len(chunks): chunks.append([])
+ chunks[n].append(func)
+ # add news and adjust for new sizes
+ spilled = news
+ for chunk in chunks:
+ size = sum([len(func[1]) for func in chunk])
+ while size > 1.5*chunk_size and len(chunk) > 0:
+ spill = chunk.pop()
+ spilled.append(spill)
+ size -= len(spill[1])
+ for chunk in chunks:
+ size = sum([len(func[1]) for func in chunk])
+ while size < 0.66*chunk_size and len(spilled) > 0:
+ spill = spilled.pop()
+ chunk.append(spill)
+ size += len(spill[1])
+ chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+ funcs = spilled # we will allocate these into chunks as if they were normal inputs
+ # initialize reasonably, the rest of the funcs we need to split out
curr = []
for i in range(len(funcs)):
func = funcs[i]
@@ -1223,6 +1263,14 @@ class JCache:
if curr:
chunks.append(curr)
curr = None
+ if chunking_file:
+ # save new mapping info
+ new_mapping = {}
+ for i in range(len(chunks)):
+ chunk = chunks[i]
+ for ident, data in chunk:
+ new_mapping[ident] = i
+ cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
return chunks
class JS: