diff options
author | Chad Austin <chad@imvu.com> | 2013-01-30 17:16:12 -0800 |
---|---|---|
committer | Chad Austin <chad@imvu.com> | 2013-03-04 19:27:47 -0800 |
commit | a804dfb628279243de1e82504b99d386eb2d2a93 (patch) | |
tree | 8bec244c77c941e16b7f2fe7eb5dc79a6b14e851 /tools | |
parent | 8e63a929d4fe38d8929bc383a433199cba00555d (diff) |
Make chunkify a free function
Diffstat (limited to 'tools')
-rw-r--r-- | tools/js_optimizer.py | 2 |
-rw-r--r-- | tools/shared.py | 220 |
2 files changed, 119 insertions, 103 deletions
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index 533242b2..f2d3b7db 100644 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -106,7 +106,7 @@ def run_on_js(filename, passes, js_engine, jcache): intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) - chunks = shared.JCache.chunkify(funcs, chunk_size, 'jsopt' if jcache else None) + chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None) if jcache: # load chunks from cache where we can # TODO: ignore small chunks diff --git a/tools/shared.py b/tools/shared.py index 5383979c..35f45728 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1249,15 +1249,7 @@ class Cache: @classmethod def erase(self): - try: - shutil.rmtree(self.dirname) - except: - pass - try_delete(RELOOPER) - try: - open(Cache.dirname + '__last_clear', 'w').write('last clear: ' + time.asctime() + '\n') - except: - print >> sys.stderr, 'failed to save last clear time' + shutil.rmtree(self.dirname, ignore_errors=True) @classmethod def get_path(self, shortname): @@ -1345,100 +1337,124 @@ class JCache: # open(cachename + '.key' + str(i), 'w').write(keys[i]) # open(cachename + '.value', 'w').write(value) - # Given a set of functions of form (ident, text), and a preferred chunk size, - # generates a set of chunks for parallel processing and caching. - # It is very important to generate similar chunks in incremental builds, in - # order to maximize the chance of cache hits. To achieve that, we save the - # chunking used in the previous compilation of this phase, and we try to - # generate the same chunks, barring big differences in function sizes that - # violate our chunk size guideline. 
If caching is not used, chunking_file - # should be None - @classmethod - def chunkify(self, funcs, chunk_size, chunking_file): - previous_mapping = None - if chunking_file: - chunking_file = self.get_cachename(chunking_file) - if os.path.exists(chunking_file): - try: - previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in - if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file) - except Exception, e: - print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e - else: - print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file - chunks = [] - if previous_mapping: - # initialize with previous chunking - news = [] - for func in funcs: - ident, data = func - assert ident, 'need names for jcache chunking' - if not ident in previous_mapping: - news.append(func) - else: - n = previous_mapping[ident] - while n >= len(chunks): chunks.append([]) - chunks[n].append(func) - if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news) - # add news and adjust for new sizes - spilled = news - for i in range(len(chunks)): - chunk = chunks[i] - size = sum([len(func[1]) for func in chunk]) - #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size - while size > 1.5*chunk_size and len(chunk) > 1: - spill = chunk.pop() - spilled.append(spill) - size -= len(spill[1]) - #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled) - for chunk in chunks: - size = sum([len(func[1]) for func in chunk]) - while size < 0.66*chunk_size and len(spilled) > 0: - spill = spilled.pop() - chunk.append(spill) - size += len(spill[1]) - chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them - funcs = spilled # we will allocate these into chunks as if they were normal inputs - #if 
DEBUG: print >> sys.stderr, 'leftover spills', len(spilled) - # initialize reasonably, the rest of the funcs we need to split out - curr = [] - total_size = 0 - for i in range(len(funcs)): - func = funcs[i] - curr_size = len(func[1]) - if total_size + curr_size < chunk_size: - curr.append(func) - total_size += curr_size +# Given a set of functions of form (ident, text), and a preferred chunk size, +# generates a set of chunks for parallel processing and caching. +# It is very important to generate similar chunks in incremental builds, in +# order to maximize the chance of cache hits. To achieve that, we save the +# chunking used in the previous compilation of this phase, and we try to +# generate the same chunks, barring big differences in function sizes that +# violate our chunk size guideline. If caching is not used, chunking_file +# should be None +@classmethod +def chunkify(funcs, chunk_size, chunking_file): + previous_mapping = None + if chunking_file: + if os.path.exists(chunking_file): + try: + previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in + if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file) + except Exception, e: + print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e + else: + print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file + chunks = [] + if previous_mapping: + # initialize with previous chunking + news = [] + for func in funcs: + ident, data = func + assert ident, 'need names for jcache chunking' + if not ident in previous_mapping: + news.append(func) else: - chunks.append(curr) - curr = [func] - total_size = curr_size - if curr: - chunks.append(curr) - curr = None - if chunking_file: - # sort within each chunk, to keep the order identical - for chunk in chunks: - chunk.sort(key=lambda func: func[0]) - # save new 
mapping info - new_mapping = {} - for i in range(len(chunks)): - chunk = chunks[i] - for ident, data in chunk: - assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking' - new_mapping[ident] = i - cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping) - if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file) - #if DEBUG: - # for i in range(len(chunks)): - # chunk = chunks[i] - # print >> sys.stderr, 'final chunk', i, len(chunk) - # print >> sys.stderr, 'new mapping:', new_mapping - # if previous_mapping: - # for ident in set(previous_mapping.keys() + new_mapping.keys()): - # if previous_mapping.get(ident) != new_mapping.get(ident): - # print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident) - return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names + n = previous_mapping[ident] + while n >= len(chunks): chunks.append([]) + chunks[n].append(func) + if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news) + # add news and adjust for new sizes + spilled = news + for i in range(len(chunks)): + chunk = chunks[i] + size = sum([len(func[1]) for func in chunk]) + #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size + while size > 1.5*chunk_size and len(chunk) > 1: + spill = chunk.pop() + spilled.append(spill) + size -= len(spill[1]) + #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled) + for chunk in chunks: + size = sum([len(func[1]) for func in chunk]) + while size < 0.66*chunk_size and len(spilled) > 0: + spill = spilled.pop() + chunk.append(spill) + size += len(spill[1]) + chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them + funcs = spilled # we will allocate these into chunks as if they were normal inputs + #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled) + # 
initialize reasonably, the rest of the funcs we need to split out + curr = [] + total_size = 0 + for i in range(len(funcs)): + func = funcs[i] + curr_size = len(func[1]) + if total_size + curr_size < chunk_size: + curr.append(func) + total_size += curr_size + else: + n = previous_mapping[ident] + while n >= len(chunks): chunks.append([]) + chunks[n].append(func) + # add news and adjust for new sizes + spilled = news + for chunk in chunks: + size = sum([len(func[1]) for func in chunk]) + while size > 1.5*chunk_size and len(chunk) > 0: + spill = chunk.pop() + spilled.append(spill) + size -= len(spill[1]) + for chunk in chunks: + size = sum([len(func[1]) for func in chunk]) + while size < 0.66*chunk_size and len(spilled) > 0: + spill = spilled.pop() + chunk.append(spill) + size += len(spill[1]) + chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them + funcs = spilled # we will allocate these into chunks as if they were normal inputs +# initialize reasonably, the rest of the funcs we need to split out +curr = [] +total_size = 0 +for i in range(len(funcs)): + func = funcs[i] + curr_size = len(func[1]) + if total_size + curr_size < chunk_size: + curr.append(func) + total_size += curr_size + else: + chunks.append(curr) + curr = None + if chunking_file: + # sort within each chunk, to keep the order identical + for chunk in chunks: + chunk.sort(key=lambda func: func[0]) + # save new mapping info + new_mapping = {} + for i in range(len(chunks)): + chunk = chunks[i] + for ident, data in chunk: + assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking' + new_mapping[ident] = i + cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping) + if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file) + #if DEBUG: + # for i in range(len(chunks)): + # chunk = chunks[i] + # print >> sys.stderr, 'final chunk', i, len(chunk) + # print >> sys.stderr, 'new mapping:', 
new_mapping + # if previous_mapping: + # for ident in set(previous_mapping.keys() + new_mapping.keys()): + # if previous_mapping.get(ident) != new_mapping.get(ident): + # print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident) + return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names class JS: @staticmethod |