author    Chad Austin <chad@imvu.com>    2013-01-30 17:50:33 -0800
committer Chad Austin <chad@imvu.com>    2013-03-04 19:31:47 -0800
commit    c919d824ac44031f00f925b5c44e86ac6c393292
tree      b3517abaeb7eb6aa4f55e9a2a0d89f0fea9e2023
parent    a804dfb628279243de1e82504b99d386eb2d2a93
Split Cache and JCache into objects and move them into cache.py so they can be loaded without shared.py
-rwxr-xr-x  emscripten.py   |   1
-rw-r--r--  tools/cache.py  | 206
-rw-r--r--  tools/shared.py | 225
3 files changed, 212 insertions(+), 220 deletions(-)
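The substance of the change: Cache and JCache stop being class-level singletons inside shared.py and become ordinary objects in a new tools/cache.py, with shared.py keeping thin compatibility aliases so existing callers are unaffected. A minimal sketch of standalone use of the new module, assuming an emscripten checkout on the path; the creator function and literal values below are illustrative, not part of the commit:

    # Hypothetical standalone use of tools/cache.py after this commit (Python 2).
    from tools import cache

    c = cache.Cache()        # uses EM_CACHE if set, else ~/.emscripten_cache
    jc = cache.JCache(c)     # JS-specific cache layered on the base cache

    def build_dlmalloc():    # illustrative creator: returns a path to copy in
        return '/tmp/dlmalloc.bc'

    bc = c.get('dlmalloc', build_dlmalloc)  # built on first call, cached after

    keys = ['source text', 'compiler settings']
    shortkey = jc.get_shortkey(keys)        # concatenated md5 of the full keys
    jc.ensure()
    jc.set(shortkey, keys, 'compiled output')
    assert jc.get(shortkey, keys) == 'compiled output'  # full keys must match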
diff --git a/emscripten.py b/emscripten.py
index 869e512d..4f3f2a77 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -13,6 +13,7 @@ import os, sys, json, optparse, subprocess, re, time, multiprocessing, functools
 
 from tools import shared
 from tools import jsrun
+from tools import cache
 
 __rootpath__ = os.path.abspath(os.path.dirname(__file__))
 def path_from_root(*pathelems):
diff --git a/tools/cache.py b/tools/cache.py
new file mode 100644
index 00000000..78a11ba0
--- /dev/null
+++ b/tools/cache.py
@@ -0,0 +1,206 @@
+import os.path, shutil, hashlib, cPickle
+
+# Permanent cache for dlmalloc and stdlibc++
+class Cache:
+  def __init__(self, dirname=None):
+    if dirname is None:
+      dirname = os.environ.get('EM_CACHE')
+    if not dirname:
+      dirname = os.path.expanduser(os.path.join('~', '.emscripten_cache'))
+    self.dirname = dirname
+
+  def ensure(self):
+    if not os.path.exists(self.dirname):
+      os.makedirs(self.dirname)
+
+  def erase(self):
+    shutil.rmtree(self.dirname, ignore_errors=True)
+
+  def get_path(self, shortname):
+    return os.path.join(self.dirname, shortname)
+
+  # Request a cached file. If it isn't in the cache, it will be created with
+  # the given creator function
+  def get(self, shortname, creator, extension='.bc'):
+    if not shortname.endswith(extension): shortname += extension
+    cachename = os.path.join(self.dirname, shortname)
+    if os.path.exists(cachename):
+      return cachename
+    self.ensure()
+    shutil.copyfile(creator(), cachename)
+    return cachename
+
+# JS-specific cache. We cache the results of compilation and optimization,
+# so that in incremental builds we can just load from cache.
+# We cache reasonably-large-sized chunks
+class JCache:
+  def __init__(self, cache):
+    self.cache = cache
+    self.dirname = os.path.join(cache.dirname, 'jcache')
+
+  def ensure(self):
+    self.cache.ensure()
+    if not os.path.exists(self.dirname):
+      os.makedirs(self.dirname)
+
+  def get_shortkey(self, keys):
+    if type(keys) not in [list, tuple]:
+      keys = [keys]
+    ret = ''
+    for key in keys:
+      assert type(key) == str
+      ret += hashlib.md5(key).hexdigest()
+    return ret
+
+  def get_cachename(self, shortkey):
+    return os.path.join(self.dirname, shortkey)
+
+  # Returns a cached value, if it exists. Make sure the full key matches
+  def get(self, shortkey, keys):
+    #if DEBUG: print >> sys.stderr, 'jcache get?', shortkey
+    cachename = self.get_cachename(shortkey)
+    if not os.path.exists(cachename):
+      #if DEBUG: print >> sys.stderr, 'jcache none at all'
+      return
+    data = cPickle.Unpickler(open(cachename, 'rb')).load()
+    if len(data) != 2:
+      #if DEBUG: print >> sys.stderr, 'jcache error in get'
+      return
+    oldkeys = data[0]
+    if len(oldkeys) != len(keys):
+      #if DEBUG: print >> sys.stderr, 'jcache collision (a)'
+      return
+    for i in range(len(oldkeys)):
+      if oldkeys[i] != keys[i]:
+        #if DEBUG: print >> sys.stderr, 'jcache collision (b)'
+        return
+    #if DEBUG: print >> sys.stderr, 'jcache win'
+    return data[1]
+
+  # Sets the cached value for a key (from get_key)
+  def set(self, shortkey, keys, value):
+    cachename = self.get_cachename(shortkey)
+    cPickle.Pickler(open(cachename, 'wb')).dump([keys, value])
+    #if DEBUG:
+    #  for i in range(len(keys)):
+    #    open(cachename + '.key' + str(i), 'w').write(keys[i])
+    #  open(cachename + '.value', 'w').write(value)
+
+# Given a set of functions of form (ident, text), and a preferred chunk size,
+# generates a set of chunks for parallel processing and caching.
+# It is very important to generate similar chunks in incremental builds, in
+# order to maximize the chance of cache hits. To achieve that, we save the
+# chunking used in the previous compilation of this phase, and we try to
+# generate the same chunks, barring big differences in function sizes that
+# violate our chunk size guideline. If caching is not used, chunking_file
+# should be None
+def chunkify(funcs, chunk_size, chunking_file):
+  previous_mapping = None
+  if chunking_file:
+    if os.path.exists(chunking_file):
+      try:
+        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
+      except Exception, e:
+        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
+    else:
+      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
+  chunks = []
+  if previous_mapping:
+    # initialize with previous chunking
+    news = []
+    for func in funcs:
+      ident, data = func
+      assert ident, 'need names for jcache chunking'
+      if not ident in previous_mapping:
+        news.append(func)
+      else:
+        n = previous_mapping[ident]
+        while n >= len(chunks): chunks.append([])
+        chunks[n].append(func)
+    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
+    # add news and adjust for new sizes
+    spilled = news
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      size = sum([len(func[1]) for func in chunk])
+      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
+      while size > 1.5*chunk_size and len(chunk) > 1:
+        spill = chunk.pop()
+        spilled.append(spill)
+        size -= len(spill[1])
+    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
+    for chunk in chunks:
+      size = sum([len(func[1]) for func in chunk])
+      while size < 0.66*chunk_size and len(spilled) > 0:
+        spill = spilled.pop()
+        chunk.append(spill)
+        size += len(spill[1])
+    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+    funcs = spilled # we will allocate these into chunks as if they were normal inputs
+    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
+  # initialize reasonably, the rest of the funcs we need to split out
+  curr = []
+  total_size = 0
+  for i in range(len(funcs)):
+    func = funcs[i]
+    curr_size = len(func[1])
+    if total_size + curr_size < chunk_size:
+      curr.append(func)
+      total_size += curr_size
+    else:
+      chunks.append(curr)
+      curr = [func]
+      total_size = curr_size
+  if curr:
+    chunks.append(curr)
+    curr = None
+  if chunking_file:
+    # sort within each chunk, to keep the order identical
+    for chunk in chunks:
+      chunk.sort(key=lambda func: func[0])
+    # save new mapping info
+    new_mapping = {}
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      for ident, data in chunk:
+        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
+        new_mapping[ident] = i
+    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
+    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
+    #if DEBUG:
+    #  for i in range(len(chunks)):
+    #    chunk = chunks[i]
+    #    print >> sys.stderr, 'final chunk', i, len(chunk)
+    #  print >> sys.stderr, 'new mapping:', new_mapping
+    #  if previous_mapping:
+    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
+    #      if previous_mapping.get(ident) != new_mapping.get(ident):
+    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
+  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
diff --git a/tools/shared.py b/tools/shared.py
index 35f45728..f1d133d7 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1,7 +1,7 @@
 import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, zlib, re
 from subprocess import Popen, PIPE, STDOUT
 from tempfile import mkstemp
-from . import jsrun
+from . import jsrun, cache
 
 def listify(x):
   if type(x) is not list: return [x]
@@ -1236,225 +1236,10 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
   open(outfile, 'w').write(src)
   return outfile
 
-# Permanent cache for dlmalloc and stdlibc++
-class Cache:
-  dirname = os.environ.get('EM_CACHE')
-  if not dirname:
-    dirname = os.path.expanduser(os.path.join('~', '.emscripten_cache'))
-
-  @classmethod
-  def ensure(self):
-    if not os.path.exists(self.dirname):
-      os.makedirs(self.dirname)
-
-  @classmethod
-  def erase(self):
-    shutil.rmtree(self.dirname, ignore_errors=True)
-
-  @classmethod
-  def get_path(self, shortname):
-    return os.path.join(self.dirname, shortname)
-
-  # Request a cached file. If it isn't in the cache, it will be created with
-  # the given creator function
-  @classmethod
-  def get(self, shortname, creator, extension='.bc'):
-    if not shortname.endswith(extension): shortname += extension
-    cachename = os.path.join(self.dirname, shortname)
-    if os.path.exists(cachename):
-      return cachename
-    Cache.ensure()
-    shutil.copyfile(creator(), cachename)
-    return cachename
-
-# JS-specific cache. We cache the results of compilation and optimization,
-# so that in incremental builds we can just load from cache.
-# We cache reasonably-large-sized chunks
-class JCache:
-  dirname = os.path.join(Cache.dirname, 'jcache')
-
-  @classmethod
-  def ensure(self):
-    Cache.ensure()
-    if not os.path.exists(self.dirname):
-      os.makedirs(self.dirname)
-
-  @staticmethod
-  def get_shortkey(keys):
-    if type(keys) not in [list, tuple]:
-      keys = [keys]
-    ret = ''
-    for key in keys:
-      assert type(key) == str
-      ret += hashlib.md5(key).hexdigest()
-    return ret
-
-  @classmethod
-  def get_cachename(self, shortkey):
-    return os.path.join(self.dirname, shortkey)
-
-  # Returns a cached value, if it exists. Make sure the full key matches
-  @classmethod
-  def get(self, shortkey, keys):
-    if DEBUG_CACHE: print >> sys.stderr, 'jcache get?', shortkey
-    cachename = self.get_cachename(shortkey)
-    if not os.path.exists(cachename):
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache none at all'
-      return
-    try:
-      data = cPickle.loads(zlib.decompress(open(cachename).read()))
-    except Exception, e:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache decompress/unpickle error:', e
-      return
-    if len(data) != 2:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache error in get'
-      return
-    oldkeys = data[0]
-    if len(oldkeys) != len(keys):
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache collision (a)'
-      return
-    for i in range(len(oldkeys)):
-      if oldkeys[i] != keys[i]:
-        if DEBUG_CACHE: print >> sys.stderr, 'jcache collision (b)'
-        return
-    if DEBUG_CACHE: print >> sys.stderr, 'jcache win'
-    return data[1]
-
-  # Sets the cached value for a key (from get_key)
-  @classmethod
-  def set(self, shortkey, keys, value):
-    if DEBUG_CACHE: print >> sys.stderr, 'save to cache', shortkey
-    cachename = self.get_cachename(shortkey)
-    try:
-      f = open(cachename, 'w')
-      f.write(zlib.compress(cPickle.dumps([keys, value])))
-      f.close()
-    except Exception, e:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache compress/pickle error:', e
-      return
-    #if DEBUG:
-    #  for i in range(len(keys)):
-    #    open(cachename + '.key' + str(i), 'w').write(keys[i])
-    #  open(cachename + '.value', 'w').write(value)
-
-# Given a set of functions of form (ident, text), and a preferred chunk size,
-# generates a set of chunks for parallel processing and caching.
-# It is very important to generate similar chunks in incremental builds, in
-# order to maximize the chance of cache hits. To achieve that, we save the
-# chunking used in the previous compilation of this phase, and we try to
-# generate the same chunks, barring big differences in function sizes that
-# violate our chunk size guideline. If caching is not used, chunking_file
-# should be None
-@classmethod
-def chunkify(funcs, chunk_size, chunking_file):
-  previous_mapping = None
-  if chunking_file:
-    if os.path.exists(chunking_file):
-      try:
-        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
-        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
-      except Exception, e:
-        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
-    else:
-      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
-  chunks = []
-  if previous_mapping:
-    # initialize with previous chunking
-    news = []
-    for func in funcs:
-      ident, data = func
-      assert ident, 'need names for jcache chunking'
-      if not ident in previous_mapping:
-        news.append(func)
-      else:
-        n = previous_mapping[ident]
-        while n >= len(chunks): chunks.append([])
-        chunks[n].append(func)
-    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
-    # add news and adjust for new sizes
-    spilled = news
-    for i in range(len(chunks)):
-      chunk = chunks[i]
-      size = sum([len(func[1]) for func in chunk])
-      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
-      while size > 1.5*chunk_size and len(chunk) > 1:
-        spill = chunk.pop()
-        spilled.append(spill)
-        size -= len(spill[1])
-    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
-    for chunk in chunks:
-      size = sum([len(func[1]) for func in chunk])
-      while size < 0.66*chunk_size and len(spilled) > 0:
-        spill = spilled.pop()
-        chunk.append(spill)
-        size += len(spill[1])
-    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
-    funcs = spilled # we will allocate these into chunks as if they were normal inputs
-    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
-  # initialize reasonably, the rest of the funcs we need to split out
-  curr = []
-  total_size = 0
-  for i in range(len(funcs)):
-    func = funcs[i]
-    curr_size = len(func[1])
-    if total_size + curr_size < chunk_size:
-      curr.append(func)
-      total_size += curr_size
-    else:
-      chunks.append(curr)
-      curr = [func]
-      total_size = curr_size
-  if curr:
-    chunks.append(curr)
-    curr = None
-  if chunking_file:
-    # sort within each chunk, to keep the order identical
-    for chunk in chunks:
-      chunk.sort(key=lambda func: func[0])
-    # save new mapping info
-    new_mapping = {}
-    for i in range(len(chunks)):
-      chunk = chunks[i]
-      for ident, data in chunk:
-        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
-        new_mapping[ident] = i
-    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
-    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
-    #if DEBUG:
-    #  for i in range(len(chunks)):
-    #    chunk = chunks[i]
-    #    print >> sys.stderr, 'final chunk', i, len(chunk)
-    #  print >> sys.stderr, 'new mapping:', new_mapping
-    #  if previous_mapping:
-    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
-    #      if previous_mapping.get(ident) != new_mapping.get(ident):
-    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
-  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
+# compatibility with existing emcc, etc. scripts
+Cache = cache.Cache()
+JCache = cache.JCache(Cache)
+chunkify = cache.chunkify
 
 class JS:
   @staticmethod
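For reference, chunkify's contract is unchanged by the move: given (ident, text) pairs and a preferred chunk size, it packs the functions into roughly chunk_size-sized chunks, reuses the ident-to-chunk mapping pickled in chunking_file so incremental builds regenerate near-identical chunks (maximizing jcache hits), and returns the concatenated text of each chunk. A hedged sketch of a call; the inputs and the mapping path are made up for illustration:

    # Hypothetical call, in the spirit of how emscripten.py chunks compiled
    # functions for parallel processing (Python 2).
    funcs = [('_main', 'function _main() { ... }'),
             ('_helper', 'function _helper() { ... }')]
    out = cache.chunkify(funcs, 1024*1024, '/tmp/emscripten_chunks.pkl')
    # out is a list of strings: each entry is one chunk's concatenated
    # function bodies, with the idents stripped.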