author    Chad Austin <chad@imvu.com>    2013-01-30 17:50:33 -0800
committer Chad Austin <chad@imvu.com>    2013-03-04 19:31:47 -0800
commit    c919d824ac44031f00f925b5c44e86ac6c393292 (patch)
tree      b3517abaeb7eb6aa4f55e9a2a0d89f0fea9e2023
parent    a804dfb628279243de1e82504b99d386eb2d2a93 (diff)
Split Cache and JCache into objects and move them into cache.py so they can be loaded without shared.py
-rwxr-xr-x  emscripten.py      1
-rw-r--r--  tools/cache.py   206
-rw-r--r--  tools/shared.py  225
3 files changed, 212 insertions, 220 deletions
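
The diffstat tells the story of the refactor: roughly 220 lines of class-level cache machinery leave tools/shared.py, an object-based equivalent lands in the new tools/cache.py, and a three-line shim at the bottom of shared.py keeps existing shared.Cache / shared.JCache call sites working. A minimal sketch of the new calling convention follows; it is a hedged example, not part of the commit: it assumes Python 2 and an emscripten checkout of this era on sys.path, and the make_lib creator and demo directory are hypothetical.

import os, tempfile
from tools import cache  # importable without pulling in shared.py

def make_lib():
  # hypothetical creator callback: a real caller would compile dlmalloc
  # here and return the path to the resulting .bc file
  fd, path = tempfile.mkstemp(suffix='.bc')
  os.write(fd, 'fake bitcode')
  os.close(fd)
  return path

# Before this commit, shared.Cache.get('dlmalloc', make_lib) went through a
# class configured at import time from EM_CACHE. Now the directory is a
# per-instance choice:
c = cache.Cache(dirname=os.path.join(tempfile.gettempdir(), 'em_cache_demo'))
print c.get('dlmalloc', make_lib)  # runs the creator on the first call
print c.get('dlmalloc', make_lib)  # a plain file hit afterwards
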
diff --git a/emscripten.py b/emscripten.py
index 869e512d..4f3f2a77 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -13,6 +13,7 @@ import os, sys, json, optparse, subprocess, re, time, multiprocessing, functools
from tools import shared
from tools import jsrun
+from tools import cache
__rootpath__ = os.path.abspath(os.path.dirname(__file__))
def path_from_root(*pathelems):
diff --git a/tools/cache.py b/tools/cache.py
new file mode 100644
index 00000000..78a11ba0
--- /dev/null
+++ b/tools/cache.py
@@ -0,0 +1,206 @@
+import os.path, sys, shutil, hashlib, cPickle
+
+# Permanent cache for dlmalloc and stdlibc++
+class Cache:
+  def __init__(self, dirname=None):
+    if dirname is None:
+      dirname = os.environ.get('EM_CACHE')
+    if not dirname:
+      dirname = os.path.expanduser(os.path.join('~', '.emscripten_cache'))
+    self.dirname = dirname
+
+  def ensure(self):
+    if not os.path.exists(self.dirname):
+      os.makedirs(self.dirname)
+
+  def erase(self):
+    shutil.rmtree(self.dirname, ignore_errors=True)
+
+  def get_path(self, shortname):
+    return os.path.join(self.dirname, shortname)
+
+  # Request a cached file. If it isn't in the cache, it will be created with
+  # the given creator function
+  def get(self, shortname, creator, extension='.bc'):
+    if not shortname.endswith(extension): shortname += extension
+    cachename = os.path.join(self.dirname, shortname)
+    if os.path.exists(cachename):
+      return cachename
+    self.ensure()
+    shutil.copyfile(creator(), cachename)
+    return cachename
+
+# JS-specific cache. We cache the results of compilation and optimization,
+# so that in incremental builds we can just load from cache.
+# We cache reasonably-large-sized chunks
+class JCache:
+  def __init__(self, cache):
+    self.cache = cache
+    self.dirname = os.path.join(cache.dirname, 'jcache')
+
+  def ensure(self):
+    self.cache.ensure()
+    if not os.path.exists(self.dirname):
+      os.makedirs(self.dirname)
+
+  def get_shortkey(self, keys):
+    if type(keys) not in [list, tuple]:
+      keys = [keys]
+    ret = ''
+    for key in keys:
+      assert type(key) == str
+      ret += hashlib.md5(key).hexdigest()
+    return ret
+
+  def get_cachename(self, shortkey):
+    return os.path.join(self.dirname, shortkey)
+
+  # Returns a cached value, if it exists. Make sure the full key matches
+  def get(self, shortkey, keys):
+    #if DEBUG: print >> sys.stderr, 'jcache get?', shortkey
+    cachename = self.get_cachename(shortkey)
+    if not os.path.exists(cachename):
+      #if DEBUG: print >> sys.stderr, 'jcache none at all'
+      return
+    data = cPickle.Unpickler(open(cachename, 'rb')).load()
+    if len(data) != 2:
+      #if DEBUG: print >> sys.stderr, 'jcache error in get'
+      return
+    oldkeys = data[0]
+    if len(oldkeys) != len(keys):
+      #if DEBUG: print >> sys.stderr, 'jcache collision (a)'
+      return
+    for i in range(len(oldkeys)):
+      if oldkeys[i] != keys[i]:
+        #if DEBUG: print >> sys.stderr, 'jcache collision (b)'
+        return
+    #if DEBUG: print >> sys.stderr, 'jcache win'
+    return data[1]
+
+  # Sets the cached value for a key (from get_shortkey)
+  def set(self, shortkey, keys, value):
+    cachename = self.get_cachename(shortkey)
+    cPickle.Pickler(open(cachename, 'wb')).dump([keys, value])
+    #if DEBUG:
+    #  for i in range(len(keys)):
+    #    open(cachename + '.key' + str(i), 'w').write(keys[i])
+    #  open(cachename + '.value', 'w').write(value)
+
+# Given a set of functions of form (ident, text), and a preferred chunk size,
+# generates a set of chunks for parallel processing and caching.
+# It is very important to generate similar chunks in incremental builds, in
+# order to maximize the chance of cache hits. To achieve that, we save the
+# chunking used in the previous compilation of this phase, and we try to
+# generate the same chunks, barring big differences in function sizes that
+# violate our chunk size guideline. If caching is not used, chunking_file
+# should be None
+def chunkify(funcs, chunk_size, chunking_file, DEBUG=False):
+  previous_mapping = None
+  if chunking_file:
+    if os.path.exists(chunking_file):
+      try:
+        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
+      except Exception, e:
+        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
+    else:
+      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
+  chunks = []
+  if previous_mapping:
+    # initialize with previous chunking
+    news = []
+    for func in funcs:
+      ident, data = func
+      assert ident, 'need names for jcache chunking'
+      if not ident in previous_mapping:
+        news.append(func)
+      else:
+        n = previous_mapping[ident]
+        while n >= len(chunks): chunks.append([])
+        chunks[n].append(func)
+    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
+    # add news and adjust for new sizes
+    spilled = news
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      size = sum([len(func[1]) for func in chunk])
+      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
+      while size > 1.5*chunk_size and len(chunk) > 1:
+        spill = chunk.pop()
+        spilled.append(spill)
+        size -= len(spill[1])
+    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
+    for chunk in chunks:
+      size = sum([len(func[1]) for func in chunk])
+      while size < 0.66*chunk_size and len(spilled) > 0:
+        spill = spilled.pop()
+        chunk.append(spill)
+        size += len(spill[1])
+    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+    funcs = spilled # we will allocate these into chunks as if they were normal inputs
+    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
+  # initialize reasonably, the rest of the funcs we need to split out
+  curr = []
+  total_size = 0
+  for i in range(len(funcs)):
+    func = funcs[i]
+    curr_size = len(func[1])
+    if total_size + curr_size < chunk_size:
+      curr.append(func)
+      total_size += curr_size
+    else:
+      chunks.append(curr)
+      curr = [func]
+      total_size = curr_size
+  if curr:
+    chunks.append(curr)
+    curr = None
+  if chunking_file:
+    # sort within each chunk, to keep the order identical
+    for chunk in chunks:
+      chunk.sort(key=lambda func: func[0])
+    # save new mapping info
+    new_mapping = {}
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      for ident, data in chunk:
+        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
+        new_mapping[ident] = i
+    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
+    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
+    #if DEBUG:
+    #  for i in range(len(chunks)):
+    #    chunk = chunks[i]
+    #    print >> sys.stderr, 'final chunk', i, len(chunk)
+    #  print >> sys.stderr, 'new mapping:', new_mapping
+    #  if previous_mapping:
+    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
+    #      if previous_mapping.get(ident) != new_mapping.get(ident):
+    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
+  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
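
For reference, a hedged sketch of how the new JCache rides on top of Cache (again Python 2, with made-up keys and values): get_shortkey() concatenates one md5 digest per key to name the cache file, and the full key list is pickled next to the value so that get() can reject hash collisions and stale entries.

import os, tempfile
from tools import cache

c = cache.Cache(dirname=os.path.join(tempfile.gettempdir(), 'em_cache_demo'))
jc = cache.JCache(c)
jc.ensure()  # creates both the cache dir and its jcache/ subdirectory

keys = ['chunk source text', 'settings blob']  # illustrative full key
shortkey = jc.get_shortkey(keys)               # one md5 digest per key, concatenated
assert jc.get(shortkey, keys) is None          # cold cache: nothing stored yet
jc.set(shortkey, keys, 'optimized js output')
print jc.get(shortkey, keys)                   # full keys match: a hit
print jc.get(shortkey, ['other', 'stale'])     # same shortkey, wrong keys: None
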
diff --git a/tools/shared.py b/tools/shared.py
index 35f45728..f1d133d7 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1,7 +1,7 @@
import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, zlib, re
from subprocess import Popen, PIPE, STDOUT
from tempfile import mkstemp
-from . import jsrun
+from . import jsrun, cache
def listify(x):
if type(x) is not list: return [x]
@@ -1236,225 +1236,10 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
open(outfile, 'w').write(src)
return outfile
-# Permanent cache for dlmalloc and stdlibc++
-class Cache:
-  dirname = os.environ.get('EM_CACHE')
-  if not dirname:
-    dirname = os.path.expanduser(os.path.join('~', '.emscripten_cache'))
-
-  @classmethod
-  def ensure(self):
-    if not os.path.exists(self.dirname):
-      os.makedirs(self.dirname)
-
-  @classmethod
-  def erase(self):
-    shutil.rmtree(self.dirname, ignore_errors=True)
-
-  @classmethod
-  def get_path(self, shortname):
-    return os.path.join(self.dirname, shortname)
-
-  # Request a cached file. If it isn't in the cache, it will be created with
-  # the given creator function
-  @classmethod
-  def get(self, shortname, creator, extension='.bc'):
-    if not shortname.endswith(extension): shortname += extension
-    cachename = os.path.join(self.dirname, shortname)
-    if os.path.exists(cachename):
-      return cachename
-    Cache.ensure()
-    shutil.copyfile(creator(), cachename)
-    return cachename
-
-# JS-specific cache. We cache the results of compilation and optimization,
-# so that in incremental builds we can just load from cache.
-# We cache reasonably-large-sized chunks
-class JCache:
-  dirname = os.path.join(Cache.dirname, 'jcache')
-
-  @classmethod
-  def ensure(self):
-    Cache.ensure()
-    if not os.path.exists(self.dirname):
-      os.makedirs(self.dirname)
-
-  @staticmethod
-  def get_shortkey(keys):
-    if type(keys) not in [list, tuple]:
-      keys = [keys]
-    ret = ''
-    for key in keys:
-      assert type(key) == str
-      ret += hashlib.md5(key).hexdigest()
-    return ret
-
-  @classmethod
-  def get_cachename(self, shortkey):
-    return os.path.join(self.dirname, shortkey)
-
-  # Returns a cached value, if it exists. Make sure the full key matches
-  @classmethod
-  def get(self, shortkey, keys):
-    if DEBUG_CACHE: print >> sys.stderr, 'jcache get?', shortkey
-    cachename = self.get_cachename(shortkey)
-    if not os.path.exists(cachename):
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache none at all'
-      return
-    try:
-      data = cPickle.loads(zlib.decompress(open(cachename).read()))
-    except Exception, e:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache decompress/unpickle error:', e
-      return
-    if len(data) != 2:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache error in get'
-      return
-    oldkeys = data[0]
-    if len(oldkeys) != len(keys):
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache collision (a)'
-      return
-    for i in range(len(oldkeys)):
-      if oldkeys[i] != keys[i]:
-        if DEBUG_CACHE: print >> sys.stderr, 'jcache collision (b)'
-        return
-    if DEBUG_CACHE: print >> sys.stderr, 'jcache win'
-    return data[1]
-
-  # Sets the cached value for a key (from get_key)
-  @classmethod
-  def set(self, shortkey, keys, value):
-    if DEBUG_CACHE: print >> sys.stderr, 'save to cache', shortkey
-    cachename = self.get_cachename(shortkey)
-    try:
-      f = open(cachename, 'w')
-      f.write(zlib.compress(cPickle.dumps([keys, value])))
-      f.close()
-    except Exception, e:
-      if DEBUG_CACHE: print >> sys.stderr, 'jcache compress/pickle error:', e
-      return
-    #if DEBUG:
-    #  for i in range(len(keys)):
-    #    open(cachename + '.key' + str(i), 'w').write(keys[i])
-    #  open(cachename + '.value', 'w').write(value)
-
-# Given a set of functions of form (ident, text), and a preferred chunk size,
-# generates a set of chunks for parallel processing and caching.
-# It is very important to generate similar chunks in incremental builds, in
-# order to maximize the chance of cache hits. To achieve that, we save the
-# chunking used in the previous compilation of this phase, and we try to
-# generate the same chunks, barring big differences in function sizes that
-# violate our chunk size guideline. If caching is not used, chunking_file
-# should be None
-@classmethod
-def chunkify(funcs, chunk_size, chunking_file):
-  previous_mapping = None
-  if chunking_file:
-    if os.path.exists(chunking_file):
-      try:
-        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
-        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
-      except Exception, e:
-        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
-    else:
-      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
-  chunks = []
-  if previous_mapping:
-    # initialize with previous chunking
-    news = []
-    for func in funcs:
-      ident, data = func
-      assert ident, 'need names for jcache chunking'
-      if not ident in previous_mapping:
-        news.append(func)
-      else:
-        n = previous_mapping[ident]
-        while n >= len(chunks): chunks.append([])
-        chunks[n].append(func)
-    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
-    # add news and adjust for new sizes
-    spilled = news
-    for i in range(len(chunks)):
-      chunk = chunks[i]
-      size = sum([len(func[1]) for func in chunk])
-      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
-      while size > 1.5*chunk_size and len(chunk) > 1:
-        spill = chunk.pop()
-        spilled.append(spill)
-        size -= len(spill[1])
-    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
-    for chunk in chunks:
-      size = sum([len(func[1]) for func in chunk])
-      while size < 0.66*chunk_size and len(spilled) > 0:
-        spill = spilled.pop()
-        chunk.append(spill)
-        size += len(spill[1])
-    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
-    funcs = spilled # we will allocate these into chunks as if they were normal inputs
-    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
-  # initialize reasonably, the rest of the funcs we need to split out
-  curr = []
-  total_size = 0
-  for i in range(len(funcs)):
-    func = funcs[i]
-    curr_size = len(func[1])
-    if total_size + curr_size < chunk_size:
-      curr.append(func)
-      total_size += curr_size
-    else:
-      chunks.append(curr)
-      curr = [func]
-      total_size = curr_size
-  if curr:
-    chunks.append(curr)
-    curr = None
-  if chunking_file:
-    # sort within each chunk, to keep the order identical
-    for chunk in chunks:
-      chunk.sort(key=lambda func: func[0])
-    # save new mapping info
-    new_mapping = {}
-    for i in range(len(chunks)):
-      chunk = chunks[i]
-      for ident, data in chunk:
-        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
-        new_mapping[ident] = i
-    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
-    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
-    #if DEBUG:
-    #  for i in range(len(chunks)):
-    #    chunk = chunks[i]
-    #    print >> sys.stderr, 'final chunk', i, len(chunk)
-    #  print >> sys.stderr, 'new mapping:', new_mapping
-    #  if previous_mapping:
-    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
-    #      if previous_mapping.get(ident) != new_mapping.get(ident):
-    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
-  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
+# compatibility with existing emcc, etc. scripts
+Cache = cache.Cache()
+JCache = cache.JCache(Cache)
+chunkify = cache.chunkify
class JS:
@staticmethod
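
Closing the loop on the comment above chunkify: the incremental-build property it promises can be checked directly, because passing a chunking file makes the second run replay the saved ident-to-chunk mapping. A hedged sketch with synthetic (ident, text) pairs; all names, sizes, and paths below are made up:

import os, tempfile
from tools import cache

funcs = [('f%d' % i, 'function f%d(){}' % i) for i in range(10)]  # 15 bytes each
mapping_file = os.path.join(tempfile.gettempdir(), 'chunking_demo')  # hypothetical
if os.path.exists(mapping_file): os.unlink(mapping_file)

first = cache.chunkify(funcs, 60, mapping_file)   # greedy split, saves the mapping
second = cache.chunkify(funcs, 60, mapping_file)  # replays the saved mapping
assert first == second  # stable chunks are what make jcache hits possible
print len(first), 'chunks'
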