aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2012-11-20 20:50:20 +0100
committerAlon Zakai <alonzakai@gmail.com>2012-11-21 20:47:02 +0100
commit60a11cfaec3ad560cd7fcedf618001bd9c756b1c (patch)
treebd52dfb7481ebe89bb65b7f2a3bed579168d85a4
parenta2b241e70b7f7606c913e916cffb69514d548ade (diff)
improve chunkify to consider previous chunkings when caching
-rwxr-xr-xemscripten.py2
-rwxr-xr-xtests/runner.py14
-rw-r--r--tools/shared.py54
3 files changed, 61 insertions, 9 deletions
diff --git a/emscripten.py b/emscripten.py
index 945df09a..e90acea7 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -171,7 +171,7 @@ def emscript(infile, settings, outfile, libraries=[]):
forwarded_json = json.loads(forwarded_data)
indexed_functions = set()
- chunks = shared.JCache.chunkify(funcs, chunk_size)
+ chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None)
if cores == 1 and total_ll_size < MAX_CHUNK_SIZE: assert len(chunks) == 1, 'no point in splitting up without multiple cores'
if DEBUG: print >> sys.stderr, ' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
diff --git a/tests/runner.py b/tests/runner.py
index 5bb53323..87d02fd5 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -10358,8 +10358,8 @@ fi
assert not os.path.exists(EMCC_CACHE)
try:
- emcc_debug = os.environ.get('EMCC_DEBUG')
os.environ['EMCC_DEBUG'] ='1'
+ self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
# Building a file that doesn't need cached stuff should not trigger cache generation
output = self.do([EMCC, path_from_root('tests', 'hello_world.cpp')])
@@ -10375,8 +10375,6 @@ fi
ll_name1 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-2-ll.ll')
ll_name2 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-3-ll.ll')
- self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
-
# Building a file that *does* need dlmalloc *should* trigger cache generation, but only the first time
for filename, libname in [('hello_malloc.cpp', 'dlmalloc'), ('hello_libcxx.cpp', 'libcxx')]:
for i in range(3):
@@ -10409,8 +10407,7 @@ fi
print i, 'll metadata should be removed in -O1 and O2 by default', ll[-300:]
assert False
finally:
- if emcc_debug:
- os.environ['EMCC_DEBUG'] = emcc_debug
+ del os.environ['EMCC_DEBUG']
# Manual cache clearing
assert os.path.exists(EMCC_CACHE)
@@ -10447,6 +10444,9 @@ fi
try:
os.environ['EMCC_DEBUG'] = '1'
+ self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
+
+ assert not os.path.exists(JCache.get_cachename('emscript_files'))
src = None
for args, input_file, expect_save, expect_load in [
@@ -10461,6 +10461,7 @@ fi
(['--jcache'], 'hello_world_loop.cpp', False, True), # go back to old file, experience caching
]:
print args, input_file, expect_save, expect_load
+ self.clear()
out, err = Popen(['python', EMCC, path_from_root('tests', input_file)] + args, stdout=PIPE, stderr=PIPE).communicate()
assert (PRE_SAVE_MSG in err) == expect_save, err
      assert (PRE_LOAD_MSG in err) == expect_load, err
@@ -10469,6 +10470,9 @@ fi
src = None
else:
assert src == curr, 'caching must not affect codegen'
+
+ assert os.path.exists(JCache.get_cachename('emscript_files'))
+
finally:
del os.environ['EMCC_DEBUG']
diff --git a/tools/shared.py b/tools/shared.py
index 02d92855..e8767e40 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1208,10 +1208,50 @@ class JCache:
# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
# It is very important to generate similar chunks in incremental builds, in
- # order to maximize the chance of cache hits.
+ # order to maximize the chance of cache hits. To achieve that, we save the
+ # chunking used in the previous compilation of this phase, and we try to
+ # generate the same chunks, barring big differences in function sizes that
+ # violate our chunk size guideline. If caching is not used, chunking_file
  + should be None.
@staticmethod
- def chunkify(funcs, chunk_size):
- chunks = [] # bundles of functions
+ def chunkify(funcs, chunk_size, chunking_file):
+ previous_mapping = None
+ if chunking_file:
+ chunking_file = JCache.get_cachename(chunking_file)
+ if os.path.exists(chunking_file):
+ try:
+ previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+ except:
+ pass
+ chunks = []
+ if previous_mapping:
+ # initialize with previous chunking
+ news = []
+ for func in funcs:
+ ident, data = func
+ if not ident in previous_mapping:
+ news.append(func)
+ else:
+ n = previous_mapping[ident]
          while n >= len(chunks): chunks.append([])
+ chunks[n].append(func)
+ # add news and adjust for new sizes
+ spilled = news
+ for chunk in chunks:
+ size = sum([len(func[1]) for func in chunk])
+ while size > 1.5*chunk_size and len(chunk) > 0:
+ spill = chunk.pop()
+ spilled.append(spill)
+ size -= len(spill[1])
+ for chunk in chunks:
+ size = sum([len(func[1]) for func in chunk])
+ while size < 0.66*chunk_size and len(spilled) > 0:
+ spill = spilled.pop()
+ chunk.append(spill)
+ size += len(spill[1])
+ chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+ funcs = spilled # we will allocate these into chunks as if they were normal inputs
+ # initialize reasonably, the rest of the funcs we need to split out
curr = []
for i in range(len(funcs)):
func = funcs[i]
@@ -1223,6 +1263,14 @@ class JCache:
if curr:
chunks.append(curr)
curr = None
+ if chunking_file:
+ # save new mapping info
+ new_mapping = {}
+ for i in range(len(chunks)):
+ chunk = chunks[i]
+ for ident, data in chunk:
+ new_mapping[ident] = i
+ cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
return chunks
class JS: