-rwxr-xr-x  emscripten.py    2
-rwxr-xr-x  tests/runner.py 14
-rw-r--r--  tools/shared.py 54
3 files changed, 61 insertions, 9 deletions
diff --git a/emscripten.py b/emscripten.py
index 945df09a..e90acea7 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -171,7 +171,7 @@ def emscript(infile, settings, outfile, libraries=[]):
   forwarded_json = json.loads(forwarded_data)
   indexed_functions = set()
 
-  chunks = shared.JCache.chunkify(funcs, chunk_size)
+  chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None)
   if cores == 1 and total_ll_size < MAX_CHUNK_SIZE:
     assert len(chunks) == 1, 'no point in splitting up without multiple cores'
   if DEBUG: print >> sys.stderr, '  emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
diff --git a/tests/runner.py b/tests/runner.py
index 5bb53323..87d02fd5 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -10358,8 +10358,8 @@ fi
     assert not os.path.exists(EMCC_CACHE)
 
     try:
-      emcc_debug = os.environ.get('EMCC_DEBUG')
       os.environ['EMCC_DEBUG'] = '1'
+      self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
 
       # Building a file that doesn't need cached stuff should not trigger cache generation
       output = self.do([EMCC, path_from_root('tests', 'hello_world.cpp')])
@@ -10375,8 +10375,6 @@ fi
       ll_name1 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-2-ll.ll')
       ll_name2 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-3-ll.ll')
 
-      self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
-
       # Building a file that *does* need dlmalloc *should* trigger cache generation, but only the first time
       for filename, libname in [('hello_malloc.cpp', 'dlmalloc'), ('hello_libcxx.cpp', 'libcxx')]:
         for i in range(3):
@@ -10409,8 +10407,7 @@ fi
             print i, 'll metadata should be removed in -O1 and O2 by default', ll[-300:]
             assert False
     finally:
-      if emcc_debug:
-        os.environ['EMCC_DEBUG'] = emcc_debug
+      del os.environ['EMCC_DEBUG']
 
     # Manual cache clearing
     assert os.path.exists(EMCC_CACHE)
@@ -10447,6 +10444,9 @@ fi
 
     try:
       os.environ['EMCC_DEBUG'] = '1'
+      self.working_dir = os.path.join(TEMP_DIR, 'emscripten_temp')
+
+      assert not os.path.exists(JCache.get_cachename('emscript_files'))
 
       src = None
       for args, input_file, expect_save, expect_load in [
@@ -10461,6 +10461,7 @@ fi
         (['--jcache'], 'hello_world_loop.cpp', False, True), # go back to old file, experience caching
       ]:
         print args, input_file, expect_save, expect_load
+        self.clear()
        out, err = Popen(['python', EMCC, path_from_root('tests', input_file)] + args, stdout=PIPE, stderr=PIPE).communicate()
         assert (PRE_SAVE_MSG in err) == expect_save, err
         assert (PRE_LOAD_MSG in err) == expect_load, err
@@ -10469,6 +10470,9 @@ fi
           src = None
         else:
           assert src == curr, 'caching must not affect codegen'
+
+      assert os.path.exists(JCache.get_cachename('emscript_files'))
+
     finally:
       del os.environ['EMCC_DEBUG']
 
diff --git a/tools/shared.py b/tools/shared.py
index 02d92855..e8767e40 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1208,10 +1208,50 @@ class JCache:
   # Given a set of functions of form (ident, text), and a preferred chunk size,
   # generates a set of chunks for parallel processing and caching.
   # It is very important to generate similar chunks in incremental builds, in
-  # order to maximize the chance of cache hits.
+  # order to maximize the chance of cache hits. To achieve that, we save the
+  # chunking used in the previous compilation of this phase, and we try to
+  # generate the same chunks, barring big differences in function sizes that
+  # violate our chunk size guideline. If caching is not used, chunking_file
+  # should be None.
   @staticmethod
-  def chunkify(funcs, chunk_size):
-    chunks = [] # bundles of functions
+  def chunkify(funcs, chunk_size, chunking_file):
+    previous_mapping = None
+    if chunking_file:
+      chunking_file = JCache.get_cachename(chunking_file)
+      if os.path.exists(chunking_file):
+        try:
+          previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+        except:
+          pass
+    chunks = []
+    if previous_mapping:
+      # initialize with previous chunking
+      news = []
+      for func in funcs:
+        ident, data = func
+        if not ident in previous_mapping:
+          news.append(func)
+        else:
+          n = previous_mapping[ident]
+          while n >= len(chunks): chunks.append([])
+          chunks[n].append(func)
+      # add news and adjust for new sizes
+      spilled = news
+      for chunk in chunks:
+        size = sum([len(func[1]) for func in chunk])
+        while size > 1.5*chunk_size and len(chunk) > 0:
+          spill = chunk.pop()
+          spilled.append(spill)
+          size -= len(spill[1])
+      for chunk in chunks:
+        size = sum([len(func[1]) for func in chunk])
+        while size < 0.66*chunk_size and len(spilled) > 0:
+          spill = spilled.pop()
+          chunk.append(spill)
+          size += len(spill[1])
+      chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+      funcs = spilled # we will allocate these into chunks as if they were normal inputs
+    # initialize reasonably, the rest of the funcs we need to split out
     curr = []
     for i in range(len(funcs)):
       func = funcs[i]
@@ -1223,6 +1263,14 @@ class JCache:
       if curr:
         chunks.append(curr)
         curr = None
+    if chunking_file:
+      # save new mapping info
+      new_mapping = {}
+      for i in range(len(chunks)):
+        chunk = chunks[i]
+        for ident, data in chunk:
+          new_mapping[ident] = i
+      cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
     return chunks
 
 class JS:
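
For reference, the chunk-persistence idea added to JCache.chunkify above can be illustrated in isolation. The following is a minimal standalone sketch, not the emscripten code itself: the function name chunkify_with_history, the mapping_path argument, and the demo file path are illustrative assumptions, and it uses the plain pickle module instead of the original's cPickle plus JCache.get_cachename. The 1.5x spill and 0.66x refill thresholds mirror the patch.

# Standalone sketch of the chunk-persistence scheme (illustrative names, not emscripten APIs).
import os
import pickle
import tempfile

def chunkify_with_history(funcs, chunk_size, mapping_path=None):
  # funcs: list of (ident, text) pairs; mapping_path: optional pickle file
  # holding the ident -> chunk-index mapping saved by the previous run.
  previous_mapping = {}
  if mapping_path and os.path.exists(mapping_path):
    try:
      with open(mapping_path, 'rb') as f:
        previous_mapping = pickle.load(f)
    except Exception:
      previous_mapping = {}  # a stale or corrupt mapping simply means no reuse

  chunks = []
  leftovers = list(funcs)
  if previous_mapping:
    # Seed chunks from the previous layout so unchanged functions land in the
    # same chunk and can hit the per-chunk cache.
    leftovers = []
    for ident, text in funcs:
      n = previous_mapping.get(ident)
      if n is None:
        leftovers.append((ident, text))
      else:
        while n >= len(chunks):
          chunks.append([])
        chunks[n].append((ident, text))
    # Spill from chunks that grew far past the guideline (> 1.5x) ...
    for chunk in chunks:
      size = sum(len(text) for _, text in chunk)
      while size > 1.5 * chunk_size and chunk:
        ident, text = chunk.pop()
        leftovers.append((ident, text))
        size -= len(text)
    # ... and top up chunks that shrank well below it (< 0.66x).
    for chunk in chunks:
      size = sum(len(text) for _, text in chunk)
      while size < 0.66 * chunk_size and leftovers:
        ident, text = leftovers.pop()
        chunk.append((ident, text))
        size += len(text)
    chunks = [chunk for chunk in chunks if chunk]  # drop chunks emptied by spilling

  # Greedily pack whatever is left (on a first run, that is everything).
  curr = []
  for ident, text in leftovers:
    curr.append((ident, text))
    if sum(len(t) for _, t in curr) >= chunk_size:
      chunks.append(curr)
      curr = []
  if curr:
    chunks.append(curr)

  if mapping_path:
    # Persist the new layout so the next incremental build reproduces it.
    new_mapping = dict((ident, i) for i, chunk in enumerate(chunks) for ident, _ in chunk)
    with open(mapping_path, 'wb') as f:
      pickle.dump(new_mapping, f)
  return chunks

if __name__ == '__main__':
  path = os.path.join(tempfile.gettempdir(), 'chunking_demo.pkl')
  if os.path.exists(path):
    os.remove(path)
  funcs = [('f%d' % i, 'x' * 100) for i in range(50)]
  first = chunkify_with_history(funcs, 1000, path)   # no mapping yet: greedy packing
  second = chunkify_with_history(funcs, 1000, path)  # reuses the saved mapping
  assert first == second  # unchanged input reproduces the same chunking

The two thresholds are the design point: a previously cached chunk is left untouched unless it drifts well away from the target size, so most chunks are byte-identical across incremental builds and their cached codegen can be reused.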