aboutsummaryrefslogtreecommitdiff
path: root/emscripten.py
diff options
context:
space:
mode:
Diffstat (limited to 'emscripten.py')
-rwxr-xr-xemscripten.py135
1 files changed, 97 insertions, 38 deletions
diff --git a/emscripten.py b/emscripten.py
index 91b1de5a..15beb4ee 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -32,6 +32,7 @@ def path_from_root(*pathelems):
temp_files = shared.TempFiles()
compiler_engine = None
+jcache = False
def scan(ll, settings):
# blockaddress(@main, %23)
@@ -47,12 +48,13 @@ MIN_CHUNK_SIZE = 1024*1024
MAX_CHUNK_SIZE = float(os.environ.get('EMSCRIPT_MAX_CHUNK_SIZE') or 'inf') # configuring this is just for debugging purposes
def process_funcs(args):
- i, ll, settings_file, compiler, forwarded_file, libraries = args
+ i, funcs, meta, settings_file, compiler, forwarded_file, libraries = args
+ ll = ''.join(funcs) + '\n' + meta
funcs_file = temp_files.get('.func_%d.ll' % i).name
open(funcs_file, 'w').write(ll)
out = shared.run_js(compiler, compiler_engine, [settings_file, funcs_file, 'funcs', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
shared.try_delete(funcs_file)
- return out.split('//FORWARDED_DATA:')
+ return out
def emscript(infile, settings, outfile, libraries=[]):
"""Runs the emscripten LLVM-to-JS compiler. We parallelize as much as possible
@@ -73,6 +75,8 @@ def emscript(infile, settings, outfile, libraries=[]):
if DEBUG: print >> sys.stderr, 'emscript: ll=>js'
+ if jcache: shared.JCache.ensure()
+
# Pre-scan ll and alter settings as necessary
if DEBUG: t = time.time()
ll = open(infile).read()
@@ -84,31 +88,31 @@ def emscript(infile, settings, outfile, libraries=[]):
# Split input into the relevant parts for each phase
pre = []
funcs = [] # split up functions here, for parallelism later
+ func_idents = []
meta = [] # needed by each function XXX
- post = []
if DEBUG: t = time.time()
in_func = False
ll_lines = open(infile).readlines()
for line in ll_lines:
if in_func:
- funcs[-1].append(line)
+ funcs[-1][1].append(line)
if line.startswith('}'):
in_func = False
- funcs[-1] = ''.join(funcs[-1])
+ funcs[-1] = (funcs[-1][0], ''.join(funcs[-1][1]))
pre.append(line) # pre needs it to, so we know about all implemented functions
else:
+ if line.startswith(';'): continue
if line.startswith('define '):
in_func = True
- funcs.append([line])
+ funcs.append((line, [line])) # use the entire line as the identifier
pre.append(line) # pre needs it to, so we know about all implemented functions
elif line.find(' = type { ') > 0:
pre.append(line) # type
elif line.startswith('!'):
meta.append(line) # metadata
else:
- post.append(line) # global
- pre.append(line) # pre needs it to, so we know about globals in pre and funcs
+ pre.append(line) # pre needs it so we know about globals in pre and funcs. So emit globals there
ll_lines = None
meta = ''.join(meta)
if DEBUG and len(meta) > 1024*1024: print >> sys.stderr, 'emscript warning: large amounts of metadata, will slow things down'
@@ -120,24 +124,32 @@ def emscript(infile, settings, outfile, libraries=[]):
# print >> sys.stderr, '========== funcs ===============\n'
# for func in funcs:
# print >> sys.stderr, '\n// ===\n\n', ''.join(func)
- # print >> sys.stderr, '========== post ==============\n'
- # print >> sys.stderr, ''.join(post)
# print >> sys.stderr, '=========================\n'
# Save settings to a file to work around v8 issue 1579
settings_file = temp_files.get('.txt').name
+ settings_text = json.dumps(settings)
s = open(settings_file, 'w')
- s.write(json.dumps(settings))
+ s.write(settings_text)
s.close()
# Phase 1 - pre
if DEBUG: t = time.time()
pre_file = temp_files.get('.pre.ll').name
- open(pre_file, 'w').write(''.join(pre) + '\n' + meta)
- out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, pre_file, 'pre'] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
- js, forwarded_data = out.split('//FORWARDED_DATA:')
- outfile.write(js)
- js = None
+ pre_input = ''.join(pre) + '\n' + meta
+ out = None
+ if jcache:
+ keys = [pre_input, settings_text, ','.join(libraries)]
+ shortkey = shared.JCache.get_shortkey(keys)
+ out = shared.JCache.get(shortkey, keys)
+ if out and DEBUG: print >> sys.stderr, ' loading pre from jcache'
+ if not out:
+ open(pre_file, 'w').write(pre_input)
+ out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, pre_file, 'pre'] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
+ if jcache:
+ if DEBUG: print >> sys.stderr, ' saving pre to jcache'
+ shared.JCache.set(shortkey, keys, out)
+ pre, forwarded_data = out.split('//FORWARDED_DATA:')
forwarded_file = temp_files.get('.json').name
open(forwarded_file, 'w').write(forwarded_data)
if DEBUG: print >> sys.stderr, ' emscript: phase 1 took %s seconds' % (time.time() - t)
@@ -157,28 +169,58 @@ def emscript(infile, settings, outfile, libraries=[]):
if DEBUG: t = time.time()
forwarded_json = json.loads(forwarded_data)
indexed_functions = set()
- chunks = [] # bundles of functions
- curr = ''
- for i in range(len(funcs)):
- func = funcs[i]
- if len(curr) + len(func) < chunk_size:
- curr += func
+
+ chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None)
+
+ if jcache:
+ # load chunks from cache where we can # TODO: ignore small chunks
+ cached_outputs = []
+ def load_from_cache(chunk):
+ keys = [settings_text, forwarded_data, chunk]
+ shortkey = shared.JCache.get_shortkey(keys) # TODO: share shortkeys with later code
+ out = shared.JCache.get(shortkey, keys) # this is relatively expensive (pickling?)
+ if out:
+ cached_outputs.append(out)
+ return False
+ return True
+ chunks = filter(load_from_cache, chunks)
+ if len(cached_outputs) > 0:
+ if out and DEBUG: print >> sys.stderr, ' loading %d funcchunks from jcache' % len(cached_outputs)
else:
- chunks.append(curr)
- curr = func
- if curr:
- chunks.append(curr)
- curr = ''
+ cached_outputs = []
+
+ # TODO: minimize size of forwarded data from funcs to what we actually need
+
if cores == 1 and total_ll_size < MAX_CHUNK_SIZE: assert len(chunks) == 1, 'no point in splitting up without multiple cores'
- if DEBUG: print >> sys.stderr, ' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
- commands = [(i, chunks[i] + '\n' + meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
+ if len(chunks) > 0:
+ if DEBUG: print >> sys.stderr, ' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
+
+ commands = [(i, chunks[i], meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
- if len(chunks) > 1:
- pool = multiprocessing.Pool(processes=cores)
- outputs = pool.map(process_funcs, commands, chunksize=1)
+ if len(chunks) > 1:
+ pool = multiprocessing.Pool(processes=cores)
+ outputs = pool.map(process_funcs, commands, chunksize=1)
+ elif len(chunks) == 1:
+ outputs = [process_funcs(commands[0])]
else:
- outputs = [process_funcs(commands[0])]
+ outputs = []
+
+ if jcache:
+ # save chunks to cache
+ for i in range(len(chunks)):
+ chunk = chunks[i]
+ keys = [settings_text, forwarded_data, chunk]
+ shortkey = shared.JCache.get_shortkey(keys)
+ shared.JCache.set(shortkey, keys, outputs[i])
+ if out and DEBUG and len(chunks) > 0: print >> sys.stderr, ' saving %d funcchunks to jcache' % len(chunks)
+
+ if jcache: outputs += cached_outputs # TODO: preserve order
+
+ outputs = [output.split('//FORWARDED_DATA:') for output in outputs]
+
+ if DEBUG: print >> sys.stderr, ' emscript: phase 2 took %s seconds' % (time.time() - t)
+ if DEBUG: t = time.time()
funcs_js = ''.join([output[0] for output in outputs])
@@ -191,7 +233,7 @@ def emscript(infile, settings, outfile, libraries=[]):
for key in curr_forwarded_json['Functions']['indexedFunctions'].iterkeys():
indexed_functions.add(key)
outputs = None
- if DEBUG: print >> sys.stderr, ' emscript: phase 2 took %s seconds' % (time.time() - t)
+ if DEBUG: print >> sys.stderr, ' emscript: phase 2b took %s seconds' % (time.time() - t)
if DEBUG: t = time.time()
# calculations on merged forwarded data
@@ -201,23 +243,35 @@ def emscript(infile, settings, outfile, libraries=[]):
forwarded_json['Functions']['indexedFunctions'][indexed] = i # make sure not to modify this python object later - we use it in indexize
i += 2
forwarded_json['Functions']['nextIndex'] = i
+
indexing = forwarded_json['Functions']['indexedFunctions']
def indexize(js):
return re.sub(r'{{{ FI_([\w\d_$]+) }}}', lambda m: str(indexing[m.groups(0)[0]]), js)
- outfile.write(indexize(funcs_js))
+
+ blockaddrs = forwarded_json['Functions']['blockAddresses']
+ def blockaddrsize(js):
+ return re.sub(r'{{{ BA_([\w\d_$]+)\|([\w\d_$]+) }}}', lambda m: str(blockaddrs[m.groups(0)[0]][m.groups(0)[1]]), js)
+
+ #if DEBUG: outfile.write('// pre\n')
+ outfile.write(blockaddrsize(indexize(pre)))
+ pre = None
+
+ #if DEBUG: outfile.write('// funcs\n')
+ outfile.write(blockaddrsize(indexize(funcs_js)))
funcs_js = None
# forward
forwarded_data = json.dumps(forwarded_json)
forwarded_file = temp_files.get('.2.json').name
- open(forwarded_file, 'w').write(forwarded_data)
- if DEBUG: print >> sys.stderr, ' emscript: phase 2b took %s seconds' % (time.time() - t)
+ open(forwarded_file, 'w').write(indexize(forwarded_data))
+ if DEBUG: print >> sys.stderr, ' emscript: phase 2c took %s seconds' % (time.time() - t)
# Phase 3 - post
if DEBUG: t = time.time()
post_file = temp_files.get('.post.ll').name
- open(post_file, 'w').write(''.join(post) + '\n' + meta)
+ open(post_file, 'w').write('\n') # no input, just processing of forwarded data
out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, post_file, 'post', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
+ #if DEBUG: outfile.write('// post\n')
outfile.write(indexize(out))
if DEBUG: print >> sys.stderr, ' emscript: phase 3 took %s seconds' % (time.time() - t)
@@ -329,6 +383,10 @@ if __name__ == '__main__':
metavar='FOO=BAR',
help=('Overrides for settings defined in settings.js. '
'May occur multiple times.'))
+ parser.add_option('-j', '--jcache',
+ action='store_true',
+ default=False,
+ help=('Enable jcache (ccache-like caching of compilation results, for faster incremental builds).'))
# Convert to the same format that argparse would have produced.
keywords, positional = parser.parse_args()
@@ -338,6 +396,7 @@ if __name__ == '__main__':
if isinstance(keywords.outfile, basestring):
keywords.outfile = open(keywords.outfile, 'w')
compiler_engine = keywords.compiler
+ jcache = keywords.jcache
temp_files.run_and_clean(lambda: main(keywords))