Diffstat (limited to 'tools/js_optimizer.py')
-rw-r--r-- | tools/js_optimizer.py | 206
1 file changed, 174 insertions, 32 deletions
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py
index b72a2084..0452a725 100644
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -1,8 +1,9 @@
-import os, sys, subprocess, multiprocessing, re
+import os, sys, subprocess, multiprocessing, re, string, json
 import shared
 
-temp_files = shared.TempFiles()
+configuration = shared.configuration
+temp_files = configuration.get_temp_files()
 
 __rootpath__ = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
 def path_from_root(*pathelems):
   return os.path.join(__rootpath__, *pathelems)
@@ -10,24 +11,100 @@ def path_from_root(*pathelems):
 JS_OPTIMIZER = path_from_root('tools', 'js-optimizer.js')
 
-BEST_JS_PROCESS_SIZE = 1024*1024
+NUM_CHUNKS_PER_CORE = 1.5
+MIN_CHUNK_SIZE = int(os.environ.get('EMCC_JSOPT_MIN_CHUNK_SIZE') or 1024*1024) # configuring this is just for debugging purposes
+MAX_CHUNK_SIZE = 20*1024*1024
 
 WINDOWS = sys.platform.startswith('win')
 
 DEBUG = os.environ.get('EMCC_DEBUG')
 
+func_sig = re.compile('( *)function ([_\w$]+)\(')
+
+class Minifier:
+  '''
+    asm.js minification support. We calculate possible names and minification of
+    globals here, then pass that into the parallel js-optimizer.js runners which
+    during registerize perform minification of locals.
+  '''
+
+  def __init__(self, js, js_engine):
+    self.js = js
+    self.js_engine = js_engine
+
+    # Create list of valid short names
+
+    MAX_NAMES = 80000
+    INVALID_2 = set(['do', 'if', 'in'])
+    INVALID_3 = set(['for', 'new', 'try', 'var', 'env'])
+
+    self.names = []
+    init_possibles = string.ascii_letters + '_$'
+    later_possibles = init_possibles + string.digits
+    for a in init_possibles:
+      if len(self.names) >= MAX_NAMES: break
+      self.names.append(a)
+    for a in init_possibles:
+      for b in later_possibles:
+        if len(self.names) >= MAX_NAMES: break
+        curr = a + b
+        if curr not in INVALID_2: self.names.append(curr)
+    for a in init_possibles:
+      for b in later_possibles:
+        for c in later_possibles:
+          if len(self.names) >= MAX_NAMES: break
+          curr = a + b + c
+          if curr not in INVALID_3: self.names.append(curr)
+    #print >> sys.stderr, self.names
+
+  def minify_shell(self, shell, compress):
+    #print >> sys.stderr, "MINIFY SHELL 1111111111", shell, "\n222222222222222"
+    # Run through js-optimizer.js to find and minify the global symbols
+    # We send it the globals, which it parses at the proper time. JS decides how
+    # to minify all global names, we receive a dictionary back, which is then
+    # used by the function processors
+
+    shell = shell.replace('0.0', '13371337') # avoid uglify doing 0.0 => 0
+
+    # Find all globals in the JS functions code
+    self.globs = [m.group(2) for m in func_sig.finditer(self.js)]
+
+    temp_file = temp_files.get('.minifyglobals.js').name
+    f = open(temp_file, 'w')
+    f.write(shell)
+    f.write('\n')
+    f.write('// MINIFY_INFO:' + self.serialize())
+    f.close()
+
+    output = subprocess.Popen(self.js_engine + [JS_OPTIMIZER, temp_file, 'minifyGlobals', 'noPrintMetadata'] + (['compress'] if compress else []), stdout=subprocess.PIPE).communicate()[0]
+    assert len(output) > 0 and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output
+    #print >> sys.stderr, "minified SHELL 3333333333333333", output, "\n44444444444444444444"
+    code, metadata = output.split('// MINIFY_INFO:')
+    self.globs = json.loads(metadata)
+    return code.replace('13371337', '0.0')
+
+
+  def serialize(self):
+    return json.dumps({
+      'names': self.names,
+      'globals': self.globs
+    })
+
 def run_on_chunk(command):
   filename = command[2] # XXX hackish
+  #print >> sys.stderr, 'running js optimizer command', ' '.join(command), '""""', open(filename).read()
   output = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
   assert len(output) > 0 and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output
   filename = temp_files.get(os.path.basename(filename) + '.jo.js').name
   f = open(filename, 'w')
   f.write(output)
   f.close()
+  if DEBUG and not shared.WINDOWS: print >> sys.stderr, '.' # Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console.
   return filename
 
 def run_on_js(filename, passes, js_engine, jcache):
-
+  if isinstance(jcache, bool) and jcache: jcache = shared.JCache
   if jcache: shared.JCache.ensure()
   if type(passes) == str:
@@ -42,10 +119,25 @@ def run_on_js(filename, passes, js_engine, jcache):
   suffix_start = js.find(suffix_marker)
   suffix = ''
   if suffix_start >= 0:
-    suffix = js[suffix_start:js.find('\n', suffix_start)] + '\n'
+    suffix_end = js.find('\n', suffix_start)
+    suffix = js[suffix_start:suffix_end] + '\n'
     # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything.
     generated = set(eval(suffix[len(suffix_marker)+1:]))
 
+  # Find markers
+  start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n'
+  end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n'
+  start_funcs = js.find(start_funcs_marker)
+  end_funcs = js.rfind(end_funcs_marker)
+  #assert (start_funcs >= 0) == (end_funcs >= 0) == (not not suffix)
+
+  asm_registerize = 'asm' in passes and 'registerize' in passes
+  if asm_registerize:
+    start_asm_marker = '// EMSCRIPTEN_START_ASM\n'
+    end_asm_marker = '// EMSCRIPTEN_END_ASM\n'
+    start_asm = js.find(start_asm_marker)
+    end_asm = js.rfind(end_asm_marker)
+    assert (start_asm >= 0) == (end_asm >= 0)
+
   if not suffix and jcache:
     # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered
     # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure
@@ -53,27 +145,45 @@ def run_on_js(filename, passes, js_engine, jcache):
     if DEBUG: print >>sys.stderr, 'js optimizer: no metadata, so disabling jcache'
     jcache = False
 
-  # If we process only generated code, find that and save the rest on the side
-  func_sig = re.compile('( *)function (_[\w$]+)\(')
   if suffix:
-    pos = 0
-    gen_start = 0
-    gen_end = 0
-    while 1:
-      m = func_sig.search(js, pos)
-      if not m: break
-      pos = m.end()
-      indent = m.group(1)
-      ident = m.group(2)
-      if ident in generated:
-        if not gen_start:
-          gen_start = m.start()
-          assert gen_start
-        gen_end = js.find('\n%s}\n' % indent, m.end()) + (3 + len(indent))
-        assert gen_end > gen_start
-    pre = js[:gen_start]
-    post = js[gen_end:]
-    js = js[gen_start:gen_end]
+    if not asm_registerize:
+      pre = js[:start_funcs + len(start_funcs_marker)]
+      post = js[end_funcs + len(end_funcs_marker):]
+      js = js[start_funcs + len(start_funcs_marker):end_funcs]
+      if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post
+        class Finals:
+          buf = []
+        def process(line):
+          if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
+            Finals.buf.append(line)
+            return False
+          return True
+        js = '\n'.join(filter(process, js.split('\n')))
+        post = '\n'.join(Finals.buf) + '\n' + post
+      post = end_funcs_marker + post
+    else:
+      # We need to split out the asm shell as well, for minification
+      pre = js[:start_asm + len(start_asm_marker)]
+      post = js[end_asm:]
+      asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + '''
+EMSCRIPTEN_FUNCS();
+''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
+      js = js[start_funcs + len(start_funcs_marker):end_funcs]
+
+      minifier = Minifier(js, js_engine)
+      asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'compress' in passes).split('EMSCRIPTEN_FUNCS();');
+      asm_shell_post = asm_shell_post.replace('});', '})');
+      pre += asm_shell_pre + '\n' + start_funcs_marker
+      post = end_funcs_marker + asm_shell_post + post
+
+      minify_info = minifier.serialize()
+      #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
+    # remove suffix if no longer needed
+    if 'last' in passes:
+      suffix_start = post.find(suffix_marker)
+      suffix_end = post.find('\n', suffix_start)
+      post = post[:suffix_start] + post[suffix_end:]
+
   else:
     pre = ''
     post = ''
@@ -87,16 +197,21 @@ def run_on_js(filename, passes, js_engine, jcache):
     if i < len(parts)-1: func += '\n}\n' # last part needs no }
     m = func_sig.search(func)
     if m:
-      ident = m.group(1)
+      ident = m.group(2)
     else:
      if suffix: continue # ignore whitespace
       ident = 'anon_%d' % i
+    assert ident
     funcs.append((ident, func))
   parts = None
   total_size = len(js)
   js = None
 
-  chunks = shared.JCache.chunkify(funcs, BEST_JS_PROCESS_SIZE, 'jsopt' if jcache else None)
+  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
+  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
+  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
+
+  chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)
   if jcache:
     # load chunks from cache where we can
     # TODO: ignore small chunks
@@ -120,7 +235,10 @@ def run_on_js(filename, passes, js_engine, jcache):
     temp_file = temp_files.get('.jsfunc_%d.js' % i).name
     f = open(temp_file, 'w')
     f.write(chunk)
-    f.write(suffix)
+    f.write(suffix_marker)
+    if asm_registerize:
+      f.write('\n')
+      f.write('// MINIFY_INFO:' + minify_info)
     f.close()
     return temp_file
   filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))]
@@ -129,24 +247,48 @@ def run_on_js(filename, passes, js_engine, jcache):
   if len(filenames) > 0:
     # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug 1895, needed for older v8/node (node 0.6.8+ should be ok)
-    commands = map(lambda filename: [js_engine, JS_OPTIMIZER, filename, 'noPrintMetadata'] + passes, filenames)
+    commands = map(lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + passes, filenames)
     #print [' '.join(command) for command in commands]
 
-    cores = min(multiprocessing.cpu_count(), filenames)
+    cores = min(cores, filenames)
     if len(chunks) > 1 and cores >= 2:
       # We can parallelize
-      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
+      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)' % (len(chunks), chunk_size, cores, total_size/(1024*1024.))
       pool = multiprocessing.Pool(processes=cores)
       filenames = pool.map(run_on_chunk, commands, chunksize=1)
     else:
       # We can't parallize, but still break into chunks to avoid uglify/node memory issues
-      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
+      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d' % (len(chunks), chunk_size)
       filenames = [run_on_chunk(command) for command in commands]
   else:
     filenames = []
 
   for filename in filenames: temp_files.note(filename)
 
+  if 'closure' in passes:
+    # run closure on the shell code, everything but what we js-optimize
+    start_asm = '// EMSCRIPTEN_START_ASM\n'
+    end_asm = '// EMSCRIPTEN_END_ASM\n'
+    closure_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n'
+
+    closuree = temp_files.get('.closure.js').name
+    c = open(closuree, 'w')
+    pre_1, pre_2 = pre.split(start_asm)
+    post_1, post_2 = post.split(end_asm)
+    c.write(pre_1)
+    c.write(closure_sep)
+    c.write(post_2)
+    c.close()
+    closured = shared.Building.closure_compiler(closuree, pretty='compress' not in passes)
+    temp_files.note(closured)
+    coutput = open(closured).read()
+    coutput = coutput.replace('wakaUnknownBefore();', '')
+    after = 'wakaUnknownAfter'
+    start = coutput.find(after)
+    end = coutput.find(')', start)
+    pre = coutput[:start] + '(function(global,env,buffer) {\n' + start_asm + pre_2[pre_2.find('{')+1:]
+    post = post_1[:post_1.rfind('}')] + '\n' + end_asm + '\n})' + coutput[end+1:]
+
   filename += '.jo.js'
   f = open(filename, 'w')
   f.write(pre);
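
Two of the ideas this patch introduces can be illustrated in isolation. First, the chunk-size heuristic: the total size of the generated functions is divided across roughly NUM_CHUNKS_PER_CORE chunks per core and then clamped to the [MIN_CHUNK_SIZE, MAX_CHUNK_SIZE] range. The sketch below is not part of the commit; the helper name and the sample numbers are invented, and only the constants and the formula come from the diff above.

import multiprocessing

NUM_CHUNKS_PER_CORE = 1.5
MIN_CHUNK_SIZE = 1024*1024       # 1 MB
MAX_CHUNK_SIZE = 20*1024*1024    # 20 MB

def pick_chunk_size(total_size, cores=None):
  # Hypothetical helper mirroring the formula in run_on_js: aim for about
  # 1.5 chunks per core, but never let a chunk fall below 1 MB or grow beyond 20 MB.
  cores = cores or multiprocessing.cpu_count()
  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  return min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

# Example: 48 MB of generated JS on a 4-core machine -> 6 chunks of 8 MB each.
print(pick_chunk_size(48*1024*1024, cores=4))

Second, the '// MINIFY_INFO:' handshake: each chunk file and the asm shell end with a single metadata line carrying the serialized name pool and globals, and minify_shell splits the optimizer's reply on the same tag to recover the minified globals. A minimal sketch of that round-trip follows, with invented payload contents.

import json

MINIFY_INFO_TAG = '// MINIFY_INFO:'

def append_minify_info(js, names, globs):
  # Mirrors write_chunk / Minifier.serialize: JS payload first, one metadata line last.
  return js + '\n' + MINIFY_INFO_TAG + json.dumps({'names': names, 'globals': globs})

def split_minify_info(output):
  # Mirrors Minifier.minify_shell: code before the tag, JSON metadata after it.
  code, metadata = output.split(MINIFY_INFO_TAG)
  return code, json.loads(metadata)

payload = append_minify_info('function _f(){}', ['a', 'b'], ['_f'])
code, meta = split_minify_info(payload)
assert meta['globals'] == ['_f']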