diff options
author | Alon Zakai <alonzakai@gmail.com> | 2013-03-09 20:01:14 -0800 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2013-03-09 20:01:14 -0800 |
commit | 0f538ae57bb65af11efb7372661ba7c929c98d61 (patch) | |
tree | 091e8ed588207b317af07455bac65654b183245a | |
parent | 5c5a9dd4bf35d95ffe83e655fe2a237241f0a96a (diff) | |
parent | 4794e95b9ba2ab9104d6da7b0a6c7bf31c2f812a (diff) |
Merge branch 'asm_minifier' into incoming
-rwxr-xr-x | emcc | 21 | ||||
-rwxr-xr-x | emscripten.py | 17 | ||||
-rw-r--r-- | src/runtime.js | 2 | ||||
-rwxr-xr-x | tests/runner.py | 51 | ||||
-rw-r--r-- | tools/js-optimizer.js | 175 | ||||
-rw-r--r-- | tools/js_optimizer.py | 152 | ||||
-rw-r--r-- | tools/test-js-optimizer-asm-regs-min-output.js | 36 | ||||
-rw-r--r-- | tools/test-js-optimizer-asm-regs-min.js | 37 | ||||
-rw-r--r-- | tools/test-js-optimizer-asm-regs-output.js | 4 | ||||
-rw-r--r-- | tools/test-js-optimizer-asm-regs.js | 6 |
10 files changed, 427 insertions, 74 deletions
@@ -332,13 +332,11 @@ Options that are modified or new in %s include: output HTML but with suffix .data.compress --minify <on> 0: Do not minify the generated JavaScript's - whitespace (default if closure compiler - will not be run) + whitespace (default in -O0, -O1, or if + -g is used) 1: Minify the generated JavaScript's - whitespace (default if closure compiler - will be run). Note that this by itself - will not minify the code (closure does - that) + whitespace (default in -O2+, assuming + -g is not used) --split <size> Splits the resulting javascript file into pieces to ease debugging. This option only works if @@ -983,7 +981,7 @@ try: closure = False if minify_whitespace is None: - minify_whitespace = closure # if closure is run, minify whitespace + minify_whitespace = opt_level >= 2 and not keep_js_debug ## Compile source code to bitcode @@ -1367,11 +1365,10 @@ try: flush_js_optimizer_queue() - if not minify_whitespace: - # Remove some trivial whitespace - src = open(final).read() - src = re.sub(r'\n+[ \n]*\n+', '\n', src) - open(final, 'w').write(src) + # Remove some trivial whitespace # TODO: do not run when compress has already been done on all parts of the code + src = open(final).read() + src = re.sub(r'\n+[ \n]*\n+', '\n', src) + open(final, 'w').write(src) # If we were asked to also generate HTML, do that if final_suffix == 'html': diff --git a/emscripten.py b/emscripten.py index b698654b..09a57e37 100755 --- a/emscripten.py +++ b/emscripten.py @@ -332,9 +332,9 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, params = ','.join(['p%d' % p for p in range(len(sig)-1)]) coercions = ';'.join(['p%d = %sp%d%s' % (p, '+' if sig[p+1] != 'i' else '', p, '' if sig[p+1] != 'i' else '|0') for p in range(len(sig)-1)]) + ';' ret = '' if sig[0] == 'v' else ('return %s0' % ('+' if sig[0] != 'i' else '')) - return ('function %s(%s) { %s abort(%d); %s };' % (bad, params, coercions, i, ret), raw.replace('[0,', '[' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0]', ',' + bad + ']').replace(',0]', ',' + bad + ']').replace(',0\n', ',' + bad + '\n')) + return ('function %s(%s) { %s abort(%d); %s }' % (bad, params, coercions, i, ret), raw.replace('[0,', '[' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0]', ',' + bad + ']').replace(',0]', ',' + bad + ']').replace(',0\n', ',' + bad + '\n')) infos = [make_table(sig, raw) for sig, raw in last_forwarded_json['Functions']['tables'].iteritems()] - function_tables_defs = '\n'.join([info[0] for info in infos] + [info[1] for info in infos]) + function_tables_defs = '\n'.join([info[0] for info in infos]) + '\n// EMSCRIPTEN_END_FUNCS\n' + '\n'.join([info[1] for info in infos]) asm_setup = '' maths = ['Math.' + func for func in ['floor', 'abs', 'sqrt', 'pow', 'cos', 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'exp', 'log', 'ceil', 'imul']] @@ -416,6 +416,7 @@ function asmPrintInt(x, y) { function asmPrintFloat(x, y) { Module.print('float ' + x + ',' + y);// + ' ' + new Error().stack); } +// EMSCRIPTEN_START_ASM var asm = (function(global, env, buffer) { 'use asm'; var HEAP8 = new global.Int8Array(buffer); @@ -432,6 +433,7 @@ var asm = (function(global, env, buffer) { var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0; ''' + ''.join([''' var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs + ''' +// EMSCRIPTEN_START_FUNCS function stackAlloc(size) { size = size|0; var ret = 0; @@ -457,11 +459,12 @@ var asm = (function(global, env, buffer) { tempRet%d = value; } ''' % (i, i) for i in range(10)]) + funcs_js + ''' - %s return %s; -})(%s, %s, buffer); +}) +// EMSCRIPTEN_END_ASM +(%s, %s, buffer); %s; Runtime.stackAlloc = function(size) { return asm.stackAlloc(size) }; Runtime.stackSave = function() { return asm.stackSave() }; @@ -483,6 +486,12 @@ Runtime.stackRestore = function(top) { asm.stackRestore(top) }; else: function_tables_defs = '\n'.join([table for table in last_forwarded_json['Functions']['tables'].itervalues()]) outfile.write(function_tables_defs) + funcs_js = ''' +// EMSCRIPTEN_START_FUNCS +''' + funcs_js + ''' +// EMSCRIPTEN_END_FUNCS +''' + outfile.write(blockaddrsize(indexize(funcs_js))) funcs_js = None diff --git a/src/runtime.js b/src/runtime.js index dc604a8d..8352ade1 100644 --- a/src/runtime.js +++ b/src/runtime.js @@ -87,7 +87,7 @@ var RuntimeGenerator = { }; function unInline(name_, params) { - var src = '(function ' + name_ + '(' + params + ') { var ret = ' + RuntimeGenerator[name_].apply(null, params) + '; return ret; })'; + var src = '(function(' + params + ') { var ret = ' + RuntimeGenerator[name_].apply(null, params) + '; return ret; })'; var ret = eval(src); return ret; } diff --git a/tests/runner.py b/tests/runner.py index 2a41d953..bb787def 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -445,7 +445,7 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv) and 'brows if len(sys.argv) == 2 and 'ALL.' in sys.argv[1]: ignore, test = sys.argv[1].split('.') print 'Running all test modes on test "%s"' % test - sys.argv = [sys.argv[0], 'default.'+test, 'o1.'+test, 'o2.'+test, 'asm2.'+test, 's_0_0.'+test, 's_0_1.'+test, 's_1_0.'+test, 's_1_1.'+test] + sys.argv = [sys.argv[0], 'default.'+test, 'o1.'+test, 'o2.'+test, 'asm2.'+test, 'asm2g.'+test, 's_0_0.'+test, 's_0_1.'+test, 's_1_0.'+test, 's_1_1.'+test] class T(RunnerCore): # Short name, to make it more fun to use manually on the commandline ## Does a complete test - builds, runs, checks output, etc. @@ -7311,6 +7311,8 @@ def process(filename): return output + self.emcc_args += ['--minify', '0'] # to compare the versions + def do_test(): self.do_run(open(path_from_root('tests', 'openjpeg', 'codec', 'j2k_to_image.c'), 'r').read(), 'Successfully generated', # The real test for valid output is in image_compare @@ -8524,8 +8526,8 @@ TT = %s exec('o2 = make_run("o2", compiler=CLANG, emcc_args=["-O2"])') # asm.js - #exec('asm = make_run("asm", compiler=CLANG, emcc_args=["-O0", "-s", "ASM_JS=1"])') exec('asm2 = make_run("asm2", compiler=CLANG, emcc_args=["-O2", "-s", "ASM_JS=1"])') + exec('asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-s", "ASM_JS=1", "-g"])') # Make custom runs with various options for compiler, quantum, embetter, typed_arrays, llvm_opts in [ @@ -8696,7 +8698,8 @@ Options that are modified or new in %s include: assert 'SAFE_HEAP' not in generated, 'safe heap should not be used by default' assert ': while(' not in generated, 'when relooping we also js-optimize, so there should be no labelled whiles' if closure: - assert 'Module._main=' in generated, 'closure compiler should have been run (and output should be minified)' + if opt_level <= 1: assert 'Module._main =' in generated, 'closure compiler should have been run' + elif opt_level >= 2: assert 'Module._main=' in generated, 'closure compiler should have been run (and output should be minified)' else: # closure has not been run, we can do some additional checks. TODO: figure out how to do these even with closure assert 'Module._main = ' not in generated, 'closure compiler should not have been run' @@ -8705,14 +8708,16 @@ Options that are modified or new in %s include: assert ('assert(STACKTOP < STACK_MAX' in generated) == (opt_level == 0), 'assertions should be in opt == 0' assert 'var $i;' in generated or 'var $i_0' in generated or 'var $storemerge3;' in generated or 'var $storemerge4;' in generated or 'var $i_04;' in generated, 'micro opts should always be on' if opt_level >= 2: - assert re.search('HEAP8\[\$?\w+ \+ \(+\$?\w+ ', generated) or re.search('HEAP8\[HEAP32\[', generated), 'eliminator should create compound expressions, and fewer one-time vars' # also in -O1, but easier to test in -O2 + assert re.search('HEAP8\[\$?\w+ ?\+ ?\(+\$?\w+ ?', generated) or re.search('HEAP8\[HEAP32\[', generated), 'eliminator should create compound expressions, and fewer one-time vars' # also in -O1, but easier to test in -O2 assert ('_puts(' in generated) == (opt_level >= 1), 'with opt >= 1, llvm opts are run and they should optimize printf to puts' - assert 'function _main() {' in generated, 'Should be unminified, including whitespace' + if opt_level <= 1 or '-g' in params: assert 'function _main() {' in generated, 'Should be unminified, including whitespace' + elif opt_level >= 2: assert 'function _main(){' in generated, 'Should be whitespace-minified' # emcc -s RELOOP=1 src.cpp ==> should pass -s to emscripten.py. --typed-arrays is a convenient alias for -s USE_TYPED_ARRAYS for params, test, text in [ - (['-s', 'ASM_JS=1', '-O2'], lambda generated: 'var i1 = 0' in generated, 'registerize is run by default in -O2'), - (['-s', 'ASM_JS=1', '-O2', '-g'], lambda generated: 'var i1 = 0' not in generated, 'registerize is cancelled by -g'), + (['-s', 'ASM_JS=1', '-O2'], lambda generated: 'var b=0' in generated and not 'function _main' in generated, 'registerize/minify is run by default in -O2'), + (['-s', 'ASM_JS=1', '-O2', '--minify', '0'], lambda generated: 'var b = 0' in generated and not 'function _main' in generated, 'minify is cancelled, but not registerize'), + (['-s', 'ASM_JS=1', '-O2', '-g'], lambda generated: 'var b=0' not in generated and 'var b = 0' not in generated and 'function _main' in generated, 'registerize/minify is cancelled by -g'), (['-s', 'INLINING_LIMIT=0'], lambda generated: 'function _dump' in generated, 'no inlining without opts'), (['-O3', '-s', 'INLINING_LIMIT=0', '--closure', '0'], lambda generated: 'function _dump' not in generated, 'lto/inlining'), (['-Os', '--llvm-lto', '1'], lambda generated: 'function _dump' in generated, '-Os disables inlining'), @@ -8927,6 +8932,32 @@ f.close() Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'test.cpp'), '-s', 'UNALIGNED_MEMORY=1']).communicate() self.assertContained('testString = Hello, World!', run_js(os.path.join(self.get_dir(), 'a.out.js'))) + def test_asm_minify(self): + def test(args): + Popen([PYTHON, EMCC, path_from_root('tests', 'hello_world_loop_malloc.cpp')] + args).communicate() + self.assertContained('hello, world!', run_js(self.in_dir('a.out.js'))) + return open(self.in_dir('a.out.js')).read() + + src = test([]) + assert 'function _malloc' in src + + src = test(['-O2', '-s', 'ASM_JS=1']) + normal_size = len(src) + print 'normal', normal_size + assert 'function _malloc' not in src + + src = test(['-O2', '-s', 'ASM_JS=1', '--minify', '0']) + unminified_size = len(src) + print 'unminified', unminified_size + assert unminified_size > normal_size + assert 'function _malloc' not in src + + src = test(['-O2', '-s', 'ASM_JS=1', '-g']) + debug_size = len(src) + print 'debug', debug_size + assert debug_size > unminified_size + assert 'function _malloc' in src + def test_l_link(self): # Linking with -lLIBNAME and -L/DIRNAME should work @@ -9639,6 +9670,8 @@ f.close() ['asm', 'eliminate']), (path_from_root('tools', 'test-js-optimizer-asm-regs.js'), open(path_from_root('tools', 'test-js-optimizer-asm-regs-output.js')).read(), ['asm', 'registerize']), + (path_from_root('tools', 'test-js-optimizer-asm-regs-min.js'), open(path_from_root('tools', 'test-js-optimizer-asm-regs-min-output.js')).read(), + ['asm', 'registerize']), (path_from_root('tools', 'test-js-optimizer-asm-pre.js'), open(path_from_root('tools', 'test-js-optimizer-asm-pre-output.js')).read(), ['asm', 'simplifyExpressionsPre']), (path_from_root('tools', 'test-js-optimizer-asm-last.js'), open(path_from_root('tools', 'test-js-optimizer-asm-last-output.js')).read(), @@ -9660,8 +9693,8 @@ f.close() try: os.environ['EMCC_DEBUG'] = '1' for asm, linkable, chunks, js_chunks in [ - (0, 0, 3, 2), (0, 1, 4, 4), - (1, 0, 3, 2), (1, 1, 4, 5) + (0, 0, 3, 2), (0, 1, 3, 4), + (1, 0, 3, 2), (1, 1, 3, 4) ]: print asm, linkable, chunks, js_chunks output, err = Popen([PYTHON, EMCC, path_from_root('tests', 'hello_libcxx.cpp'), '-O1', '-s', 'LINKABLE=%d' % linkable, '-s', 'ASM_JS=%d' % asm], stdout=PIPE, stderr=PIPE).communicate() diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index 9c744fa3..834c99e3 100644 --- a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -143,6 +143,8 @@ var FALSE_NODE = ['unary-prefix', '!', ['num', 1]]; var GENERATED_FUNCTIONS_MARKER = '// EMSCRIPTEN_GENERATED_FUNCTIONS'; var generatedFunctions = false; // whether we have received only generated functions +var minifierInfo = null; + function srcToAst(src) { return uglify.parser.parse(src); } @@ -218,12 +220,15 @@ function traverseGenerated(ast, pre, post, stack) { function traverseGeneratedFunctions(ast, callback) { assert(generatedFunctions); - traverse(ast, function(node) { - if (node[0] == 'defun') { - callback(node); - return null; + if (ast[0] == 'toplevel') { + var stats = ast[1]; + for (var i = 0; i < stats.length; i++) { + var curr = stats[i]; + if (curr[0] == 'defun') callback(curr); } - }); + } else if (ast[0] == 'defun') { + callback(ast); + } } // Walk the ast in a simple way, with an understanding of which JS variables are defined) @@ -535,9 +540,31 @@ function simplifyExpressionsPre(ast) { }); } + function addFinalReturns(ast) { + traverseGeneratedFunctions(ast, function(fun) { + var returnType = null; + traverse(fun, function(node, type) { + if (type == 'return' && node[1]) { + returnType = detectAsmCoercion(node[1]); + } + }); + // Add a final return if one is missing. + if (returnType !== null) { + var stats = getStatements(fun); + var last = stats[stats.length-1]; + if (last[0] != 'return') { + var returnValue = ['num', 0]; + if (returnType == ASM_DOUBLE) returnValue = ['unary-prefix', '+', returnValue]; + stats.push(['return', returnValue]); + } + } + }); + } + simplifyBitops(ast); joinAdditions(ast); // simplifyZeroComp(ast); TODO: investigate performance + if (asm) addFinalReturns(ast); } // In typed arrays mode 2, we can have @@ -1300,7 +1327,9 @@ function normalizeAsm(func) { var node = stats[i]; if (node[0] != 'stat' || node[1][0] != 'assign' || node[1][2][0] != 'name') break; node = node[1]; - data.params[node[2][1]] = detectAsmCoercion(node[3]); + var name = node[2][1]; + if (func[2] && func[2].indexOf(name) < 0) break; // not an assign into a parameter, but a global + data.params[name] = detectAsmCoercion(node[3]); stats[i] = emptyNode(); i++; } @@ -1393,7 +1422,10 @@ function denormalizeAsm(func, data) { //printErr('denormalized \n\n' + astToSrc(func) + '\n\n'); } -// Very simple 'registerization', coalescing of variables into a smaller number. +// Very simple 'registerization', coalescing of variables into a smaller number, +// as part of minification. Globals-level minification began in a previous pass, +// we receive minifierInfo which tells us how to rename globals. (Only in asm.js.) +// // We do not optimize when there are switches, so this pass only makes sense with // relooping. // TODO: Consider how this fits in with the rest of the optimization toolchain. Do @@ -1420,7 +1452,6 @@ function registerize(ast) { // We also mark local variables - i.e., having a var definition var localVars = {}; var hasSwitch = false; // we cannot optimize variables if there is a switch - var returnType = null; // for asm traverse(fun, function(node, type) { if (type == 'var') { node[1].forEach(function(defined) { localVars[defined[0]] = 1 }); @@ -1432,11 +1463,74 @@ function registerize(ast) { } } else if (type == 'switch') { hasSwitch = true; - } else if (asm && type == 'return' && node[1]) { - returnType = detectAsmCoercion(node[1]); } }); vacuum(fun); + if (minifierInfo) { + assert(asm); + var usedGlobals = {}; + var nextLocal = 0; + // Minify globals using the mapping we were given + traverse(fun, function(node, type) { + if (type == 'name') { + var name = node[1]; + var minified = minifierInfo.globals[name]; + if (minified) { + assert(!localVars[name], name); // locals must not shadow globals, or else we don't know which is which + if (localVars[minified]) { + // trying to minify a global into a name used locally. rename all the locals + var newName = '$_newLocal_' + (nextLocal++); + assert(!localVars[newName]); + if (params[minified]) { + params[newName] = 1; + delete params[minified]; + } + localVars[newName] = 1; + delete localVars[minified]; + asmData.vars[newName] = asmData.vars[minified]; + delete asmData.vars[minified]; + asmData.params[newName] = asmData.params[minified]; + delete asmData.params[minified]; + traverse(fun, function(node, type) { + if (type == 'name' && node[1] == minified) { + node[1] = newName; + } + }); + if (fun[2]) { + for (var i = 0; i < fun[2].length; i++) { + if (fun[2][i] == minified) fun[2][i] = newName; + } + } + } + node[1] = minified; + usedGlobals[minified] = 1; + } + } + }); + assert(fun[1] in minifierInfo.globals, fun[1]); + fun[1] = minifierInfo.globals[fun[1]]; + assert(fun[1]); + var nextRegName = 0; + } + var regTypes = {}; + function getNewRegName(num, name) { + if (!asm) return 'r' + num; + var type = asmData.vars[name]; + if (!minifierInfo) { + var ret = (type ? 'd' : 'i') + num; + regTypes[ret] = type; + return ret; + } + // find the next free minified name that is not used by a global that shows up in this function + while (nextRegName < minifierInfo.names.length) { + var ret = minifierInfo.names[nextRegName++]; + if (!usedGlobals[ret]) { + regTypes[ret] = type; + return ret; + } + } + assert('ran out of names'); + } // Find the # of uses of each variable. // While doing so, check if all a variable's uses are dominated in a simple // way by a simple assign, if so, then we can assign its register to it @@ -1521,7 +1615,7 @@ function registerize(ast) { saved++; } else { reg = nextReg++; - fullNames[reg] = (asm ? (asmData.vars[name] ? 'd' : 'i') : 'r') + reg; // TODO: even smaller names + fullNames[reg] = getNewRegName(reg, name); if (params[name]) paramRegs[reg] = 1; } varRegs[name] = reg; @@ -1565,7 +1659,7 @@ function registerize(ast) { if (loopRegs[loops]) { if (asm) { loopRegs[loops].forEach(function(loopReg) { - freeRegsClasses[fullNames[loopReg][0] == 'i' ? ASM_INT : ASM_DOUBLE].push(loopReg); + freeRegsClasses[regTypes[fullNames[loopReg]]].push(loopReg); }); } else { freeRegsClasses = freeRegsClasses.concat(loopRegs[loops]); @@ -1601,7 +1695,7 @@ function registerize(ast) { }; for (var i = 1; i < nextReg; i++) { var reg = fullNames[i]; - var type = reg[0] == 'i' ? ASM_INT : ASM_DOUBLE + var type = regTypes[reg]; if (!paramRegs[i]) { finalAsmData.vars[reg] = type; } else { @@ -1610,17 +1704,6 @@ function registerize(ast) { } } denormalizeAsm(fun, finalAsmData); - // Add a final return if one is missing. This is not strictly a register operation, but - // this pass traverses the entire AST anyhow so adding it here is efficient. - if (returnType !== null) { - var stats = getStatements(fun); - var last = stats[stats.length-1]; - if (last[0] != 'return') { - var returnValue = ['num', 0]; - if (returnType == ASM_DOUBLE) returnValue = ['unary-prefix', '+', returnValue]; - stats.push(['return', returnValue]); - } - } } }); } @@ -2155,6 +2238,43 @@ function eliminateMemSafe(ast) { eliminate(ast, true); } +function minifyGlobals(ast) { + var minified = {}; + var next = 0; + var first = true; // do not minify initial 'var asm =' + // find the globals + traverse(ast, function(node, type) { + if (type == 'var') { + if (first) { + first = false; + return; + } + var vars = node[1]; + for (var i = 0; i < vars.length; i++) { + var name = vars[i][0]; + assert(next < minifierInfo.names.length); + vars[i][0] = minified[name] = minifierInfo.names[next++]; + } + } + }); + // add all globals in function chunks, i.e. not here but passed to us + for (var i = 0; i < minifierInfo.globals.length; i++) { + name = minifierInfo.globals[i]; + assert(next < minifierInfo.names.length); + minified[name] = minifierInfo.names[next++]; + } + // apply minification + traverse(ast, function(node, type) { + if (type == 'name') { + var name = node[1]; + if (name in minified) { + node[1] = minified[name]; + } + } + }); + suffix = '// MINIFY_INFO:' + JSON.stringify(minified); +} + // Change +5 to DOT$ZERO(5). We then textually change 5 to 5.0 (uglify's ast cannot differentiate between 5 and 5.0 directly) function prepDotZero(ast) { traverse(ast, function(node, type) { @@ -2200,6 +2320,7 @@ var passes = { registerize: registerize, eliminate: eliminate, eliminateMemSafe: eliminateMemSafe, + minifyGlobals: minifyGlobals, compress: function() { compress = true }, noPrintMetadata: function() { printMetadata = false }, asm: function() { asm = true }, @@ -2208,10 +2329,15 @@ var passes = { // Main +var suffix = ''; + var src = read(arguments_[0]); var ast = srcToAst(src); //printErr(JSON.stringify(ast)); throw 1; generatedFunctions = src.indexOf(GENERATED_FUNCTIONS_MARKER) >= 0; +var minifierInfoStart = src.indexOf('// MINIFY_INFO:') +if (minifierInfoStart > 0) minifierInfo = JSON.parse(src.substr(minifierInfoStart + 15)); +//printErr(JSON.stringify(minifierInfo)); arguments_.slice(1).forEach(function(arg) { passes[arg](ast); @@ -2231,4 +2357,5 @@ do { } while (js != old); print(js); print('\n'); +print(suffix); diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index 13e6e4f6..60093bca 100644 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -1,5 +1,5 @@ -import os, sys, subprocess, multiprocessing, re +import os, sys, subprocess, multiprocessing, re, string, json import shared configuration = shared.configuration @@ -19,6 +19,78 @@ WINDOWS = sys.platform.startswith('win') DEBUG = os.environ.get('EMCC_DEBUG') +func_sig = re.compile('( *)function ([_\w$]+)\(') + +class Minifier: + ''' + asm.js minification support. We calculate possible names and minification of + globals here, then pass that into the parallel js-optimizer.js runners which + during registerize perform minification of locals. + ''' + + def __init__(self, js, js_engine): + self.js = js + self.js_engine = js_engine + + # Create list of valid short names + + MAX_NAMES = 6000#0 + INVALID_2 = set(['do', 'if', 'in']) + INVALID_3 = set(['for', 'new', 'try', 'var', 'env']) + + self.names = [] + init_possibles = string.ascii_letters + '_$' + later_possibles = init_possibles + string.digits + for a in init_possibles: + if len(self.names) >= MAX_NAMES: break + self.names.append(a) + for a in init_possibles: + for b in later_possibles: + if len(self.names) >= MAX_NAMES: break + curr = a + b + if curr not in INVALID_2: self.names.append(curr) + for a in init_possibles: + for b in later_possibles: + for c in later_possibles: + if len(self.names) >= MAX_NAMES: break + curr = a + b + c + if curr not in INVALID_3: self.names.append(curr) + #print >> sys.stderr, self.names + + def minify_shell(self, shell, compress): + #print >> sys.stderr, "MINIFY SHELL 1111111111", shell, "\n222222222222222" + # Run through js-optimizer.js to find and minify the global symbols + # We send it the globals, which it parses at the proper time. JS decides how + # to minify all global names, we receive a dictionary back, which is then + # used by the function processors + + shell = shell.replace('0.0', '13371337') # avoid uglify doing 0.0 => 0 + + # Find all globals in the JS functions code + self.globs = [m.group(2) for m in func_sig.finditer(self.js)] + + temp_file = temp_files.get('.minifyglobals.js').name + f = open(temp_file, 'w') + f.write(shell) + f.write('\n') + self + f.write('// MINIFY_INFO:' + self.serialize()) + f.close() + + output = subprocess.Popen(self.js_engine + [JS_OPTIMIZER, temp_file, 'minifyGlobals', 'noPrintMetadata'] + (['compress'] if compress else []), stdout=subprocess.PIPE).communicate()[0] + assert len(output) > 0 and not output.startswith('Assertion failed'), 'Error in js optimizer: ' + output + #print >> sys.stderr, "minified SHELL 3333333333333333", output, "\n44444444444444444444" + code, metadata = output.split('// MINIFY_INFO:') + self.globs = json.loads(metadata) + return code.replace('13371337', '0.0') + + + def serialize(self): + return json.dumps({ + 'names': self.names, + 'globals': self.globs + }) + def run_on_chunk(command): filename = command[2] # XXX hackish #print >> sys.stderr, 'running js optimizer command', ' '.join(command), '""""', open(filename).read() @@ -46,10 +118,25 @@ def run_on_js(filename, passes, js_engine, jcache): suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: - suffix = js[suffix_start:js.find('\n', suffix_start)] + '\n' + suffix_end = js.find('\n', suffix_start) + suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. generated = set(eval(suffix[len(suffix_marker)+1:])) + # Find markers + start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n' + end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n' + start_funcs = js.find(start_funcs_marker) + end_funcs = js.rfind(end_funcs_marker) + assert (start_funcs >= 0) == (end_funcs >= 0) == (not not suffix) + asm_registerize = 'asm' in passes and 'registerize' in passes + if asm_registerize: + start_asm_marker = '// EMSCRIPTEN_START_ASM\n' + end_asm_marker = '// EMSCRIPTEN_END_ASM\n' + start_asm = js.find(start_asm_marker) + end_asm = js.rfind(end_asm_marker) + assert (start_asm >= 0) == (end_asm >= 0) + if not suffix and jcache: # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure @@ -57,29 +144,45 @@ def run_on_js(filename, passes, js_engine, jcache): if DEBUG: print >>sys.stderr, 'js optimizer: no metadata, so disabling jcache' jcache = False - # If we process only generated code, find that and save the rest on the side - func_sig = re.compile('( *)function (_[\w$]+)\(') if suffix: - pos = 0 - gen_start = 0 - gen_end = 0 - while 1: - m = func_sig.search(js, pos) - if not m: break - pos = m.end() - indent = m.group(1) - ident = m.group(2) - if ident in generated: - if not gen_start: - gen_start = m.start() - assert gen_start - gen_end = js.find('\n%s}\n' % indent, m.end()) + (3 + len(indent)) - assert gen_end > gen_start - pre = js[:gen_start] - post = js[gen_end:] + if not asm_registerize: + pre = js[:start_funcs + len(start_funcs_marker)] + post = js[end_funcs + len(end_funcs_marker):] + js = js[start_funcs + len(start_funcs_marker):end_funcs] + if 'asm' not in passes: # can have Module[..] and inlining prevention code, push those to post + class Finals: + buf = [] + def process(line): + if len(line) > 0 and not line.startswith((' ', 'function', '}')): + Finals.buf.append(line) + return False + return True + js = '\n'.join(filter(process, js.split('\n'))) + post = '\n'.join(Finals.buf) + '\n' + post + post = end_funcs_marker + post + else: + # We need to split out the asm shell as well, for minification + pre = js[:start_asm + len(start_asm_marker)] + post = js[end_asm:] + asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' +EMSCRIPTEN_FUNCS(); +''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] + js = js[start_funcs + len(start_funcs_marker):end_funcs] + + minifier = Minifier(js, js_engine) + asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'compress' in passes).split('EMSCRIPTEN_FUNCS();'); + asm_shell_post = asm_shell_post.replace('});', '})'); + pre += asm_shell_pre + '\n' + start_funcs_marker + post = end_funcs_marker + asm_shell_post + post + + minify_info = minifier.serialize() + #if DEBUG: print >> sys.stderr, 'minify info:', minify_info + # remove suffix if no longer needed if 'last' in passes: - post = post.replace(suffix, '') # no need to write out the metadata - nothing after us needs it - js = js[gen_start:gen_end] + suffix_start = post.find(suffix_marker) + suffix_end = post.find('\n', suffix_start) + post = post[:suffix_start] + post[suffix_end:] + else: pre = '' post = '' @@ -132,6 +235,9 @@ def run_on_js(filename, passes, js_engine, jcache): f = open(temp_file, 'w') f.write(chunk) f.write(suffix_marker) + if asm_registerize: + f.write('\n') + f.write('// MINIFY_INFO:' + minify_info) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] diff --git a/tools/test-js-optimizer-asm-regs-min-output.js b/tools/test-js-optimizer-asm-regs-min-output.js new file mode 100644 index 00000000..b8088022 --- /dev/null +++ b/tools/test-js-optimizer-asm-regs-min-output.js @@ -0,0 +1,36 @@ +function cl(b) { + b = b | 0; + var c = 0; + c = b * b; + a(c); + i1(b); +} +function cl(b) { + b = b | 0; + var c = 0; + c = b * b; + a(c); + i1(b); +} +function cl(b) { + b = b | 0; + var c = 0; + c = b * b; + a(c); + i1(b); +} +function cl(b) { + b = b | 0; + var c = 0; + c = b * b; + a(c); + i1(b); +} +function cl(b) { + b = b | 0; + var c = 0; + c = b * b; + a(c); + i1(b); +} + diff --git a/tools/test-js-optimizer-asm-regs-min.js b/tools/test-js-optimizer-asm-regs-min.js new file mode 100644 index 00000000..c126946d --- /dev/null +++ b/tools/test-js-optimizer-asm-regs-min.js @@ -0,0 +1,37 @@ +function collideLocal(x) { + x = x | 0; + var a = 0; + a = x*x; + aGlobal(a); // aGlobal needs to be minified into a, but a is used! + bGlobal(x); +} +function collideLocal(x) { + x = x | 0; + var i1 = 0; + i1 = x*x; + aGlobal(i1); + bGlobal(x); // bGlobal needs to be minified into i1, but i1 is used! +} +function collideLocal(a) { + a = a | 0; + var x = 0; + x = a*a; + aGlobal(x); // aGlobal needs to be minified into a, but a is used by a param! + bGlobal(a); +} +function collideLocal(i1) { + i1 = i1 | 0; + var x = 0; + x = i1*i1; + aGlobal(x); + bGlobal(i1); // bGlobal needs to be minified into i1, but i1 is used by a param! +} +function collideLocal(i1) { + i1 = i1 | 0; + var a = 0; + a = i1*i1; + aGlobal(a); // multiple collisions, a and i1 + bGlobal(i1); +} +// EMSCRIPTEN_GENERATED_FUNCTIONS +// MINIFY_INFO: { "names": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "i1", "cl"], "globals": { "aGlobal": "a", "bGlobal": "i1", "collideLocal": "cl" } } diff --git a/tools/test-js-optimizer-asm-regs-output.js b/tools/test-js-optimizer-asm-regs-output.js index 99bccd2e..8c0bd970 100644 --- a/tools/test-js-optimizer-asm-regs-output.js +++ b/tools/test-js-optimizer-asm-regs-output.js @@ -38,4 +38,8 @@ function retf() { } return +0; } +function stackRestore(i1) { + i1 = i1 | 0; + STACKTOP = i1; +} diff --git a/tools/test-js-optimizer-asm-regs.js b/tools/test-js-optimizer-asm-regs.js index 0afced29..a8b637ce 100644 --- a/tools/test-js-optimizer-asm-regs.js +++ b/tools/test-js-optimizer-asm-regs.js @@ -41,5 +41,9 @@ function retf() { } // missing final return, need it as a float } -// EMSCRIPTEN_GENERATED_FUNCTIONS: ["asm", "_doit", "rett", "ret2t", "retf"] +function stackRestore(top) { + top = top|0; + STACKTOP = top; +} +// EMSCRIPTEN_GENERATED_FUNCTIONS: ["asm", "_doit", "rett", "ret2t", "retf", "stackRestore"] |