diff options
Diffstat (limited to 'tools/js-optimizer.js')
-rw-r--r-- | tools/js-optimizer.js | 329 |
1 files changed, 329 insertions, 0 deletions
// Returns the asm.js type recorded for a local of a normalized function.
// Variables are consulted first, then parameters; the result is undefined
// when `name` is in neither table.
//   asmInfo: object with `vars` and `params` maps (name => type)
//   name:    identifier to look up
function getAsmType(asmInfo, name) {
  if (name in asmInfo.vars) return asmInfo.vars[name];
  return asmInfo.params[name];
}

// Break up very large functions

// Node types that may appear in the definition of a variable without making
// that variable "sensitive" (see aggressiveVariableElimination below).
var NODES_WITHOUT_ELIMINATION_SENSITIVITY = set('name', 'num', 'binary', 'unary-prefix');
// Currently only referenced from a disabled check in assessTriviality.
var FAST_ELIMINATION_BINARIES = setUnion(setUnion(USEFUL_BINARY_OPS, COMPARE_OPS), set('+'));

// Optimizer pass: splits ("outlines") runs of statements out of functions whose
// node count reaches extraInfo.sizeToOutline, replacing each run by a call to a
// newly generated function `<name>$<counter>` and spilling/reloading the locals
// involved through the heap at offsets from `sp`.
//   ast: a 'toplevel' or a single 'defun' node; modified in place. When any
//        function was outlined, the new functions are appended to the toplevel
//        (a lone 'defun' is turned into a 'toplevel' holding it and them).
// NOTE: the pass is exposed through the `passes` table under the key `outline`.
function outline(ast) {
  // Number of typed AST nodes reachable from `ast` (every node the traversal callback sees).
  function measureSize(ast) {
    var size = 0;
    traverse(ast, function() {
      size++;
    });
    return size;
  }

  function aggressiveVariableElimination(func, asmData) {
    // This removes as many variables as possible. This is often not the best thing because it increases
    // code size, but it is far preferable to the risk of split functions needing to do more spilling. Specifically,
    // it finds 'trivial' variables: ones with 1 definition, and that definition is not sensitive to any changes: it
    // only depends on constants and local variables that are themselves trivial. We can unquestionably eliminate
    // such variables in a trivial manner.

    var assignments = {}; // var name => number of assignments to it
    var appearances = {}; // var name => number of uses (definitions are netted out)
    var defs = {};        // var name => the (last seen) 'assign' node defining it
    var considered = {};  // names already assessed (or excluded from assessment)

    traverse(func, function(node, type) {
      if (type == 'assign' && node[2][0] == 'name') {
        var name = node[2][1];
        if (name in asmData.vars) {
          assignments[name] = (assignments[name] || 0) + 1;
          appearances[name] = (appearances[name] || 0) - 1; // this appearance is a definition, offset the counting later
          defs[name] = node;
        } else {
          if (name in asmData.params) {
            considered[name] = true; // this parameter is not ssa, it must be in a hand-optimized function, so it is not trivial
          }
        }
      } else if (type == 'name') {
        var name = node[1];
        if (name in asmData.vars) {
          appearances[name] = (appearances[name] || 0) + 1;
        }
      }
    });

    var allTrivials = {}; // key of a trivial var => size of its (expanded) value, at least 1

    // three levels of variables:
    // 1. trivial: 1 def (or less), uses nothing sensitive, can be eliminated
    // 2. safe: 1 def (or less), can be used in a trivial, but cannot itself be eliminated
    // 3. sensitive: uses a global or memory or something else that prevents trivial elimination.

    function assessTriviality(name) {
      // only care about vars with 0-1 assignments (0 for parameters), and can ignore label (which is not explicitly initialized, but cannot be eliminated ever anyhow)
      if (assignments[name] > 1 || (!(name in asmData.vars) && !(name in asmData.params)) || name == 'label') return false;
      if (considered[name]) return allTrivials[name];
      considered[name] = true;
      var sensitive = false;
      var size = 0, originalSize = 0;
      var def = defs[name];
      if (def) {
        var value = def[3];
        if (value) {
          // Measure only once we know there is a value to walk.
          originalSize = measureSize(value);
          traverse(value, function recurseValue(node, type) {
            var one = node[1];
            // (a stricter check on binaries via FAST_ELIMINATION_BINARIES is currently disabled)
            if (!(type in NODES_WITHOUT_ELIMINATION_SENSITIVITY)) {
              sensitive = true;
              return true;
            }
            if (type == 'name' && !assessTriviality(one)) {
              if (assignments[one] > 1 || (!(one in asmData.vars) && !(one in asmData.params))) {
                sensitive = true; // directly using something sensitive
                return true;
              } // otherwise, not trivial, but at least safe.
            }
            // if this is a name, it must be a trivial variable (or a safe one) and we know its size
            size += ((type == 'name') ? allTrivials[one] : 1) || 1;
          });
        }
      }
      if (!sensitive) {
        size = size || 1;
        originalSize = originalSize || 1;
        var factor = ((appearances[name] - 1) || 0) * (size - originalSize); // If no size change or just one appearance, always ok to trivially eliminate. otherwise, tradeoff
        if (factor <= 12) {
          allTrivials[name] = size; // trivial!
          return true;
        }
      }
      return false;
    }
    for (var name in asmData.vars) {
      assessTriviality(name);
    }
    var trivials = {};

    for (var name in allTrivials) { // from now on, ignore parameters
      if (name in asmData.vars) trivials[name] = true;
    }

    allTrivials = {};

    var values = {}; // trivial var name => fully expanded defining expression (null while in progress / unknown)

    // Expands the definition of trivial variable `name`, substituting the
    // definitions of other trivial variables it mentions. Results are memoized.
    function evaluate(name) {
      // Own-key test (rather than truthiness) so that the null "in progress"
      // marker below is honored and cyclic definitions cannot recurse forever.
      if (Object.prototype.hasOwnProperty.call(values, name)) return values[name];
      values[name] = null; // prevent infinite recursion
      var node;
      var def = defs[name];
      if (def) {
        node = def[3];
        if (node[0] == 'name') {
          var name2 = node[1];
          if (name2 in trivials) {
            node = evaluate(name2);
          }
        } else {
          traverse(node, function(sub, type) {
            if (type == 'name') {
              var subName = sub[1];
              if (subName in trivials) {
                return evaluate(subName);
              }
            }
          });
        }
        values[name] = node;
      }
      return node;
    }

    for (var name in trivials) {
      evaluate(name);
    }

    // Turn each eliminated definition into an empty node and drop the variable.
    for (var name in trivials) {
      var def = defs[name];
      if (def) {
        def.length = 0;
        def[0] = 'toplevel';
        def[1] = [];
      }
      delete asmData.vars[name];
    }

    // Perform replacements TODO: save list of uses objects before, replace directly, avoid extra traverse
    traverse(func, function(node, type) {
      if (type == 'name') {
        var name = node[1];
        if (name in trivials) {
          var value = values[name];
          if (!value) throw 'missing value: ' + [func[1], name, values[name]] + ' - faulty reliance on asm zero-init?';
          return copy(value); // must copy, or else the same object can be used multiple times
        }
      }
    });
  }

  // Prepares information for spilling of local variables
  function analyzeFunction(func, asmData) {
    var stack = []; // list of variables, each gets 8 bytes
    for (var name in asmData.params) {
      stack.push(name);
    }
    for (var name in asmData.vars) {
      stack.push(name);
    }
    asmData.stackPos = {};
    for (var i = 0; i < stack.length; i++) {
      asmData.stackPos[stack[i]] = i*8;
    }

    asmData.splitCounter = 0;
  }

  // Analyze uses - reads and writes - of variables in part of the AST of a function
  // Returns { writes, reads }: objects whose keys are the locals assigned / used in `ast`.
  function analyzeVariables(func, asmData, ast) {
    var writes = {};
    var appearances = {};

    traverse(ast, function(node, type) {
      if (type == 'assign' && node[2][0] == 'name') {
        var name = node[2][1];
        if (name in asmData.vars || name in asmData.params) {
          writes[name] = 0;
          appearances[name] = (appearances[name] || 0) - 1; // this appearance is a definition, offset the counting later
        }
      } else if (type == 'name') {
        var name = node[1];
        if (name in asmData.vars || name in asmData.params) {
          appearances[name] = (appearances[name] || 0) + 1;
        }
      }
    });

    var reads = {};

    for (var name in appearances) {
      if (appearances[name] > 0) reads[name] = 0;
    }

    return { writes: writes, reads: reads };
  }

  var sizeToOutline = extraInfo.sizeToOutline;
  var level = 0; // nesting depth of outlineStatements, used for logging only

  // Builds a fresh `HEAPxx[(sp + stackPos[v]) >> 2]` node for local `v`.
  // A new tree is created on every call so no AST node is shared between statements.
  function makeStackSlot(asmData, v) {
    var heap = getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32';
    return ['sub', ['name', heap], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', 2]]];
  }

  // Moves stats[start..end] (inclusive) into a new function and replaces them in
  // `stats` with: spills of the locals read, a call to the new function, and
  // reloads of the locals written. Returns a list holding the new function.
  function doOutline(func, asmData, stats, start, end) {
    printErr(' do outline ' + [func[1], level, 'range:', start, end, 'of', stats.length]);
    var code = stats.slice(start, end+1);
    var newIdent = func[1] + '$' + (asmData.splitCounter++);
    // add spills and reads before and after the call to the outlined code
    var varInfo = analyzeVariables(func, asmData, code);
    var reps = [];
    for (var v in varInfo.reads) {
      if (v != 'sp') {
        reps.push(['stat', ['assign', true, makeStackSlot(asmData, v), ['name', v]]]);
      }
    }
    reps.push(['stat', ['call', ['name', newIdent], [['name', 'sp']]]]);
    for (var v in varInfo.writes) {
      reps.push(['stat', ['assign', true, ['name', v], makeStackSlot(asmData, v)]]);
    }
    stats.splice.apply(stats, [start, end-start+1].concat(reps));
    // Generate new function
    var newFunc = ['defun', newIdent, ['sp'], code];
    var newAsmInfo = { params: { sp: ASM_INT }, vars: {} };
    for (var v in varInfo.reads) {
      newAsmInfo.vars[v] = getAsmType(asmData, v);
    }
    for (var v in varInfo.writes) {
      newAsmInfo.vars[v] = getAsmType(asmData, v);
    }
    denormalizeAsm(newFunc, newAsmInfo);
    return [newFunc];
  }

  // Walks `stats` from the end, outlining runs of statements once their
  // accumulated size reaches sizeToOutline, and recursing into the blocks of
  // statements that are individually too large. Returns the list of new
  // functions, or undefined when `stats` is too small to bother with.
  function outlineStatements(func, asmData, stats) {
    // Check the size before touching `level`, so the early exit leaves the counter balanced.
    if (measureSize(stats) < sizeToOutline) return;
    level++;
    var ret = [];
    var sizeSeen = 0;
    var end = stats.length-1;
    var i = stats.length;
    while (--i >= 0) {
      var stat = stats[i];
      var size = measureSize(stat);
      //printErr(level + ' size ' + [i, size]);
      if (size >= sizeToOutline) {
        // this by itself is big enough to outline, recurse into it and find statements to split on
        traverse(stat, function(node, type) {
          if (type == 'block') {
            if (measureSize(node) >= sizeToOutline) {
              var subRet = outlineStatements(func, asmData, node[1]);
              if (subRet && subRet.length > 0) ret.push.apply(ret, subRet);
            }
            return null; // do not recurse into children, outlineStatements will do so if necessary
          }
        });
        sizeSeen = 0;
        continue;
      }
      sizeSeen += size;
      if (sizeSeen >= sizeToOutline) {
        if (i == 0 && end == stats.length-1) {
          // we have the full range here, so outlining would do nothing useful
          if (stats.length >= 2) {
            // at least split this function in half
            i = Math.floor(stats.length/2);
            end = stats.length-1;
          } else {
            break;
          }
        }
        ret.push.apply(ret, doOutline(func, asmData, stats, i, end)); // outline [i, .. ,end] inclusive
        sizeSeen = 0;
        end = i-1;
      }
    }
    level--;
    return ret;
  }

  //

  var newFuncs = [];

  traverseGeneratedFunctions(ast, function(func) {
    var asmData = normalizeAsm(func);
    var size = measureSize(func);
    if (size >= sizeToOutline) {
      aggressiveVariableElimination(func, asmData);
      analyzeFunction(func, asmData);
      var ret = outlineStatements(func, asmData, getStatements(func));
      if (ret && ret.length > 0) newFuncs.push.apply(newFuncs, ret);
    }
    denormalizeAsm(func, asmData);
  });

  // TODO: control flow: route returns and breaks
  // TODO: recurse into new functions, must be careful though so as to not quickly re-outline and leave an intermediary skeletal function

  if (newFuncs.length > 0) {
    // We have outlined. Add stack support: header in which we allocate enough stack space TODO
    // If sp was not present before, add it and before each return, pop the stack TODO
    // (none of this should be done in inner functions, of course, just the original)

    // add new functions to the toplevel, or create a toplevel if there isn't one
    if (ast[0] === 'toplevel') {
      var stats = ast[1];
      stats.push.apply(stats, newFuncs);
    } else if (ast[0] === 'defun') {
      newFuncs.unshift(copy(ast));
      ast.length = 0;
      ast[0] = 'toplevel';
      ast[1] = newFuncs;
    }
  }
}