diff options
Diffstat (limited to 'tools/js-optimizer.js')
-rw-r--r-- | tools/js-optimizer.js | 357 |
1 files changed, 314 insertions, 43 deletions
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index e508cf40..50ddaf8b 100644 --- a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -136,6 +136,9 @@ var CONTROL_FLOW = set('do', 'while', 'for', 'if', 'switch'); var NAME_OR_NUM = set('name', 'num'); var ASSOCIATIVE_BINARIES = set('+', '*', '|', '&', '^'); +var BREAK_CAPTURERS = set('do', 'while', 'for', 'switch'); +var CONTINUE_CAPTURERS = LOOP; + var NULL_NODE = ['name', 'null']; var UNDEFINED_NODE = ['unary-prefix', 'void', ['num', 0]]; var TRUE_NODE = ['unary-prefix', '!', ['num', 0]]; @@ -405,7 +408,7 @@ function removeUnneededLabelSettings(ast) { // Various expression simplifications. Pre run before closure (where we still have metadata), Post run after. var USEFUL_BINARY_OPS = set('<<', '>>', '|', '&', '^'); -var COMPARE_OPS = set('<', '<=', '>', '>=', '==', '===', '!='); +var COMPARE_OPS = set('<', '<=', '>', '>=', '==', '===', '!=', '!=='); function simplifyExpressionsPre(ast) { // Simplify common expressions used to perform integer conversion operations @@ -504,6 +507,8 @@ function simplifyExpressionsPre(ast) { stack.push(1); } else if ((type === 'binary' && node[1] in SAFE_BINARY_OPS) || type === 'num' || type === 'name') { stack.push(0); // This node is safe in that it does not interfere with this optimization + } else if (type === 'unary-prefix' && node[1] === '~') { + stack.push(1); } else { stack.push(-1); // This node is dangerous! Give up if you see this before you see '1' } @@ -554,6 +559,12 @@ function simplifyExpressionsPre(ast) { } } } + } else if (type === 'binary' && node[1] === '^') { + // LLVM represents bitwise not as xor with -1. Translate it back to an actual bitwise not. + if (node[3][0] === 'unary-prefix' && node[3][1] === '-' && node[3][2][0] === 'num' && + node[3][2][1] === 1) { + return ['unary-prefix', '~', node[2]]; + } } else if (type === 'binary' && node[1] === '>>' && node[3][0] === 'num' && node[2][0] === 'binary' && node[2][1] === '<<' && node[2][3][0] === 'num' && node[2][2][0] === 'sub' && node[2][2][1][0] === 'name') { @@ -1523,10 +1534,15 @@ function unVarify(vars, ret) { // transform var x=1, y=2 etc. into (x=1, y=2), i var ASM_INT = 0; var ASM_DOUBLE = 1; -function detectAsmCoercion(node) { +function detectAsmCoercion(node, asmInfo) { // for params, +x vs x|0, for vars, 0.0 vs 0 if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE; - return node[0] === 'unary-prefix' ? ASM_DOUBLE : ASM_INT; + if (node[0] === 'unary-prefix') return ASM_DOUBLE; + if (asmInfo && node[0] == 'name') { + if (node[1] in asmInfo.vars) return asmInfo.vars[node[1]]; + if (node[1] in asmInfo.params) return asmInfo.params[node[1]]; + } + return ASM_INT; } function makeAsmParamCoercion(param, type) { @@ -1537,6 +1553,11 @@ function makeAsmVarDef(v, type) { return [v, type === ASM_INT ? ['num', 0] : ['unary-prefix', '+', ['num', 0]]]; } +function getAsmType(asmInfo, name) { + if (name in asmInfo.vars) return asmInfo.vars[name]; + return asmInfo.params[name]; +} + function normalizeAsm(func) { //printErr('pre-normalize \n\n' + astToSrc(func) + '\n\n'); var data = { @@ -2948,15 +2969,242 @@ function outline(ast) { }); } + // Prepares information for spilling of local variables + function analyzeFunction(func, asmData) { + var stack = []; // list of variables, each gets 8 bytes + for (var name in asmData.params) { + stack.push(name); + } + for (var name in asmData.vars) { + stack.push(name); + } + asmData.stackPos = {}; + for (var i = 0; i < stack.length; i++) { + asmData.stackPos[stack[i]] = i*8; + } + // Reserve an extra two spots: one for control flow var, the other for control flow data + asmData.stackSize = (stack.length + 2)*8; + asmData.controlStackPos = asmData.stackSize - 16; + asmData.controlDataStackPos = asmData.stackSize - 8; + asmData.splitCounter = 0; + } + + // Analyze uses - reads and writes - of variables in part of the AST of a function + function analyzeCode(func, asmData, ast) { + var labels = {}; + var labelCounter = 1; // 0 means no label + + traverse(ast, function(node, type) { + if ((type == 'label' || type in LOOP_FLOW) && node[1] && !(node[1] in labels)) { + labels[node[1]] = labelCounter++; + } + }); + + var writes = {}; + var appearances = {}; + var hasReturn = false, hasBreak = false, hasContinue = false; + var breaks = {}; // set of labels we break or continue + var continues = {}; // to. '0' is an unlabeled one + var breakCapturers = 0; + var continueCapturers = 0; + + traverse(ast, function(node, type) { + if (type == 'assign' && node[2][0] == 'name') { + var name = node[2][1]; + if (name in asmData.vars || name in asmData.params) { + writes[name] = 0; + appearances[name] = (appearances[name] || 0) - 1; // this appearance is a definition, offset the counting later + } + } else if (type == 'name') { + var name = node[1]; + if (name in asmData.vars || name in asmData.params) { + appearances[name] = (appearances[name] || 0) + 1; + } + } else if (type == 'return') { + hasReturn = true; + } else if (type == 'break') { + var label = node[1] || 0; + if (!label && breakCapturers > 0) return; // no label, and captured + if (label && (label in labels)) return; // label, and defined in this code, so captured + breaks[label || 0] = 0; + hasBreak = true; + } else if (type == 'continue') { + var label = node[1] || 0; + if (!label && continueCapturers > 0) return; // no label, and captured + if (label && (label in labels)) return; // label, and defined in this code, so captured + continues[label || 0] = 0; + hasContinue = true; + } else { + if (type in BREAK_CAPTURERS) { + breakCapturers++; + } + if (type in CONTINUE_CAPTURERS) { + continueCapturers++; + } + } + }, function(node, type) { + if (type in BREAK_CAPTURERS) { + breakCapturers--; + } + if (type in CONTINUE_CAPTURERS) { + continueCapturers--; + } + }); + + var reads = {}; + + for (var name in appearances) { + if (appearances[name] > 0) reads[name] = 0; + } + + return { writes: writes, reads: reads, hasReturn: hasReturn, breaks: breaks, continues: continues, labels: labels }; + } + + function makeAssign(dst, src) { + return ['assign', true, dst, src]; + } + function makeStackAccess(type, pos) { // TODO: float64, not 32 + return ['sub', ['name', type == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', pos]], ['num', '2']]]; + } + function makeIf(cond, then, else_) { + var ret = ['if', cond, ['block', then]]; + if (else_) ret.push(['block', else_]); + return ret; + } + function makeComparison(left, comp, right) { + return ['binary', comp, left, right]; + } + + var CONTROL_BREAK = 1, CONTROL_BREAK_LABEL = 2, CONTROL_CONTINUE = 3, CONTROL_CONTINUE_LABEL = 4, CONTROL_RETURN_VOID = 5, CONTROL_RETURN_INT = 6, CONTROL_RETURN_DOUBLE = 7; + var sizeToOutline = extraInfo.sizeToOutline; var level = 0; - function doOutline(func, asmData, stats, i, end) { - printErr(' do outline ' + [func[1], level, 'range:', i, end, 'of', stats.length]); - return [emptyNode()]; + function doOutline(func, asmData, stats, start, end) { + printErr(' do outline ' + [func[1], level, 'range:', start, end, 'of', stats.length]); + var code = stats.slice(start, end+1); + var newIdent = func[1] + '$' + (asmData.splitCounter++); + // add spills and reads before and after the call to the outlined code, and in the outlined code itself + var codeInfo = analyzeCode(func, asmData, code); + var reps = []; + for (var v in codeInfo.reads) { + if (v != 'sp') { + reps.push(['stat', ['assign', true, ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], ['name', v]]]); + code.unshift(['stat', ['assign', true, ['name', v], ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]]]]); + } + } + reps.push(['stat', ['call', ['name', newIdent], [['name', 'sp']]]]); + for (var v in codeInfo.writes) { + reps.push(['stat', ['assign', true, ['name', v], ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]]]]); + code.push(['stat', ['assign', true, ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], ['name', v]]]); + } + // Generate new function + if (codeInfo.hasReturn || codeInfo.hasBreak || codeInfo.hasContinue) { + // we need to capture all control flow using a top-level labeled one-time loop in the outlined function + code = [['label', 'OL', ['do', ['num', 0], ['block', code]]]]; + var breakCapturers = 0; + var continueCapturers = 0; + traverse(code, function(node, type) { + // replace all break/continue/returns with code to break out of the main one-time loop, and set the control data + if (type == 'return') { + var ret = ['break', 'OL']; + if (!node[1]) { + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', CONTROL_RETURN_VOID]), ret]; + } else { + var type = detectAsmCoercion(node[1], asmData); + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', type == ASM_INT ? CONTROL_RETURN_INT : CONTROL_RETURN_DOUBLE]), ret]; + ret = ['seq', makeAssign(makeStackAccess(type, asmData.controlDataStackPos), node[1]), ret]; + } + return ret; + } else if (type == 'break') { + var label = node[1] || 0; + if (label == 'OL') return; // this was just added before us, it is new replacement code + if (!label && breakCapturers > 0) return; // no label, and captured + if (label && (label in codeInfo.labels)) return; // label, and defined in this code, so captured + var ret = ['break', 'OL']; + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', label ? CONTROL_BREAK_LABEL : CONTROL_BREAK]), ret]; + if (label) { + assert(label in codeInfo.labels, label + ' in ' + keys(codeInfo.labels)); + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlDataStackPos), ['num', codeInfo.labels[label]]), ret]; + } + return ret; + } else if (type == 'continue') { + var label = node[1] || 0; + if (!label && continueCapturers > 0) return; // no label, and captured + if (label && (label in codeInfo.labels)) return; // label, and defined in this code, so captured + var ret = ['break', 'OL']; + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', label ? CONTROL_CONTINUE_LABEL : CONTROL_CONTINUE]), ret]; + if (label) { + ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlDataStackPos), ['num', codeInfo.labels[label]]), ret]; + } + return ret; + } else { + if (type in BREAK_CAPTURERS) { + breakCapturers++; + } + if (type in CONTINUE_CAPTURERS) { + continueCapturers++; + } + } + }, function(node, type) { + if (type in BREAK_CAPTURERS) { + breakCapturers--; + } + if (type in CONTINUE_CAPTURERS) { + continueCapturers--; + } + }); + // read the control data at the callsite to the outlined function + if (codeInfo.hasReturn) { + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_RETURN_VOID]), + [['stat', ['return']]] + )); + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_RETURN_INT]), + [['stat', ['return', makeStackAccess(ASM_INT, asmData.controlDataStackPos)]]] + )); + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_RETURN_DOUBLE]), + [['stat', ['return', makeStackAccess(ASM_DOUBLE, asmData.controlDataStackPos)]]] + )); + } + if (codeInfo.hasBreak) { + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_BREAK]), + ['stat', ['break']] + )); + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_BREAK_LABEL]), + ['stat', ['break', makeStackAccess(ASM_INT, asmData.controlDataStackPos)]] + )); + } + if (codeInfo.hasContinue) { + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_CONTINUE]), + ['stat', ['break']] + )); + reps.push(makeIf( + makeComparison(makeStackAccess(ASM_INT, asmData.controlStackPos), '==', ['num', CONTROL_CONTINUE_LABEL]), + ['stat', ['continue', makeStackAccess(ASM_INT, asmData.controlDataStackPos)]] + )); + } + } + var newFunc = ['defun', newIdent, ['sp'], code]; + var newAsmData = { params: { sp: ASM_INT }, vars: {} }; + for (var v in codeInfo.reads) { + newAsmData.vars[v] = getAsmType(asmData, v); + } + for (var v in codeInfo.writes) { + newAsmData.vars[v] = getAsmType(asmData, v); + } + denormalizeAsm(newFunc, newAsmData); + // replace in stats + stats.splice.apply(stats, [start, end-start+1].concat(reps)); + return [newFunc]; } - function outlineStatements(func, asmData, stats) { + function outlineStatements(func, asmData, stats, maxSize) { level++; if (measureSize(stats) < sizeToOutline) return; var ret = []; @@ -2973,7 +3221,7 @@ function outline(ast) { traverse(stat, function(node, type) { if (type == 'block') { if (measureSize(node) >= sizeToOutline) { - var subRet = outlineStatements(func, asmData, node[1]); + var subRet = outlineStatements(func, asmData, node[1], maxSize); if (subRet && subRet.length > 0) ret.push.apply(ret, subRet); } return null; // do not recurse into children, outlineStatements will do so if necessary @@ -2983,17 +3231,19 @@ function outline(ast) { continue; } sizeSeen += size; - if (sizeSeen >= sizeToOutline) { - if (i == 0 && end == stats.length-1) { - // we have the full range here, so inlining would do nothing useful - if (stats.length >= 2) { - // at least split this function in half - i = Math.floor(stats.length/2); - end = stats.length-1; - } else { - break; - } + // If this is big enough to outline, but no too big (if very close to the size of the full function, + // outlining is pointless; remove stats from the end to try to achieve the good case), then outline. + // Also, try to reduce the size if it is much larger than the hoped-for size + while ((sizeSeen > maxSize || sizeSeen > 2*sizeToOutline) && i < end) { + sizeSeen -= measureSize(stats[end]); + if (sizeSeen >= sizeToOutline) { + end--; + } else { + sizeSeen += measureSize(stats[end]); // abort, this took away too much + break; } + } + if (sizeSeen >= sizeToOutline && sizeSeen <= maxSize) { ret.push.apply(ret, doOutline(func, asmData, stats, i, end)); // outline [i, .. ,end] inclusive sizeSeen = 0; end = i-1; @@ -3005,29 +3255,44 @@ function outline(ast) { // - var newFuncs = []; + if (ast[0] !== 'toplevel') { + assert(ast[0] == 'defun'); + ast = ['toplevel', [ast]]; + } - traverseGeneratedFunctions(ast, function(func) { - var asmData = normalizeAsm(func); - var size = measureSize(func); - if (size >= sizeToOutline) { - aggressiveVariableElimination(func, asmData); - var ret = outlineStatements(func, asmData, getStatements(func)); - if (ret && ret.length > 0) newFuncs.push.apply(newFuncs, ret); - } - denormalizeAsm(func, asmData); - }); + var funcs = ast[1]; + + var more = true; + while (more) { + more = false; - if (newFuncs.length > 0) { - // add new functions to the toplevel, or create a toplevel if there isn't one - if (ast[0] === 'toplevel') { - var stats = ast[1]; - stats.push.apply(stats, newFuncs); - } else if (ast[0] === 'defun') { - newFuncs.unshift(copy(ast)); - ast.length = 0; - ast[0] = 'toplevel'; - ast[1] = newFuncs; + var newFuncs = []; + + funcs.forEach(function(func) { + var asmData = normalizeAsm(func); + var size = measureSize(func); + if (size >= sizeToOutline) { + aggressiveVariableElimination(func, asmData); + analyzeFunction(func, asmData); + var ret = outlineStatements(func, asmData, getStatements(func), 0.5*size); + if (ret && ret.length > 0) newFuncs.push.apply(newFuncs, ret); + } + denormalizeAsm(func, asmData); + }); + + // TODO: control flow: route returns and breaks. outlined code should have all breaks/continues/returns break into the outermost scope, + // after setting a state variable, etc. + + if (newFuncs.length > 0) { + // We have outlined. Add stack support: header in which we allocate enough stack space TODO + // If sp was not present before, add it and before each return, pop the stack. also a final pop if not ending with a return TODO + // (none of this should be done in inner functions, of course, just the original) + + // add new functions to the toplevel, or create a toplevel if there isn't one + ast[1].push.apply(ast[1], newFuncs); + + funcs = newFuncs; + more = true; } } } @@ -3057,12 +3322,13 @@ function fixDotZero(js) { }); } -function asmLoopOptimizer(ast) { +function asmLastOpts(ast) { traverseGeneratedFunctions(ast, function(fun) { - // This is at the end of the pipeline, we can assume all other optimizations are done, and we modify loops - // into shapes that might confuse other passes traverse(fun, function(node, type) { if (type === 'while' && node[1][0] === 'num' && node[1][1] === 1 && node[2][0] === 'block') { + // This is at the end of the pipeline, we can assume all other optimizations are done, and we modify loops + // into shapes that might confuse other passes + // while (1) { .. if (..) { break } } ==> do { .. } while(..) var stats = node[2][1]; var last = stats[stats.length-1]; @@ -3090,6 +3356,11 @@ function asmLoopOptimizer(ast) { node[1] = simplifyNotCompsDirect(['unary-prefix', '!', conditionToBreak]); return node; } + } else if (type == 'binary' && node[1] == '&' && node[3][0] == 'unary-prefix' && node[3][1] == '-' && node[3][2][0] == 'num' && node[3][2][1] == 1) { + // Change &-1 into |0, at this point the hint is no longer needed + node[1] = '|'; + node[3] = node[3][2]; + node[3][1] = 0; } }); }); @@ -3148,7 +3419,7 @@ arguments_.slice(1).forEach(function(arg) { passes[arg](ast); }); if (asm && last) { - asmLoopOptimizer(ast); // TODO: move out of last, to make last faster when done later (as in side modules) + asmLastOpts(ast); // TODO: move out of last, to make last faster when done later (as in side modules) prepDotZero(ast); } var js = astToSrc(ast, minifyWhitespace), old; |