diff options
Diffstat (limited to 'tools/eliminator.js')
-rw-r--r-- | tools/eliminator.js | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/tools/eliminator.js b/tools/eliminator.js new file mode 100644 index 00000000..47fd7597 --- /dev/null +++ b/tools/eliminator.js @@ -0,0 +1,514 @@ +/* + A script to eliminate redundant variables common in Emscripted code. + + A variable === eliminateable if (it matches a leaf of that condition tree: + + Single-def + Uses only side-effect-free nodes + Unused + * + Has at most MAX_USES uses + No mutations to any dependencies between def && last use + No global dependencies or no indirect accesses between def && use + * + + TODO(max99x): Eliminate single-def undefined-initialized vars with no uses + between declaration && definition. +*/ + +// Imports. +var uglify = require('../tools/eliminator/node_modules/uglify-js'); +var fs = require('fs'); +var os = require('os'); + +function set() { + var args = typeof arguments[0] === 'object' ? arguments[0] : arguments; + var ret = {}; + for (var i = 0; i < args.length; i++) { + ret[args[i]] = 0; + } + return ret; +} + +// Functions which have been generated by Emscripten. We optimize only those. +var generatedFunctions = {}; +var GENERATED_FUNCTIONS_MARKER = '// EMSCRIPTEN_GENERATED_FUNCTIONS:'; +function isGenerated (ident) { + return ident in generatedFunctions; +} + +// Maximum number of uses to consider a variable not worth eliminating. +// The risk with using > 1 === that a variable used by another can have +// a chain that leads to exponential uses +var MAX_USES = 1; + +// The UglifyJs code generator settings to use. +var GEN_OPTIONS = { + ascii_only: true, + beautify: true, + indent_level: 2 +}; + +// Node types which can be evaluated without side effects. +var NODES_WITHOUT_SIDE_EFFECTS = { + name: true, + num: true, + string: true, + binary: true, + sub: true, + 'unary-prefix': true // ++x can have side effects, but we never have that in generated code +}; + +// Nodes which may break control flow. Moving a variable beyond them may have +// side effects. +var CONTROL_FLOW_NODES = { + new: true, + throw: true, + call: true, + label: true, + debugger: true +}; + +var ANALYZE_BLOCK_TYPES = { + 'switch': true, + 'if': true, + 'try': true, + 'do': true, + 'while': true, + 'for': true, + 'for-in': true +}; + +// Traverses a JavaScript syntax tree rooted at the given node calling the given +// callback for each node. +// that.arg node: The root of the AST. +// that.arg callback: The callback to call for each node. This will be called with +// the node as the first argument && its type as the second. If false is +// returned, the traversal === stopped. If a non-undefined value === returned, +// it replaces the passed node in the tree. +// that.returns: If the root node was replaced, the new root node. If the traversal +// was stopped, false. Otherwise undefined. +function traverse(node, callback) { + var type = node[0]; + if (typeof type == 'string') { + result = callback(node, type); + if (result) return result; + } + for (var index = 0; index < node.length; index++) { + var subnode = node[index]; + if (subnode && typeof subnode == 'object' && subnode.length) { + // NOTE: For-in nodes have unspecified var mutations. Skip them. + if (type == 'for-in' && subnode[0] == 'var') continue; + var subresult = traverse(subnode, callback); + if (subresult === false) { + return false; + } else if (subresult) { + node[index] = subresult; + } + } + } +} + +// A class for eliminating redundant variables from JavaScript. Give it an AST +// function/defun node && call run() to apply the optimization (in-place). +var that = that; // there is always a single global Eliminator instance +function Eliminator(func) { + that = this; + // The statements of the function to analyze. + that.body = func[3]; + + // Identifier stats. Each of these objects === indexed by the identifier name. + // Whether the identifier === a local variable. + that.isLocal = {}; + // Whether the identifier === never modified after initialization. + that.isSingleDef = {}; + // How many times the identifier === used. + that.useCount = {}; + // Whether the initial value of a single-def identifier uses only nodes + // evaluating which has no side effects. + that.usesOnlySimpleNodes = {}; + // Whether the identifier depends on any non-local name, perhaps indirectly. + that.dependsOnAGlobal = {}; + // Whether the dependencies of the single-def identifier may be mutated + // within its live range. + that.depsMutatedInLiveRange = {}; + // Maps a given single-def variable to the AST expression of its initial value. + that.initialValue = {}; + // Maps identifiers to single-def variables which reference it in their + // initial value, i.e., which other variables it affects. + that.affects = {}; + + // Runs the eliminator on a given function body updating the AST in-place. + // that.returns: The number of variables eliminated, or undefined if (skipped. + that.run = function() { + that.calculateBasicVarStats(); + that.analyzeInitialValues(); + that.calculateTransitiveDependencies(); + that.analyzeLiveRanges(); + + var toReplace = {}; + var eliminated = 0; + for (var varName in that.isSingleDef) { + if (that.isEliminateable(varName)) { + toReplace[varName] = that.initialValue[varName]; + eliminated++; + } + } + + that.removeDeclarations(toReplace); + that.collapseValues(toReplace); + that.updateUses(toReplace); + + return eliminated; + }; + + // Runs the basic variable scan pass. Fills the following member variables: + // isLocal + // isSingleDef + // useCount + // initialValue + that.calculateBasicVarStats = function() { + traverse(that.body, function(node, type) { + if (type === 'var') { + node[1].forEach(function(pair) { + var varName = pair[0]; + var varValue = pair[1]; + that.isLocal[varName] = true; + if (!varValue) varValue = ['name', 'undefined']; // XXX share? + that.isSingleDef[varName] = !that.isSingleDef.hasOwnProperty(varName); + that.initialValue[varName] = varValue; + that.useCount[varName] = 0; + }); + } else if (type === 'name') { + varName = node[1]; + if (that.useCount.hasOwnProperty(varName)) that.useCount[varName]++; + else that.isSingleDef[varName] = false; + } else if (type == 'assign') { + varName = node[2][1]; + if (that.isSingleDef[varName]) that.isSingleDef[varName] = false; + } + }); + }; + + // Analyzes the initial values of single-def variables. Requires basic variable + // stats to have been calculated. Fills the following member variables: + // affects + // dependsOnAGlobal + // usesOnlySimpleNodes + that.analyzeInitialValues = function() { + for (var varName in that.isSingleDef) { + if (!that.isSingleDef[varName]) continue; + that.usesOnlySimpleNodes[varName] = true; + traverse(that.initialValue[varName], function(node, type) { + if (!(type in NODES_WITHOUT_SIDE_EFFECTS)) { + that.usesOnlySimpleNodes[varName] = false; + } else if (type === 'name') { + var reference = node[1]; + if (reference != 'undefined') { + if (!that.affects[reference]) that.affects[reference] = {}; + if (!that.isLocal[reference]) that.dependsOnAGlobal[varName] = true; + that.affects[reference][varName] = true; + } + } + }); + } + }; + + // Updates the dependency graph (@affects) to its transitive closure && + // synchronizes that.dependsOnAGlobal to the new dependencies. + that.calculateTransitiveDependencies = function() { + var incomplete = true; + var todo = {}; + for (var element in that.affects) { + todo[element] = 1; + } + + //process.stdout.write 'pre ' + JSON.stringify(@affects, null, ' ') + '\n' + + while (incomplete) { + incomplete = false; + var nextTodo = {}; + for (var source in that.affects) { + var targets = that.affects[source]; + for (var target in targets) { + if (todo[target]) { + for (target2 in that.affects[target]) { + if (!targets[target2]) { + if (!that.isLocal[source]) that.dependsOnAGlobal[target2] = true; + targets[target2] = true; + nextTodo[source] = 1; + incomplete = true; + } + } + } + } + } + todo = nextTodo; + } + + //process.stdout.write 'post ' + JSON.stringify(@affects, null, ' ') + '\n' + }; + + // Analyzes the live ranges of single-def variables. Requires dependencies to + // have been calculated. Fills the following member variables: + // depsMutatedInLiveRange + that.analyzeLiveRanges = function() { + var isLive = {}; + + // Checks if (a given node may mutate any of the currently live variables. + function checkForMutations(node, type) { + var usedInThisStatement = {}; + if (type == 'assign' || type == 'call') { + traverse(node.slice(2, 4), function(node, type) { + if (type === 'name') usedInThisStatement[node[1]] = true; + }); + } + + if (type == 'assign' || type == 'unary-prefix' || type == 'unary-postfix') { + if (type === 'assign' || node[1] == '--' || node[1] == '++') { + var reference = node[2]; + while (reference[0] != 'name') { + reference = reference[1]; + } + reference = reference[1]; + if (that.affects[reference]) { + for (var varName in that.affects[reference]) { + if (isLive[varName]) { + isLive[varName] = false; + } + } + } + } + } + + if (type in CONTROL_FLOW_NODES) { + for (var varName in isLive) { + if (that.dependsOnAGlobal[varName] || !usedInThisStatement[varName]) { + isLive[varName] = false; + } + } + } else if (type === 'assign') { + for (var varName in isLive) { + if (that.dependsOnAGlobal[varName] && !usedInThisStatement[varName]) { + isLive[varName] = false; + } + } + } else if (type === 'name') { + var reference = node[1]; + if (that.isSingleDef[reference]) { + if (!isLive[reference]) { + that.depsMutatedInLiveRange[reference] = true; + } + } + } + } + + // Analyzes a block && all its children for variable ranges. Makes sure to + // account for the worst case of possible mutations. + function analyzeBlock(node, type) { + if (type in ANALYZE_BLOCK_TYPES) { + function traverseChild(child) { + if (child && typeof child == 'object' && child.length) { + var savedLive = {}; + for (var name in isLive) savedLive[name] = true; + traverse(child, analyzeBlock); + for (var name in isLive) { + if (!isLive[name]) savedLive[name] = false; + } + isLive = savedLive; + } + } + if (type === 'switch') { + traverseChild(node[1]); + for (var i = 0; i < node[2].length; i++) { + traverseChild(node[2][i]); + } + } else if (type == 'if' || type == 'try') { + for (var i = 0; i < node.length; i++) { + traverseChild(node[i]); + } + } else { + // Don't put anything from outside into the body of a loop. + isLive = {}; + node.forEach(traverseChild); + // Don't keep anything alive through a loop + isLive = {}; + } + return node; + } else if (type === 'var') { + for (var i = 0; i < node[1].length; i++) { + var varName = node[1][i][0]; +// all for-in are suspect + var varValue = node[1][i][1]; + if (varValue) traverse(varValue, checkForMutations); + // Mark the variable as live + if (that.isSingleDef[varName]) { + isLive[varName] = true; + } + // Mark variables that depend on it as no longer live + if (that.affects[varName]) { + for (var varNameDep in that.affects[varName]) { + if (isLive[varNameDep]) { + isLive[varNameDep] = false; + } + } + } + } + return node; + } else { + checkForMutations(node, type); + } + } + + traverse(that.body, analyzeBlock); + }; + + // Determines whether a given variable can be safely eliminated. Requires all + // analysis passes to have been run. + that.isEliminateable = function(varName) { + if (that.isSingleDef[varName] && that.usesOnlySimpleNodes[varName]) { + if (that.useCount[varName] == 0) { + return true; + } else if (that.useCount[varName] <= MAX_USES) { + return !that.depsMutatedInLiveRange[varName]; + } + } + return false; + }; + + // Removes all var declarations for the specified variables. + // that.arg toRemove: An object whose keys are the variable names to remove. + that.removeDeclarations = function(toRemove) { + traverse(that.body, function(node, type) { + if (type === 'var') { + var intactVars = node[1].filter(function(i) { return !toRemove.hasOwnProperty(i[0]) }); + if (intactVars.length) { + node[1] = intactVars; + return node; + } else { + return ['toplevel', []]; + } + } + }); + }; + + // Updates all the values for the given variables to eliminate reference to any + // of the other variables in the group. + // that.arg values: A map from variable names to their values as AST expressions. + that.collapseValues = function(values) { + var incomplete = true; + while (incomplete) { + incomplete = false; + for (var varName in values) { + var varValue = values[varName]; + var result = traverse(varValue, function(node, type) { + if (type == 'name' && values.hasOwnProperty(node[1]) && node[1] != varName) { + incomplete = true; + return values[node[1]]; + } + }); + if (result) values[varName] = result; + } + } + }; + + // Replaces all uses of the specified variables with their respective + // expressions. + // that.arg replacements: A map from variable names to AST expressions. + that.updateUses = function(replacements) { + traverse(that.body, function(node, type) { + if (type === 'name' && replacements.hasOwnProperty(node[1])) { + return replacements[node[1]]; + } + }); + }; +} + +// A class for optimizing expressions. We know that it === legitimate to collapse +// 5+7 in the generated code, as it will always be numerical, for example. XXX do we need this? here? +function ExpressionOptimizer(node) { + this.node = node; + + this.run = function() { + var that = this; + traverse(that.node, function(node, type) { + if (type === 'binary' && node[1] == '+') { + var names = []; + var num = 0; + var has_num = false; + var fail = false; + traverse(node, function(subNode, subType) { + if (subType === 'binary') { + if (subNode[1] != '+') { + fail = true; + return false; + } + } else if (subType === 'name') { + names.push(subNode[1]); + return; + } else if (subType === 'num') { + num += subNode[1]; + has_num = true; + return; + } else { + fail = true; + return false; + } + }); + if (!fail && has_num) { + var ret = ['num', num]; + for (var i = 0; i < names.length; i++) { + ret = ['binary', '+', ['name', names[i]], ret]; + } + return ret; + } + } + }); + }; +} + +// The main entry point. Reads JavaScript from stdin, runs the eliminator on each +// function, then writes the optimized result to stdout. +function main() { + // Get the parse tree. + //process.stderr.write(JSON.stringify(process.argv[2]) + '\n') + var src = fs.readFileSync(process.argv[2]).toString(); + + var generatedFunctionsLine = src.split('\n').filter(function(line) { + return line.indexOf(GENERATED_FUNCTIONS_MARKER) == 0; + }); + generatedFunctions = set(eval(generatedFunctionsLine[0].replace(GENERATED_FUNCTIONS_MARKER, ''))); + + var ast = uglify.parser.parse(src); + + //process.stderr.write('1 ' + JSON.stringify(ast, null, 2) + '\n') + + // Run on all functions. + traverse(ast, function(node, type) { + if ((type == 'defun' || type == 'function') && isGenerated(node[1])) { + // Run the eliminator + //process.stderr.write (node[1] || '(anonymous)') + '\n' + var eliminated = new Eliminator(node).run(); + // Run the expression optimizer + new ExpressionOptimizer(node[3]).run(); + } + }); + + // Write out the optimized code. + // NOTE: For large file, can't generate code for the whole file in a single + // call due to the v8 memory limit. Writing out root children instead. + for (var i = 0; i < ast[1].length; i++) { + var node = ast[1][i]; + + // Parse && recompile again, to remove unneeded lines XXX is this worth the parse time? + var src2 = uglify.uglify.gen_code(node, GEN_OPTIONS); + var node2 = uglify.parser.parse(src2); + + process.stdout.write(uglify.uglify.gen_code(node2, GEN_OPTIONS)); + process.stdout.write('\n'); + } + process.stdout.write(generatedFunctionsLine + '\n'); +} + +main(); + |