aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/analyzer.js879
-rw-r--r--src/compiler.js44
-rw-r--r--src/intertyper.js615
-rw-r--r--src/jsifier.js663
-rw-r--r--src/parseTools.js368
-rw-r--r--src/parser.js2572
-rw-r--r--src/settings.js2
-rw-r--r--src/utility.js2
8 files changed, 2572 insertions, 2573 deletions
diff --git a/src/analyzer.js b/src/analyzer.js
new file mode 100644
index 00000000..c9ccf82a
--- /dev/null
+++ b/src/analyzer.js
@@ -0,0 +1,879 @@
+// Analyze intertype data
+
+VAR_NATIVE = 'native';
+VAR_NATIVIZED = 'nativized';
+VAR_EMULATED = 'emulated';
+
+function cleanFunc(func) {
+ func.lines = func.lines.filter(function(line) { return line.intertype !== null });
+ func.labels.forEach(function(label) {
+ label.lines = label.lines.filter(function(line) { return line.intertype !== null });
+ });
+}
+
+function analyzer(data) {
+//print('zz analaz')
+ substrate = new Substrate('Analyzer');
+
+ // Sorter
+ substrate.addZyme('Sorter', {
+ processItem: function(item) {
+ item.items.sort(function (a, b) { return a.lineNum - b.lineNum });
+ this.forwardItem(item, 'Gatherer');
+ },
+ });
+
+ // Gatherer
+ substrate.addZyme('Gatherer', {
+ processItem: function(item) {
+ // Single-liners
+ ['globalVariable', 'functionStub', 'type'].forEach(function(intertype) {
+ var temp = splitter(item.items, function(item) { return item.intertype == intertype });
+ item[intertype + 's'] = temp.splitOut;
+ item.items = temp.leftIn;
+ });
+ // Functions & labels
+ item.functions = []
+ for (var i = 0; i < item.items.length; i++) {
+ var subItem = item.items[i];
+ if (subItem.intertype == 'function') {
+ item.functions.push(subItem);
+ subItem.endLineNum = null;
+ subItem.lines = [];
+ subItem.labels = [];
+ } else if (subItem.intertype == 'functionEnd') {
+ item.functions.slice(-1)[0].endLineNum = subItem.lineNum;
+ } else if (subItem.intertype == 'label') {
+ item.functions.slice(-1)[0].labels.push(subItem);
+ subItem.lines = [];
+ } else if (item.functions.slice(-1)[0].endLineNum === null) {
+ // Internal line
+ item.functions.slice(-1)[0].lines.push(subItem);
+ item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem);
+ } else {
+ print("ERROR: what is this? " + JSON.stringify(subItem));
+ }
+ }
+ delete item.items;
+ this.forwardItem(item, 'Identinicer');
+ },
+ });
+
+ // IdentiNicer
+ substrate.addZyme('Identinicer', {
+ processItem: function(output) {
+ walkJSON(output, function(item) {
+ ['', '2', '3', '4', '5'].forEach(function(ext) {
+ if (item && item['ident' + ext])
+ item['ident' + ext] = toNiceIdent(item['ident' + ext]);
+ });
+ });
+ this.forwardItem(output, 'Typevestigator');
+ }
+ });
+
+ function addType(type, data) {
+ if (type.length == 1) return;
+ if (data.types[type]) return;
+ if (['internal', 'inbounds', 'void'].indexOf(type) != -1) return;
+ dprint('types', '// addType: ' + type);
+ var check = new RegExp(/^\[(\d+)\ x\ (.*)\]$/g).exec(type);
+ // 'blocks': [14 x %struct.X] etc.
+ if (check) {
+ var num = parseInt(check[1]);
+ var subType = check[2];
+ data.types[type] = {
+ name_: type,
+ fields: range(num).map(function() { return subType }),
+ lineNum: '?',
+ };
+ return;
+ }
+ if (['['].indexOf(type) != -1) return;
+ if (isNumberType(type) || isPointerType(type)) return;
+ data.types[type] = {
+ name_: type,
+ fields: [ 'int32' ], // XXX
+ flatSize: 1,
+ lineNum: '?',
+ };
+ }
+
+ // Typevestigator
+ substrate.addZyme('Typevestigator', {
+ processItem: function(data) {
+ // Convert types list to dict
+ var old = data.types;
+ data.types = {};
+ old.forEach(function(type) { data.types[type.name_] = type });
+
+ // Find additional types
+ walkJSON(data, function(item) {
+ if (!item) return;
+ if (item.type) {
+ addType(!item.type.text ? item.type : item.type.text, data);
+ }
+ if (item.type2) {
+ addType(!item.type2.text ? item.type2 : item.type2.text, data);
+ }
+ });
+ this.forwardItem(data, 'Typeanalyzer');
+ }
+ });
+
+ // Type analyzer
+ substrate.addZyme('Typeanalyzer', {
+ processItem: function(item) {
+ // 'fields' is the raw list of LLVM fields. However, we embed
+ // child structures into parent structures, basically like C.
+ // So { int, { int, int }, int } would be represented as
+ // an Array of 4 ints. getelementptr on the parent would take
+ // values 0, 1, 2, where 2 is the entire middle structure.
+ // We also need to be careful with getelementptr to child
+ // structures - we return a pointer to the same slab, just
+ // a different offset. Likewise, need to be careful for
+ // getelementptr of 2 (the last int) - it's real index is 4.
+ // The benefit of this approach is inheritance -
+ // { { ancestor } , etc. } = descendant
+ // In this case it is easy to bitcast ancestor to descendant
+ // pointers - nothing needs to be done. If the ancestor were
+ // a new slab, it would need some pointer to the outer one
+ // for casting in that direction.
+ // TODO: bitcasts of non-inheritance cases of embedding (not at start)
+ var more = true;
+ while (more) {
+ more = false;
+ values(item.types).forEach(function(type) {
+ if (type.flatIndexes) return;
+ var ready = true;
+ type.fields.forEach(function(field) {
+ //print('// zz getT: ' + type.name_ + ' : ' + field);
+ if (isStructType(field)) {
+ if (!item.types[field]) {
+ addType(field, item);
+ ready = false;
+ } else {
+ if (!item.types[field].flatIndexes) {
+ ready = false;
+ }
+ }
+ }
+ });
+ if (!ready) {
+ more = true;
+ return;
+ }
+ type.flatSize = 0;
+ var sizes = [];
+ type.flatIndexes = type.fields.map(function(field) {
+ var soFar = type.flatSize;
+ var size = 1;
+ if (isStructType(field)) {
+ size = item.types[field].flatSize;
+ }
+ type.flatSize += size;
+ sizes.push(size);
+ return soFar;
+ });
+ if (dedup(sizes).length == 1) {
+ type.flatFactor = sizes[0];
+ }
+ type.needsFlattening = (this.flatFactor != 1);
+ dprint('types', 'type: ' + type.name_ + ' : ' + JSON.stringify(type.fields));
+ dprint('types', ' has final size of ' + type.flatSize + ', flatting: ' + type.needsFlattening + ' ? ' + (type.flatFactor ? type.flatFactor : JSON.stringify(type.flatIndexes)));
+ });
+ }
+
+ this.forwardItem(item, 'VariableAnalyzer');
+ },
+ });
+
+ // Variable analyzer
+ substrate.addZyme('VariableAnalyzer', {
+ processItem: function(item) {
+ item.functions.forEach(function(func) {
+ func.variables = {};
+
+ // LLVM is SSA, so we always have a single assignment/write. We care about
+ // the reads/other uses.
+ walkJSON(func.lines, function(item) {
+ if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr', 'fastgetelementptrload'].indexOf(item.value.intertype) != -1) {
+ func.variables[item.ident] = {
+ ident: item.ident,
+ type: item.value.type.text,
+ origin: item.value.intertype,
+ uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]),
+ };
+ }
+ });
+
+ for (vname in func.variables) {
+ var variable = func.variables[vname];
+
+ // Whether the value itself is used. For an int, always yes. For a pointer,
+ // we might never use the pointer's value - we might always just store to it /
+ // read from it. If so, then we can optimize away the pointer.
+ variable.hasValueTaken = false;
+ // Whether our address was used. If not, then we do not need to bother with
+ // implementing this variable in a way that other functions can access it.
+ variable.hasAddrTaken = false;
+
+ variable.pointingLevels = pointingLevels(variable.type);
+
+ // Analysis!
+
+ if (variable.pointingLevels > 0) {
+ // Pointers
+ variable.loads = 0;
+ variable.stores = 0;
+
+ func.lines.forEach(function(line) {
+ //print(dump(line))
+ if (line.intertype == 'store' && line.ident == vname) {
+ variable.stores ++;
+ } else if ((line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) ||
+ (line.intertype == 'fastgetelementptrload' && line.ident == vname)) {
+ variable.loads ++;
+ }
+ });
+
+ variable.otherUses = variable.uses - variable.loads - variable.stores;
+ if (variable.otherUses > 0)
+ variable.hasValueTaken = true;
+ }
+
+ // Decision time
+
+ if (variable.origin == 'getelementptr') {
+ // Use our implementation that emulates pointers etc.
+ variable.impl = VAR_EMULATED;
+ } else if ( variable.pointingLevels === 0 && !variable.hasAddrTaken ) {
+ // A simple int value, can be implemented as a native variable
+ variable.impl = VAR_NATIVE;
+ } else if ( variable.pointingLevels === 1 && variable.origin === 'alloca' && !isStructPointerType(variable.type) && !variable.hasAddrTaken && !variable.hasValueTaken ) {
+ // A pointer to a value which is only accessible through this pointer. Basically
+ // a local value on the stack, which nothing fancy is done on. So we can
+ // optimize away the pointing altogether, and just have a native variable
+ variable.impl = VAR_NATIVIZED;
+ } else {
+ variable.impl = VAR_EMULATED;
+ }
+ dprint('vars', '// var ' + vname + ': ' + JSON.stringify(variable));
+ }
+ });
+ this.forwardItem(item, 'Relooper');
+ },
+ });
+
+ // ReLooper - reconstruct nice loops, as much as possible
+ substrate.addZyme('Relooper', {
+ processItem: function(item) {
+ var that = this;
+ function finish() {
+ that.forwardItem(item, 'Optimizer');
+ }
+
+ // Tools
+
+ function replaceLabels(line, labelId, toLabelId) {
+ //print('// XXX replace ' + labelId + ' with ' + toLabelId);
+ function process(item) {
+ ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel', 'defaultLabel'].forEach(function(id) {
+ if (item[id] && item[id] == labelId) {
+ item[id] = toLabelId;
+ //print(' replaced!');
+ }
+ });
+ }
+ if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
+ process(line);
+ } else if (line.intertype == 'switch') {
+ process(line);
+ line.switchLabels.forEach(process);
+ }
+ }
+
+ function replaceLabelLabels(label, labelId, toLabelId) {
+ label.lines.forEach(function(line) { replaceLabels(line, labelId, toLabelId) });
+ return label;
+ }
+
+ function replaceInLabels(labels, toReplace, replaceWith) {
+ //print('// XXX replaceIn ' + toReplace + ' with ' + replaceWith);
+ assertEq(!replaceWith || toReplace.length == 1, true); // TODO: implement other case
+ labels.forEach(function(label) {
+ ['inLabels'].forEach(function(l) {
+ label[l] = label[l].map(function(labelId) { return toReplace.indexOf(labelId) == -1 ? labelId : replaceWith})
+ .filter(function(labelId) { return !!labelId });
+ });
+ });
+ return labels;
+ }
+
+ function calcLabelBranchingData(labels, labelsDict) {
+ item.functions.forEach(function(func) {
+ labels.forEach(function(label) {
+ label.outLabels = [];
+ label.inLabels = [];
+ label.hasReturn = false;
+ label.hasBreak = false;
+ });
+ });
+ // Find direct branchings
+ labels.forEach(function(label) {
+ dprint('find direct branchings at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
+ label.lines.forEach(function(line) {
+ function process(item) {
+ ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel'].forEach(function(id) {
+ if (item[id]) {
+ if (item[id][0] == 'B') { // BREAK, BCONT, BNOPP
+ label.hasBreak = true;
+ } else {
+ label.outLabels.push(item[id]);
+ labelsDict[item[id]].inLabels.push(label.ident);
+ }
+ }
+ });
+ }
+ if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
+ process(line);
+ } else if (line.intertype == 'switch') {
+ process(line);
+ line.switchLabels.forEach(process);
+ }
+
+ label.hasReturn |= line.intertype == 'return';
+ });
+ });
+ // Find all incoming and all outgoing - recursively
+ labels.forEach(function(label) {
+ label.allInLabels = [];
+ label.allOutLabels = [];
+ //! MustGetTo ignores return - |if (x) return; else Y| must get to Y.
+ label.mustGetHere = [];
+ });
+
+ var worked = true;
+ while (worked) {
+ worked = false;
+ labels.forEach(function(label) {
+ //print('zz at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
+ function inout(s, l) {
+ var temp = label[s].slice(0);
+ label[s].forEach(function(label2Id) {
+ temp = temp.concat(labelsDict[label2Id][l]);
+ });
+ temp = dedup(temp);
+ temp.sort();
+ if (JSON.stringify(label[l]) != JSON.stringify(temp)) {
+ //print('zz noticed ' + label.ident + ' ? ' + s + ',' + l + ' : ' + label[s] + ' | ' + label[l]);
+ label[l] = temp;
+ worked = true;
+ }
+ }
+ inout('inLabels', 'allInLabels');
+ inout('outLabels', 'allOutLabels');
+ });
+ }
+
+ // Find all mustGetTos
+ labels.forEach(function(label) {
+ //print('path for: ' + label.ident + ',' + dump(label));
+ function walk(path, label) {
+ //print('path is: ' + getLabelIds(path.concat([label])));
+ // If all we are is a break/return - then stop here. Otherwise, continue to other path
+ //print('??? ' + label.hasReturn + ' : ' + label.hasBreak + ' : ' + label.outLabels.length);
+ if (label.hasReturn || (label.hasBreak && label.outLabels.length == 0)) {
+ //print('path done.');
+ return [path.concat([label])]
+ };
+ if (path.indexOf(label) != -1) {
+ //print('looping path - abort it.');
+ return []; // loop - drop this path
+ }
+ path = path.concat([label]);
+ return label.outLabels.map(function(outLabelId) { return walk(path, labelsDict[outLabelId]) })
+ .reduce(function(a, b) { return a.concat(b) }, [])
+ .filter(function(path) { return path.length > 0 });
+ }
+ var paths = walk([], label).map(function(path) { return getLabelIds(path) });
+ //print('XXX paths: ' + JSON.stringify(paths));
+ var possibles = dedup(paths.reduce(function(a,b) { return a.concat(b) }, []));
+ label.mustGetTo = possibles.filter(function(possible) {
+ return paths.filter(function(path) { return path.indexOf(possible) == -1 }) == 0;
+ }).filter(function(possible) { return possible != label.ident });
+ //print('XXX must get to: ' + JSON.stringify(label.mustGetTo));
+ });
+
+ labels.forEach(function(label) {
+ label.mustGetTo.forEach(function (mustGetTo) {
+ labelsDict[mustGetTo].mustGetHere.push(label.ident);
+ });
+ });
+
+ if (dcheck('labelbranching')) {
+ labels.forEach(function(label) {
+ print('// label: ' + label.ident + ' :out : ' + JSON.stringify(label.outLabels));
+ print('// ' + label.ident + ' :in : ' + JSON.stringify(label.inLabels));
+ print('// ' + label.ident + ' :ALL out : ' + JSON.stringify(label.allOutLabels));
+ print('// ' + label.ident + ' :ALL in : ' + JSON.stringify(label.allInLabels));
+ print('// ZZZZZZ ' + label.ident + ' must get to all of ' + JSON.stringify(label.mustGetTo));
+ });
+ }
+ }
+
+/* // Disabled merging as it seems it just removes a return now and then.
+ function mergeLabels(label1Ident, label2Ident) {
+ var label1 = func.labelsDict[label1Ident];
+ var label2 = func.labelsDict[label2Ident];
+ label1.lines.pop();
+ label1.lines = label1.lines.concat(label2.lines);
+ label1.outLabels = label2.outLabels;
+ label2.lines = null;
+ func.labels = func.labels.filter(function(label) { return !!label.lines });
+print('// zz Merged away! ' + label2.ident + ' into ' + label1.ident);
+ delete func.labelsDict[label2.ident];
+ replaceInLabels(func.labels, [label2.ident], label1.ident);
+ }
+//print('Labels pre merge : ' + getLabelIds(func.labels));
+
+ // Merge trivial labels
+ var worked = true;
+ while (worked) {
+ worked = false;
+ func.labels.forEach(function(label) {
+ if (label.lines === null) return; // We were deleted
+//print("// Consider: " + label.ident + ' : out/in: ' + label.outLabels.length + ',' + label.inLabels.length);// + dump(label.lines.slice(-1)[0]));
+ if (label.outLabels.length == 1 &&
+ label.lines.slice(-1)[0].intertype == 'branch' &&
+// label.lines.slice(-1)[0].label && // nonconditional branch
+ label.lines.slice(-1)[0].label == label.outLabels[0] &&
+ func.labelsDict[label.outLabels[0]].inLabels.length == 1) {
+//print("// and target: " + func.labelsDict[label.outLabels[0]].inLabels);
+ mergeLabels(label.ident, label.outLabels[0]);
+ worked = true;
+ }
+ });
+ }
+*/
+
+ // 'block': A self-enclosed part of the program, for example a loop or an if
+ function makeBlock(labels, entry, labelsDict) {
+ var def = {
+ type: 'emulated', // a block we cannot map to a nicer structure like a loop. We emulate it with a barbaric switch
+ labels: labels,
+ entry: entry,
+ };
+ if (!RELOOP) return def;
+
+ if (!entry) {
+ assertTrue(labels.length == 0);
+ return null; // Empty block - not even an entry
+ }
+
+ function getLastLine(block) {
+ //dprint('get last line at: ' + block.labels[0].ident);
+ if (block.next) return getLastLine(block.next);
+ switch(block.type) {
+ case 'loop':
+ return getLastLine(block.rest);
+ case 'if':
+ case 'breakingif':
+ return getLastLine(block.ifTrue);
+ case 'emulated':
+ case 'simple':
+ if (block.labels.length == 1) {
+ return block.labels[0].lines.slice(-1)[0];
+ } else {
+ //for (var i = 0; i < block.labels.length; i++)
+ //print('Remaining label is at line: ' + block.labels[i].lines[0].lineNum);
+ return null; // throw "Not clear what the last line is."
+ }
+ }
+ }
+ function getAll(fromId, beforeIds) {
+ beforeIds = beforeIds ? beforeIds : [];
+ //print("//getAll : " + fromId + ' : ' + beforeIds);
+ if (beforeIds && beforeIds.indexOf(fromId) != -1) return [];
+ //print("getAll proceeding");
+ var from = labelsDict[fromId];
+ return dedup([from].concat(
+ from.outLabels.map(function(outLabel) { return getAll(outLabel, beforeIds.concat(fromId)) })
+ .reduce(function(a,b) { return a.concat(b) }, [])
+ ), 'ident');
+ }
+ function isolate(label) {
+ label.inLabels = [];
+ label.outLabels = [];
+ return label;
+ }
+ dprint("\n\n// XXX MAKEBLOCK " + entry + ', num labels: ' + labels.length + ' and they are: ' + getLabelIds(labels));
+ if (labels.length == 0 || !entry) {
+ print('//empty labels or entry');
+ return;
+ }
+ function forLabelLines(labels, func) {
+ labels.forEach(function(label) {
+ label.lines.forEach(function(line) { func(line, label) });
+ });
+ }
+
+ // Begin
+
+ calcLabelBranchingData(labels, labelsDict);
+
+ var split = splitter(labels, function(label) { return label.ident == entry });
+ var first = split.splitOut[0];
+ if (!first) {
+ print("//no first line");
+ return;
+ }
+ var others = split.leftIn;
+ var lastLine = first.lines.slice(-1)[0];
+ dprint("// makeBlock " + entry + ' : ' + getLabelIds(labels) + ' IN: ' + first.inLabels + ' OUT: ' + first.outLabels);
+ // If we have one outgoing, and none incoming - make this a block of 1,
+ // and move on the others (forgetting ourself, so they are now also
+ // totally self-enclosed, once we start them)
+ if (first.inLabels.length == 0 && first.outLabels.length == 1) {
+ dprint(' Creating simple emulated');
+ assertEq(lastLine.intertype, 'branch');
+// assertEq(!!lastLine.label, true);
+ return {
+ type: 'simple',
+ labels: [replaceLabelLabels(first, first.outLabels[0], 'BNOPP')],
+ entry: entry,
+ next: makeBlock(replaceInLabels(others, entry), first.outLabels[0], labelsDict),
+ };
+ }
+ //print('// loop ? a');
+ // Looping structures - in some way, we can get back to here
+ if (first.outLabels.length > 0 && first.allInLabels.indexOf(entry) != -1) {
+ //print('// loop ? b');
+ // Look for outsiders - labels no longer capable of getting here. Those must be
+ // outside the loop. Insiders are those that can get back to the entry
+ var split2 = splitter(others, function(label) { return label.allOutLabels.indexOf(entry) == -1 });
+ var outsiders = split2.splitOut;
+ var insiders = split2.leftIn;
+ //print('// potential loop : in/out : ' + getLabelIds(insiders) + ' to ' + getLabelIds(outsiders));
+ // Hopefully exactly one of the outsiders is a 'pivot' - a label to which all those leaving
+ // the loop must go. Then even some |if (falala) { ... break; }| will get ...
+ // as an outsider, but it will actually still be in the loop
+ var pivots =
+ outsiders.filter(function(outsider) {
+ return insiders.filter(function(insider) {
+ return insider.mustGetTo.indexOf(outsider.ident) == -1;
+ }) == 0;
+ });
+ // Find the earliest pivot. They must be staggered, each leading to another,
+ // as all insiders must go through *all* of these. So we seek a pivot that
+ // is never reached by another pivot. That must be the one with fewest
+ // mustGetTo
+ //print("//pivots: " + pivots.length + ',' + JSON.stringify(getLabelIds(pivots)));
+ if (pivots.length >= 1) { // We have ourselves a loop
+ pivots.sort(function(a, b) { return b.mustGetTo.length - a.mustGetTo.length });
+ var pivot = pivots[0];
+ dprint(' Creating LOOP : ' + entry + ' insiders: ' + getLabelIds(insiders) + ' to pivot: ' + pivot.ident);
+ var otherLoopLabels = insiders;
+ var loopLabels = insiders.concat([first]);
+ var nextLabels = outsiders;
+ // Rework branches out of the loop into new 'break' labels
+ forLabelLines(loopLabels, function(line) {
+ replaceLabels(line, pivot.ident, 'BREAK' + entry);
+ });
+ // Rework branches to the inc part of the loop into |continues|
+ forLabelLines(loopLabels, function(line, label) {
+ if (0) {// XXX - make this work :label.outLabels.length == 1 && label.outLabels[0] == entry && !label.hasBreak && !label.hasReturn) {
+ // it can remove unneeded continues (but does too much right now, as the continue might have been
+ // placed into an emulated while(1) switch { }
+ replaceLabels(line, entry, 'BNOPP'); print("// zeezee " + line.lineNum);
+ } else {
+ replaceLabels(line, entry, 'BCONT' + entry);
+ }
+ });
+ // Fix inc branch into rest
+ var nextEntry = null;
+ first.outLabels.forEach(function(outLabel) {
+ if (outLabel != pivot.ident) {
+ replaceLabels(lastLine, outLabel, 'BNOPP');
+ nextEntry = outLabel;
+ }
+ });
+ if (nextEntry == entry) { // We loop back to ourselves, the entire loop is just us
+ nextEntry = null;
+ }
+ var ret = {
+ type: 'loop',
+ labels: loopLabels,
+ entry: entry,
+ inc: makeBlock([isolate(first)], entry, labelsDict),
+ rest: makeBlock(replaceInLabels(otherLoopLabels, entry), nextEntry, labelsDict),
+ };
+ dprint(' getting last line for block starting with ' + entry + ' and nextEntry: ' + nextEntry);
+ var lastLoopLine = getLastLine(nextEntry ? ret.rest : ret.inc);
+ if (lastLoopLine) {
+ replaceLabels(lastLoopLine, 'BCONT' + entry, 'BNOPP'); // Last line will feed back into the loop anyhow
+ }
+ ret.next = makeBlock(replaceInLabels(nextLabels, getLabelIds(loopLabels)), pivot.ident, labelsDict);
+ return ret;
+ }
+ }
+
+ // Try an 'if' structure
+ if (first.outLabels.length == 2) {
+ if (labelsDict[first.outLabels[1]].mustGetTo.indexOf(first.outLabels[0]) != -1) {
+ first.outLabels.push(first.outLabels.shift()); // Reverse order - normalize. Very fast check anyhow
+ }
+ //print('// if? labels are ' + JSON.stringify(first.outLabels));
+ if (labelsDict[first.outLabels[0]].mustGetTo.indexOf(first.outLabels[1]) != -1) {
+ var ifLabelId = first.outLabels[0];
+ var outLabelId = first.outLabels[1];
+ // 0 - the if area. 1 - where we all exit to later
+ var ifTrueLabels = getAll(ifLabelId, [outLabelId]);
+ var ifLabels = ifTrueLabels.concat([first]);
+ var nextLabels = getAll(outLabelId);
+ // If we can get to the outside in more than 2 ways (one from if, one from True clause) - have breaks
+ var breaking = labelsDict[outLabelId].allInLabels.length > 2;
+ dprint(' Creating XXX IF: ' + getLabelIds(ifTrueLabels) + ' to ' + outLabelId + ' ==> ' + getLabelIds(nextLabels) + ' breaking: ' + breaking);
+ //print('// if separation: ' + labels.length + ' = ' + ifLabels.length + ' + ' + nextLabels.length + ' (' + ifTrueLabels.length + ')');
+ if (breaking) {
+ // Rework branches out of the if into new 'break' labels
+ forLabelLines(ifTrueLabels, function(line) {
+ replaceLabels(line, outLabelId, 'BREAK' + entry);
+ });
+ }
+ // Remove branching op - we will do it manually
+ replaceLabels(lastLine, ifLabelId, 'BNOPP');
+ replaceLabels(lastLine, outLabelId, 'BNOPP');
+ return {
+ type: (breaking ? 'breaking' : '') + 'if',
+ labels: ifLabels,
+ entry: entry,
+ ifVar: lastLine.ident,
+ check: makeBlock([isolate(first)], entry, labelsDict),
+ ifTrue: makeBlock(replaceInLabels(ifTrueLabels, entry), ifLabelId, labelsDict),
+ next: makeBlock(replaceInLabels(nextLabels, getLabelIds(ifLabels)), outLabelId, labelsDict),
+ };
+ }
+ }
+
+ // Give up on this structure - emulate it
+ dprint(' Creating complex emulated');
+ return def;
+ }
+
+ // TODO: each of these can be run in parallel
+ item.functions.forEach(function(func) {
+ dprint('relooping', "// relooping function: " + func.ident);
+ func.labelsDict = {};
+ func.labels.forEach(function(label) {
+ func.labelsDict[label.ident] = label;
+ });
+ func.block = makeBlock(func.labels, toNiceIdent('%entry'), func.labelsDict);
+ });
+
+ return finish();
+ },
+ });
+
+ // Optimizer
+ substrate.addZyme('Optimizer', {
+ processItem: function(item) {
+ var that = this;
+ function finish() {
+ item.__finalResult__ = true;
+ return [item];
+ }
+ if (!OPTIMIZE) return finish();
+
+ // Check if a line has side effects *aside* from an explicit assign if it has one
+ function isLineSideEffecting(line) {
+ if (line.intertype == 'assign' && line.value.intertype !== 'call') return false;
+ if (['fastgetelementptrload'].indexOf(line.intertype) != -1) return false;
+ return true;
+ }
+
+ function replaceVars(line, ident, replaceWith) {
+ if (!replaceWith) {
+ print('// Not replacing ' + dump(ident) + ' : ' + dump(replaceWith));
+ return false;
+ }
+ var found = false;
+ // assigns, loads, mathops
+ var POSSIBLE_VARS = ['ident', 'ident2'];
+ for (var i = 0; i < POSSIBLE_VARS.length; i++) {
+ var possible = POSSIBLE_VARS[i];
+ if (line[possible] == ident) {
+ line[possible] = replaceWith;
+ found = true;
+ }
+ if (line.value && line.value[possible] == ident) {
+ line.value[possible] = replaceWith;
+ found = true;
+ }
+ }
+ // getelementptr, call params
+ [line, line.value].forEach(function(element) {
+ if (!element || !element.params) return;
+ var params = element.params;
+ for (var j = 0; j < params.length; j++) {
+ var param = params[j];
+ if (param.intertype == 'value' && param.ident == ident) {
+ param.ident = replaceWith;
+ found = true;
+ }
+ }
+ });
+ return found;
+ }
+
+ // Fast getelementptr loads
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
+ b.intertype == 'assign' && b.value.intertype == 'load' &&
+ a.ident == b.value.ident && func.variables[a.ident].uses == 1) {
+// print("// LOADSUSPECT: " + i + ',' + (i+1) + ':' + a.ident + ':' + b.value.ident);
+ a.intertype = 'fastgetelementptrload';
+ a.ident = b.ident;
+ b.intertype = null;
+ i++;
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // Fast getelementptr stores
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
+ b.intertype == 'store' && b.value.text &&
+ a.ident == b.ident && func.variables[a.ident].uses == 1) {
+ //print("// STORESUSPECT: " + a.lineNum + ',' + b.lineNum);
+ a.intertype = 'fastgetelementptrstore';
+ a.ident = toNiceIdent(b.value.text);
+ b.intertype = null;
+ i++;
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // TODO: Use for all that can
+ function optimizePairs(worker, minSlice, maxSlice) {
+ minSlice = minSlice ? minSlice : 2;
+ maxSlice = maxSlice ? maxSlice : 2;
+ item.functions.forEach(function(func) {
+ func.labels.forEach(function(label) {
+ for (var i = 0; i < label.lines.length-1; i++) {
+ for (var j = i+minSlice-1; j < Math.min(i+maxSlice+1, label.lines.length); j++) {
+ if (worker(func, label.lines.slice(i, j+1))) {
+ i += j-i;
+ break; // stop working on this i
+ }
+ }
+ }
+ });
+ cleanFunc(func);
+ });
+ }
+
+ // Fast bitcast&something after them
+ optimizePairs(function(func, lines) {
+ var a = lines[0], b = lines[1];
+ if (a.intertype == 'assign' && a.value.intertype == 'bitcast' &&
+ func.variables[a.ident].uses == 1 && replaceVars(b, a.ident, a.value.ident)) {
+ a.intertype = null;
+ return true;
+ }
+ });
+
+/*
+ // Remove unnecessary branches
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.labels.length-1; i++) {
+ var a = func.labels[i].lines.slice(-1)[0];
+ var b = func.labels[i+1];
+ if (a.intertype == 'branch' && a.label == b.ident) {
+ a.intertype = null;
+ }
+ }
+ cleanFunc(func);
+ });
+*/
+
+ // Remove temp variables around nativized
+ item.functions.forEach(function(func) {
+ // loads, mathops
+ var worked = true;
+ while (worked) {
+ worked = false;
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'load' &&
+ func.variables[a.value.ident] && // Not global
+ func.variables[a.value.ident].impl === VAR_NATIVIZED) {
+ //print('// ??zzzz ' + dump(a) + ',\n // ??zzbb' + dump(b));
+ // If target is only used on next line - do not need it.
+ if (func.variables[a.ident].uses == 1 &&
+ replaceVars(b, a.ident, a.value.ident)) {
+ a.intertype = null;
+ i ++;
+ worked = true;
+ }
+ }
+ }
+ cleanFunc(func);
+ }
+
+ // stores
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (b.intertype == 'store' &&
+ func.variables[b.ident] && // Not global
+ func.variables[b.ident].impl === VAR_NATIVIZED) {
+ // If target is only used on prev line - do not need it.
+ if (func.variables[b.value.ident] && func.variables[b.value.ident].uses == 1 &&
+ ['assign', 'fastgetelementptrload'].indexOf(a.intertype) != -1 && a.ident == b.value.ident) {
+ a.ident = b.ident;
+ a.overrideSSA = true;
+ b.intertype = null;
+ i ++;
+ }
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // Remove redundant vars - SLOW! XXX
+ optimizePairs(function(func, lines) {
+ // a - a line defining a var
+ // b - a line defining a var that is identical to a
+ // c - the only line using b, hopefully
+ var a = lines[0], b = lines[lines.length-2], c = lines[lines.length-1];
+ if (a.intertype == 'assign' && b.intertype == 'assign' &&
+ func.variables[b.ident] && func.variables[b.ident].uses == 1 &&
+ compareTokens(a.value, b.value) &&
+ lines.slice(0,-1).filter(isLineSideEffecting).length == 0 &&
+ replaceVars(c, b.ident, a.ident)) {
+ b.intertype = null;
+ return true;
+ }
+ }, 3, 12);
+
+ return finish();
+ },
+ });
+
+ substrate.addItem({
+ items: data,
+ }, 'Sorter');
+
+ return substrate.solve();
+}
+
diff --git a/src/compiler.js b/src/compiler.js
new file mode 100644
index 00000000..6f514f89
--- /dev/null
+++ b/src/compiler.js
@@ -0,0 +1,44 @@
+// LLVM => JavaScript compiler, main entry point
+
+// Prep - allow this to run in both SpiderMonkey and V8
+if (!this['load']) {
<