summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoralon@honor <none@none>2010-09-20 18:59:15 -0700
committeralon@honor <none@none>2010-09-20 18:59:15 -0700
commitfea809cb090c648f58f11561f7f25ce813d9944f (patch)
tree355bc25d3bab157799caf339f6917683aca9ac41 /src
parent9e66429fa4154058798be6cc554408697cc6bfc8 (diff)
refactor into files
--HG-- rename : src/parser.js => src/compiler.js
Diffstat (limited to 'src')
-rw-r--r--src/analyzer.js879
-rw-r--r--src/compiler.js44
-rw-r--r--src/intertyper.js615
-rw-r--r--src/jsifier.js663
-rw-r--r--src/parseTools.js368
-rw-r--r--src/parser.js2572
-rw-r--r--src/settings.js2
-rw-r--r--src/utility.js2
8 files changed, 2572 insertions, 2573 deletions
diff --git a/src/analyzer.js b/src/analyzer.js
new file mode 100644
index 00000000..c9ccf82a
--- /dev/null
+++ b/src/analyzer.js
@@ -0,0 +1,879 @@
+// Analyze intertype data
+
+VAR_NATIVE = 'native';
+VAR_NATIVIZED = 'nativized';
+VAR_EMULATED = 'emulated';
+
+function cleanFunc(func) {
+ func.lines = func.lines.filter(function(line) { return line.intertype !== null });
+ func.labels.forEach(function(label) {
+ label.lines = label.lines.filter(function(line) { return line.intertype !== null });
+ });
+}
+
+function analyzer(data) {
+//print('zz analaz')
+ substrate = new Substrate('Analyzer');
+
+ // Sorter
+ substrate.addZyme('Sorter', {
+ processItem: function(item) {
+ item.items.sort(function (a, b) { return a.lineNum - b.lineNum });
+ this.forwardItem(item, 'Gatherer');
+ },
+ });
+
+ // Gatherer
+ substrate.addZyme('Gatherer', {
+ processItem: function(item) {
+ // Single-liners
+ ['globalVariable', 'functionStub', 'type'].forEach(function(intertype) {
+ var temp = splitter(item.items, function(item) { return item.intertype == intertype });
+ item[intertype + 's'] = temp.splitOut;
+ item.items = temp.leftIn;
+ });
+ // Functions & labels
+ item.functions = []
+ for (var i = 0; i < item.items.length; i++) {
+ var subItem = item.items[i];
+ if (subItem.intertype == 'function') {
+ item.functions.push(subItem);
+ subItem.endLineNum = null;
+ subItem.lines = [];
+ subItem.labels = [];
+ } else if (subItem.intertype == 'functionEnd') {
+ item.functions.slice(-1)[0].endLineNum = subItem.lineNum;
+ } else if (subItem.intertype == 'label') {
+ item.functions.slice(-1)[0].labels.push(subItem);
+ subItem.lines = [];
+ } else if (item.functions.slice(-1)[0].endLineNum === null) {
+ // Internal line
+ item.functions.slice(-1)[0].lines.push(subItem);
+ item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem);
+ } else {
+ print("ERROR: what is this? " + JSON.stringify(subItem));
+ }
+ }
+ delete item.items;
+ this.forwardItem(item, 'Identinicer');
+ },
+ });
+
+ // IdentiNicer
+ substrate.addZyme('Identinicer', {
+ processItem: function(output) {
+ walkJSON(output, function(item) {
+ ['', '2', '3', '4', '5'].forEach(function(ext) {
+ if (item && item['ident' + ext])
+ item['ident' + ext] = toNiceIdent(item['ident' + ext]);
+ });
+ });
+ this.forwardItem(output, 'Typevestigator');
+ }
+ });
+
+ function addType(type, data) {
+ if (type.length == 1) return;
+ if (data.types[type]) return;
+ if (['internal', 'inbounds', 'void'].indexOf(type) != -1) return;
+ dprint('types', '// addType: ' + type);
+ var check = new RegExp(/^\[(\d+)\ x\ (.*)\]$/g).exec(type);
+ // 'blocks': [14 x %struct.X] etc.
+ if (check) {
+ var num = parseInt(check[1]);
+ var subType = check[2];
+ data.types[type] = {
+ name_: type,
+ fields: range(num).map(function() { return subType }),
+ lineNum: '?',
+ };
+ return;
+ }
+ if (['['].indexOf(type) != -1) return;
+ if (isNumberType(type) || isPointerType(type)) return;
+ data.types[type] = {
+ name_: type,
+ fields: [ 'int32' ], // XXX
+ flatSize: 1,
+ lineNum: '?',
+ };
+ }
+
+ // Typevestigator
+ substrate.addZyme('Typevestigator', {
+ processItem: function(data) {
+ // Convert types list to dict
+ var old = data.types;
+ data.types = {};
+ old.forEach(function(type) { data.types[type.name_] = type });
+
+ // Find additional types
+ walkJSON(data, function(item) {
+ if (!item) return;
+ if (item.type) {
+ addType(!item.type.text ? item.type : item.type.text, data);
+ }
+ if (item.type2) {
+ addType(!item.type2.text ? item.type2 : item.type2.text, data);
+ }
+ });
+ this.forwardItem(data, 'Typeanalyzer');
+ }
+ });
+
+ // Type analyzer
+ substrate.addZyme('Typeanalyzer', {
+ processItem: function(item) {
+ // 'fields' is the raw list of LLVM fields. However, we embed
+ // child structures into parent structures, basically like C.
+ // So { int, { int, int }, int } would be represented as
+ // an Array of 4 ints. getelementptr on the parent would take
+ // values 0, 1, 2, where 2 is the entire middle structure.
+ // We also need to be careful with getelementptr to child
+ // structures - we return a pointer to the same slab, just
+ // a different offset. Likewise, need to be careful for
+ // getelementptr of 2 (the last int) - it's real index is 4.
+ // The benefit of this approach is inheritance -
+ // { { ancestor } , etc. } = descendant
+ // In this case it is easy to bitcast ancestor to descendant
+ // pointers - nothing needs to be done. If the ancestor were
+ // a new slab, it would need some pointer to the outer one
+ // for casting in that direction.
+ // TODO: bitcasts of non-inheritance cases of embedding (not at start)
+ var more = true;
+ while (more) {
+ more = false;
+ values(item.types).forEach(function(type) {
+ if (type.flatIndexes) return;
+ var ready = true;
+ type.fields.forEach(function(field) {
+ //print('// zz getT: ' + type.name_ + ' : ' + field);
+ if (isStructType(field)) {
+ if (!item.types[field]) {
+ addType(field, item);
+ ready = false;
+ } else {
+ if (!item.types[field].flatIndexes) {
+ ready = false;
+ }
+ }
+ }
+ });
+ if (!ready) {
+ more = true;
+ return;
+ }
+ type.flatSize = 0;
+ var sizes = [];
+ type.flatIndexes = type.fields.map(function(field) {
+ var soFar = type.flatSize;
+ var size = 1;
+ if (isStructType(field)) {
+ size = item.types[field].flatSize;
+ }
+ type.flatSize += size;
+ sizes.push(size);
+ return soFar;
+ });
+ if (dedup(sizes).length == 1) {
+ type.flatFactor = sizes[0];
+ }
+ type.needsFlattening = (this.flatFactor != 1);
+ dprint('types', 'type: ' + type.name_ + ' : ' + JSON.stringify(type.fields));
+ dprint('types', ' has final size of ' + type.flatSize + ', flatting: ' + type.needsFlattening + ' ? ' + (type.flatFactor ? type.flatFactor : JSON.stringify(type.flatIndexes)));
+ });
+ }
+
+ this.forwardItem(item, 'VariableAnalyzer');
+ },
+ });
+
+ // Variable analyzer
+ substrate.addZyme('VariableAnalyzer', {
+ processItem: function(item) {
+ item.functions.forEach(function(func) {
+ func.variables = {};
+
+ // LLVM is SSA, so we always have a single assignment/write. We care about
+ // the reads/other uses.
+ walkJSON(func.lines, function(item) {
+ if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr', 'fastgetelementptrload'].indexOf(item.value.intertype) != -1) {
+ func.variables[item.ident] = {
+ ident: item.ident,
+ type: item.value.type.text,
+ origin: item.value.intertype,
+ uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]),
+ };
+ }
+ });
+
+ for (vname in func.variables) {
+ var variable = func.variables[vname];
+
+ // Whether the value itself is used. For an int, always yes. For a pointer,
+ // we might never use the pointer's value - we might always just store to it /
+ // read from it. If so, then we can optimize away the pointer.
+ variable.hasValueTaken = false;
+ // Whether our address was used. If not, then we do not need to bother with
+ // implementing this variable in a way that other functions can access it.
+ variable.hasAddrTaken = false;
+
+ variable.pointingLevels = pointingLevels(variable.type);
+
+ // Analysis!
+
+ if (variable.pointingLevels > 0) {
+ // Pointers
+ variable.loads = 0;
+ variable.stores = 0;
+
+ func.lines.forEach(function(line) {
+ //print(dump(line))
+ if (line.intertype == 'store' && line.ident == vname) {
+ variable.stores ++;
+ } else if ((line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) ||
+ (line.intertype == 'fastgetelementptrload' && line.ident == vname)) {
+ variable.loads ++;
+ }
+ });
+
+ variable.otherUses = variable.uses - variable.loads - variable.stores;
+ if (variable.otherUses > 0)
+ variable.hasValueTaken = true;
+ }
+
+ // Decision time
+
+ if (variable.origin == 'getelementptr') {
+ // Use our implementation that emulates pointers etc.
+ variable.impl = VAR_EMULATED;
+ } else if ( variable.pointingLevels === 0 && !variable.hasAddrTaken ) {
+ // A simple int value, can be implemented as a native variable
+ variable.impl = VAR_NATIVE;
+ } else if ( variable.pointingLevels === 1 && variable.origin === 'alloca' && !isStructPointerType(variable.type) && !variable.hasAddrTaken && !variable.hasValueTaken ) {
+ // A pointer to a value which is only accessible through this pointer. Basically
+ // a local value on the stack, which nothing fancy is done on. So we can
+ // optimize away the pointing altogether, and just have a native variable
+ variable.impl = VAR_NATIVIZED;
+ } else {
+ variable.impl = VAR_EMULATED;
+ }
+ dprint('vars', '// var ' + vname + ': ' + JSON.stringify(variable));
+ }
+ });
+ this.forwardItem(item, 'Relooper');
+ },
+ });
+
+ // ReLooper - reconstruct nice loops, as much as possible
+ substrate.addZyme('Relooper', {
+ processItem: function(item) {
+ var that = this;
+ function finish() {
+ that.forwardItem(item, 'Optimizer');
+ }
+
+ // Tools
+
+ function replaceLabels(line, labelId, toLabelId) {
+ //print('// XXX replace ' + labelId + ' with ' + toLabelId);
+ function process(item) {
+ ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel', 'defaultLabel'].forEach(function(id) {
+ if (item[id] && item[id] == labelId) {
+ item[id] = toLabelId;
+ //print(' replaced!');
+ }
+ });
+ }
+ if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
+ process(line);
+ } else if (line.intertype == 'switch') {
+ process(line);
+ line.switchLabels.forEach(process);
+ }
+ }
+
+ function replaceLabelLabels(label, labelId, toLabelId) {
+ label.lines.forEach(function(line) { replaceLabels(line, labelId, toLabelId) });
+ return label;
+ }
+
+ function replaceInLabels(labels, toReplace, replaceWith) {
+ //print('// XXX replaceIn ' + toReplace + ' with ' + replaceWith);
+ assertEq(!replaceWith || toReplace.length == 1, true); // TODO: implement other case
+ labels.forEach(function(label) {
+ ['inLabels'].forEach(function(l) {
+ label[l] = label[l].map(function(labelId) { return toReplace.indexOf(labelId) == -1 ? labelId : replaceWith})
+ .filter(function(labelId) { return !!labelId });
+ });
+ });
+ return labels;
+ }
+
+ function calcLabelBranchingData(labels, labelsDict) {
+ item.functions.forEach(function(func) {
+ labels.forEach(function(label) {
+ label.outLabels = [];
+ label.inLabels = [];
+ label.hasReturn = false;
+ label.hasBreak = false;
+ });
+ });
+ // Find direct branchings
+ labels.forEach(function(label) {
+ dprint('find direct branchings at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
+ label.lines.forEach(function(line) {
+ function process(item) {
+ ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel'].forEach(function(id) {
+ if (item[id]) {
+ if (item[id][0] == 'B') { // BREAK, BCONT, BNOPP
+ label.hasBreak = true;
+ } else {
+ label.outLabels.push(item[id]);
+ labelsDict[item[id]].inLabels.push(label.ident);
+ }
+ }
+ });
+ }
+ if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
+ process(line);
+ } else if (line.intertype == 'switch') {
+ process(line);
+ line.switchLabels.forEach(process);
+ }
+
+ label.hasReturn |= line.intertype == 'return';
+ });
+ });
+ // Find all incoming and all outgoing - recursively
+ labels.forEach(function(label) {
+ label.allInLabels = [];
+ label.allOutLabels = [];
+ //! MustGetTo ignores return - |if (x) return; else Y| must get to Y.
+ label.mustGetHere = [];
+ });
+
+ var worked = true;
+ while (worked) {
+ worked = false;
+ labels.forEach(function(label) {
+ //print('zz at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
+ function inout(s, l) {
+ var temp = label[s].slice(0);
+ label[s].forEach(function(label2Id) {
+ temp = temp.concat(labelsDict[label2Id][l]);
+ });
+ temp = dedup(temp);
+ temp.sort();
+ if (JSON.stringify(label[l]) != JSON.stringify(temp)) {
+ //print('zz noticed ' + label.ident + ' ? ' + s + ',' + l + ' : ' + label[s] + ' | ' + label[l]);
+ label[l] = temp;
+ worked = true;
+ }
+ }
+ inout('inLabels', 'allInLabels');
+ inout('outLabels', 'allOutLabels');
+ });
+ }
+
+ // Find all mustGetTos
+ labels.forEach(function(label) {
+ //print('path for: ' + label.ident + ',' + dump(label));
+ function walk(path, label) {
+ //print('path is: ' + getLabelIds(path.concat([label])));
+ // If all we are is a break/return - then stop here. Otherwise, continue to other path
+ //print('??? ' + label.hasReturn + ' : ' + label.hasBreak + ' : ' + label.outLabels.length);
+ if (label.hasReturn || (label.hasBreak && label.outLabels.length == 0)) {
+ //print('path done.');
+ return [path.concat([label])]
+ };
+ if (path.indexOf(label) != -1) {
+ //print('looping path - abort it.');
+ return []; // loop - drop this path
+ }
+ path = path.concat([label]);
+ return label.outLabels.map(function(outLabelId) { return walk(path, labelsDict[outLabelId]) })
+ .reduce(function(a, b) { return a.concat(b) }, [])
+ .filter(function(path) { return path.length > 0 });
+ }
+ var paths = walk([], label).map(function(path) { return getLabelIds(path) });
+ //print('XXX paths: ' + JSON.stringify(paths));
+ var possibles = dedup(paths.reduce(function(a,b) { return a.concat(b) }, []));
+ label.mustGetTo = possibles.filter(function(possible) {
+ return paths.filter(function(path) { return path.indexOf(possible) == -1 }) == 0;
+ }).filter(function(possible) { return possible != label.ident });
+ //print('XXX must get to: ' + JSON.stringify(label.mustGetTo));
+ });
+
+ labels.forEach(function(label) {
+ label.mustGetTo.forEach(function (mustGetTo) {
+ labelsDict[mustGetTo].mustGetHere.push(label.ident);
+ });
+ });
+
+ if (dcheck('labelbranching')) {
+ labels.forEach(function(label) {
+ print('// label: ' + label.ident + ' :out : ' + JSON.stringify(label.outLabels));
+ print('// ' + label.ident + ' :in : ' + JSON.stringify(label.inLabels));
+ print('// ' + label.ident + ' :ALL out : ' + JSON.stringify(label.allOutLabels));
+ print('// ' + label.ident + ' :ALL in : ' + JSON.stringify(label.allInLabels));
+ print('// ZZZZZZ ' + label.ident + ' must get to all of ' + JSON.stringify(label.mustGetTo));
+ });
+ }
+ }
+
+/* // Disabled merging as it seems it just removes a return now and then.
+ function mergeLabels(label1Ident, label2Ident) {
+ var label1 = func.labelsDict[label1Ident];
+ var label2 = func.labelsDict[label2Ident];
+ label1.lines.pop();
+ label1.lines = label1.lines.concat(label2.lines);
+ label1.outLabels = label2.outLabels;
+ label2.lines = null;
+ func.labels = func.labels.filter(function(label) { return !!label.lines });
+print('// zz Merged away! ' + label2.ident + ' into ' + label1.ident);
+ delete func.labelsDict[label2.ident];
+ replaceInLabels(func.labels, [label2.ident], label1.ident);
+ }
+//print('Labels pre merge : ' + getLabelIds(func.labels));
+
+ // Merge trivial labels
+ var worked = true;
+ while (worked) {
+ worked = false;
+ func.labels.forEach(function(label) {
+ if (label.lines === null) return; // We were deleted
+//print("// Consider: " + label.ident + ' : out/in: ' + label.outLabels.length + ',' + label.inLabels.length);// + dump(label.lines.slice(-1)[0]));
+ if (label.outLabels.length == 1 &&
+ label.lines.slice(-1)[0].intertype == 'branch' &&
+// label.lines.slice(-1)[0].label && // nonconditional branch
+ label.lines.slice(-1)[0].label == label.outLabels[0] &&
+ func.labelsDict[label.outLabels[0]].inLabels.length == 1) {
+//print("// and target: " + func.labelsDict[label.outLabels[0]].inLabels);
+ mergeLabels(label.ident, label.outLabels[0]);
+ worked = true;
+ }
+ });
+ }
+*/
+
+ // 'block': A self-enclosed part of the program, for example a loop or an if
+ function makeBlock(labels, entry, labelsDict) {
+ var def = {
+ type: 'emulated', // a block we cannot map to a nicer structure like a loop. We emulate it with a barbaric switch
+ labels: labels,
+ entry: entry,
+ };
+ if (!RELOOP) return def;
+
+ if (!entry) {
+ assertTrue(labels.length == 0);
+ return null; // Empty block - not even an entry
+ }
+
+ function getLastLine(block) {
+ //dprint('get last line at: ' + block.labels[0].ident);
+ if (block.next) return getLastLine(block.next);
+ switch(block.type) {
+ case 'loop':
+ return getLastLine(block.rest);
+ case 'if':
+ case 'breakingif':
+ return getLastLine(block.ifTrue);
+ case 'emulated':
+ case 'simple':
+ if (block.labels.length == 1) {
+ return block.labels[0].lines.slice(-1)[0];
+ } else {
+ //for (var i = 0; i < block.labels.length; i++)
+ //print('Remaining label is at line: ' + block.labels[i].lines[0].lineNum);
+ return null; // throw "Not clear what the last line is."
+ }
+ }
+ }
+ function getAll(fromId, beforeIds) {
+ beforeIds = beforeIds ? beforeIds : [];
+ //print("//getAll : " + fromId + ' : ' + beforeIds);
+ if (beforeIds && beforeIds.indexOf(fromId) != -1) return [];
+ //print("getAll proceeding");
+ var from = labelsDict[fromId];
+ return dedup([from].concat(
+ from.outLabels.map(function(outLabel) { return getAll(outLabel, beforeIds.concat(fromId)) })
+ .reduce(function(a,b) { return a.concat(b) }, [])
+ ), 'ident');
+ }
+ function isolate(label) {
+ label.inLabels = [];
+ label.outLabels = [];
+ return label;
+ }
+ dprint("\n\n// XXX MAKEBLOCK " + entry + ', num labels: ' + labels.length + ' and they are: ' + getLabelIds(labels));
+ if (labels.length == 0 || !entry) {
+ print('//empty labels or entry');
+ return;
+ }
+ function forLabelLines(labels, func) {
+ labels.forEach(function(label) {
+ label.lines.forEach(function(line) { func(line, label) });
+ });
+ }
+
+ // Begin
+
+ calcLabelBranchingData(labels, labelsDict);
+
+ var split = splitter(labels, function(label) { return label.ident == entry });
+ var first = split.splitOut[0];
+ if (!first) {
+ print("//no first line");
+ return;
+ }
+ var others = split.leftIn;
+ var lastLine = first.lines.slice(-1)[0];
+ dprint("// makeBlock " + entry + ' : ' + getLabelIds(labels) + ' IN: ' + first.inLabels + ' OUT: ' + first.outLabels);
+ // If we have one outgoing, and none incoming - make this a block of 1,
+ // and move on the others (forgetting ourself, so they are now also
+ // totally self-enclosed, once we start them)
+ if (first.inLabels.length == 0 && first.outLabels.length == 1) {
+ dprint(' Creating simple emulated');
+ assertEq(lastLine.intertype, 'branch');
+// assertEq(!!lastLine.label, true);
+ return {
+ type: 'simple',
+ labels: [replaceLabelLabels(first, first.outLabels[0], 'BNOPP')],
+ entry: entry,
+ next: makeBlock(replaceInLabels(others, entry), first.outLabels[0], labelsDict),
+ };
+ }
+ //print('// loop ? a');
+ // Looping structures - in some way, we can get back to here
+ if (first.outLabels.length > 0 && first.allInLabels.indexOf(entry) != -1) {
+ //print('// loop ? b');
+ // Look for outsiders - labels no longer capable of getting here. Those must be
+ // outside the loop. Insiders are those that can get back to the entry
+ var split2 = splitter(others, function(label) { return label.allOutLabels.indexOf(entry) == -1 });
+ var outsiders = split2.splitOut;
+ var insiders = split2.leftIn;
+ //print('// potential loop : in/out : ' + getLabelIds(insiders) + ' to ' + getLabelIds(outsiders));
+ // Hopefully exactly one of the outsiders is a 'pivot' - a label to which all those leaving
+ // the loop must go. Then even some |if (falala) { ... break; }| will get ...
+ // as an outsider, but it will actually still be in the loop
+ var pivots =
+ outsiders.filter(function(outsider) {
+ return insiders.filter(function(insider) {
+ return insider.mustGetTo.indexOf(outsider.ident) == -1;
+ }) == 0;
+ });
+ // Find the earliest pivot. They must be staggered, each leading to another,
+ // as all insiders must go through *all* of these. So we seek a pivot that
+ // is never reached by another pivot. That must be the one with fewest
+ // mustGetTo
+ //print("//pivots: " + pivots.length + ',' + JSON.stringify(getLabelIds(pivots)));
+ if (pivots.length >= 1) { // We have ourselves a loop
+ pivots.sort(function(a, b) { return b.mustGetTo.length - a.mustGetTo.length });
+ var pivot = pivots[0];
+ dprint(' Creating LOOP : ' + entry + ' insiders: ' + getLabelIds(insiders) + ' to pivot: ' + pivot.ident);
+ var otherLoopLabels = insiders;
+ var loopLabels = insiders.concat([first]);
+ var nextLabels = outsiders;
+ // Rework branches out of the loop into new 'break' labels
+ forLabelLines(loopLabels, function(line) {
+ replaceLabels(line, pivot.ident, 'BREAK' + entry);
+ });
+ // Rework branches to the inc part of the loop into |continues|
+ forLabelLines(loopLabels, function(line, label) {
+ if (0) {// XXX - make this work :label.outLabels.length == 1 && label.outLabels[0] == entry && !label.hasBreak && !label.hasReturn) {
+ // it can remove unneeded continues (but does too much right now, as the continue might have been
+ // placed into an emulated while(1) switch { }
+ replaceLabels(line, entry, 'BNOPP'); print("// zeezee " + line.lineNum);
+ } else {
+ replaceLabels(line, entry, 'BCONT' + entry);
+ }
+ });
+ // Fix inc branch into rest
+ var nextEntry = null;
+ first.outLabels.forEach(function(outLabel) {
+ if (outLabel != pivot.ident) {
+ replaceLabels(lastLine, outLabel, 'BNOPP');
+ nextEntry = outLabel;
+ }
+ });
+ if (nextEntry == entry) { // We loop back to ourselves, the entire loop is just us
+ nextEntry = null;
+ }
+ var ret = {
+ type: 'loop',
+ labels: loopLabels,
+ entry: entry,
+ inc: makeBlock([isolate(first)], entry, labelsDict),
+ rest: makeBlock(replaceInLabels(otherLoopLabels, entry), nextEntry, labelsDict),
+ };
+ dprint(' getting last line for block starting with ' + entry + ' and nextEntry: ' + nextEntry);
+ var lastLoopLine = getLastLine(nextEntry ? ret.rest : ret.inc);
+ if (lastLoopLine) {
+ replaceLabels(lastLoopLine, 'BCONT' + entry, 'BNOPP'); // Last line will feed back into the loop anyhow
+ }
+ ret.next = makeBlock(replaceInLabels(nextLabels, getLabelIds(loopLabels)), pivot.ident, labelsDict);
+ return ret;
+ }
+ }
+
+ // Try an 'if' structure
+ if (first.outLabels.length == 2) {
+ if (labelsDict[first.outLabels[1]].mustGetTo.indexOf(first.outLabels[0]) != -1) {
+ first.outLabels.push(first.outLabels.shift()); // Reverse order - normalize. Very fast check anyhow
+ }
+ //print('// if? labels are ' + JSON.stringify(first.outLabels));
+ if (labelsDict[first.outLabels[0]].mustGetTo.indexOf(first.outLabels[1]) != -1) {
+ var ifLabelId = first.outLabels[0];
+ var outLabelId = first.outLabels[1];
+ // 0 - the if area. 1 - where we all exit to later
+ var ifTrueLabels = getAll(ifLabelId, [outLabelId]);
+ var ifLabels = ifTrueLabels.concat([first]);
+ var nextLabels = getAll(outLabelId);
+ // If we can get to the outside in more than 2 ways (one from if, one from True clause) - have breaks
+ var breaking = labelsDict[outLabelId].allInLabels.length > 2;
+ dprint(' Creating XXX IF: ' + getLabelIds(ifTrueLabels) + ' to ' + outLabelId + ' ==> ' + getLabelIds(nextLabels) + ' breaking: ' + breaking);
+ //print('// if separation: ' + labels.length + ' = ' + ifLabels.length + ' + ' + nextLabels.length + ' (' + ifTrueLabels.length + ')');
+ if (breaking) {
+ // Rework branches out of the if into new 'break' labels
+ forLabelLines(ifTrueLabels, function(line) {
+ replaceLabels(line, outLabelId, 'BREAK' + entry);
+ });
+ }
+ // Remove branching op - we will do it manually
+ replaceLabels(lastLine, ifLabelId, 'BNOPP');
+ replaceLabels(lastLine, outLabelId, 'BNOPP');
+ return {
+ type: (breaking ? 'breaking' : '') + 'if',
+ labels: ifLabels,
+ entry: entry,
+ ifVar: lastLine.ident,
+ check: makeBlock([isolate(first)], entry, labelsDict),
+ ifTrue: makeBlock(replaceInLabels(ifTrueLabels, entry), ifLabelId, labelsDict),
+ next: makeBlock(replaceInLabels(nextLabels, getLabelIds(ifLabels)), outLabelId, labelsDict),
+ };
+ }
+ }
+
+ // Give up on this structure - emulate it
+ dprint(' Creating complex emulated');
+ return def;
+ }
+
+ // TODO: each of these can be run in parallel
+ item.functions.forEach(function(func) {
+ dprint('relooping', "// relooping function: " + func.ident);
+ func.labelsDict = {};
+ func.labels.forEach(function(label) {
+ func.labelsDict[label.ident] = label;
+ });
+ func.block = makeBlock(func.labels, toNiceIdent('%entry'), func.labelsDict);
+ });
+
+ return finish();
+ },
+ });
+
+ // Optimizer
+ substrate.addZyme('Optimizer', {
+ processItem: function(item) {
+ var that = this;
+ function finish() {
+ item.__finalResult__ = true;
+ return [item];
+ }
+ if (!OPTIMIZE) return finish();
+
+ // Check if a line has side effects *aside* from an explicit assign if it has one
+ function isLineSideEffecting(line) {
+ if (line.intertype == 'assign' && line.value.intertype !== 'call') return false;
+ if (['fastgetelementptrload'].indexOf(line.intertype) != -1) return false;
+ return true;
+ }
+
+ function replaceVars(line, ident, replaceWith) {
+ if (!replaceWith) {
+ print('// Not replacing ' + dump(ident) + ' : ' + dump(replaceWith));
+ return false;
+ }
+ var found = false;
+ // assigns, loads, mathops
+ var POSSIBLE_VARS = ['ident', 'ident2'];
+ for (var i = 0; i < POSSIBLE_VARS.length; i++) {
+ var possible = POSSIBLE_VARS[i];
+ if (line[possible] == ident) {
+ line[possible] = replaceWith;
+ found = true;
+ }
+ if (line.value && line.value[possible] == ident) {
+ line.value[possible] = replaceWith;
+ found = true;
+ }
+ }
+ // getelementptr, call params
+ [line, line.value].forEach(function(element) {
+ if (!element || !element.params) return;
+ var params = element.params;
+ for (var j = 0; j < params.length; j++) {
+ var param = params[j];
+ if (param.intertype == 'value' && param.ident == ident) {
+ param.ident = replaceWith;
+ found = true;
+ }
+ }
+ });
+ return found;
+ }
+
+ // Fast getelementptr loads
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
+ b.intertype == 'assign' && b.value.intertype == 'load' &&
+ a.ident == b.value.ident && func.variables[a.ident].uses == 1) {
+// print("// LOADSUSPECT: " + i + ',' + (i+1) + ':' + a.ident + ':' + b.value.ident);
+ a.intertype = 'fastgetelementptrload';
+ a.ident = b.ident;
+ b.intertype = null;
+ i++;
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // Fast getelementptr stores
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
+ b.intertype == 'store' && b.value.text &&
+ a.ident == b.ident && func.variables[a.ident].uses == 1) {
+ //print("// STORESUSPECT: " + a.lineNum + ',' + b.lineNum);
+ a.intertype = 'fastgetelementptrstore';
+ a.ident = toNiceIdent(b.value.text);
+ b.intertype = null;
+ i++;
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // TODO: Use for all that can
+ function optimizePairs(worker, minSlice, maxSlice) {
+ minSlice = minSlice ? minSlice : 2;
+ maxSlice = maxSlice ? maxSlice : 2;
+ item.functions.forEach(function(func) {
+ func.labels.forEach(function(label) {
+ for (var i = 0; i < label.lines.length-1; i++) {
+ for (var j = i+minSlice-1; j < Math.min(i+maxSlice+1, label.lines.length); j++) {
+ if (worker(func, label.lines.slice(i, j+1))) {
+ i += j-i;
+ break; // stop working on this i
+ }
+ }
+ }
+ });
+ cleanFunc(func);
+ });
+ }
+
+ // Fast bitcast&something after them
+ optimizePairs(function(func, lines) {
+ var a = lines[0], b = lines[1];
+ if (a.intertype == 'assign' && a.value.intertype == 'bitcast' &&
+ func.variables[a.ident].uses == 1 && replaceVars(b, a.ident, a.value.ident)) {
+ a.intertype = null;
+ return true;
+ }
+ });
+
+/*
+ // Remove unnecessary branches
+ item.functions.forEach(function(func) {
+ for (var i = 0; i < func.labels.length-1; i++) {
+ var a = func.labels[i].lines.slice(-1)[0];
+ var b = func.labels[i+1];
+ if (a.intertype == 'branch' && a.label == b.ident) {
+ a.intertype = null;
+ }
+ }
+ cleanFunc(func);
+ });
+*/
+
+ // Remove temp variables around nativized
+ item.functions.forEach(function(func) {
+ // loads, mathops
+ var worked = true;
+ while (worked) {
+ worked = false;
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (a.intertype == 'assign' && a.value.intertype == 'load' &&
+ func.variables[a.value.ident] && // Not global
+ func.variables[a.value.ident].impl === VAR_NATIVIZED) {
+ //print('// ??zzzz ' + dump(a) + ',\n // ??zzbb' + dump(b));
+ // If target is only used on next line - do not need it.
+ if (func.variables[a.ident].uses == 1 &&
+ replaceVars(b, a.ident, a.value.ident)) {
+ a.intertype = null;
+ i ++;
+ worked = true;
+ }
+ }
+ }
+ cleanFunc(func);
+ }
+
+ // stores
+ for (var i = 0; i < func.lines.length-1; i++) {
+ var a = func.lines[i];
+ var b = func.lines[i+1];
+ if (b.intertype == 'store' &&
+ func.variables[b.ident] && // Not global
+ func.variables[b.ident].impl === VAR_NATIVIZED) {
+ // If target is only used on prev line - do not need it.
+ if (func.variables[b.value.ident] && func.variables[b.value.ident].uses == 1 &&
+ ['assign', 'fastgetelementptrload'].indexOf(a.intertype) != -1 && a.ident == b.value.ident) {
+ a.ident = b.ident;
+ a.overrideSSA = true;
+ b.intertype = null;
+ i ++;
+ }
+ }
+ }
+ cleanFunc(func);
+ });
+
+ // Remove redundant vars - SLOW! XXX
+ optimizePairs(function(func, lines) {
+ // a - a line defining a var
+ // b - a line defining a var that is identical to a
+ // c - the only line using b, hopefully
+ var a = lines[0], b = lines[lines.length-2], c = lines[lines.length-1];
+ if (a.intertype == 'assign' && b.intertype == 'assign' &&
+ func.variables[b.ident] && func.variables[b.ident].uses == 1 &&
+ compareTokens(a.value, b.value) &&
+ lines.slice(0,-1).filter(isLineSideEffecting).length == 0 &&
+ replaceVars(c, b.ident, a.ident)) {
+ b.intertype = null;
+ return true;
+ }
+ }, 3, 12);
+
+ return finish();
+ },
+ });
+
+ substrate.addItem({
+ items: data,
+ }, 'Sorter');
+
+ return substrate.solve();
+}
+
diff --git a/src/compiler.js b/src/compiler.js
new file mode 100644
index 00000000..6f514f89
--- /dev/null
+++ b/src/compiler.js
@@ -0,0 +1,44 @@
+// LLVM => JavaScript compiler, main entry point
+
+// Prep - allow this to run in both SpiderMonkey and V8
+if (!this['load']) {
+ load = function(f) { eval(snarf(f)) }
+}
+if (!this['read']) {
+ read = function(f) { snarf(f) }
+}
+
+load('settings.js');
+if (LABEL_DEBUG && RELOOP) throw "Cannot debug labels if they have been relooped!";
+
+load('utility.js');
+load('enzymatic.js');
+load('snippets.js');
+load('parseTools.js');
+load('intertyper.js');
+load('analyzer.js');
+load('jsifier.js');
+
+//===============================
+// Main
+//===============================
+
+// Override settings.js
+var settings = JSON.parse(readline());
+for (setting in settings) {
+ this[setting] = settings[setting];
+}
+
+// Read llvm
+var lines = [];
+var line;
+do {
+ line = readline();
+ if (line == null) break;
+ lines.push(line);
+} while(true);
+var data = lines.join("\n");
+
+// Do it
+print(JSify(analyzer(intertyper(data))));
+
diff --git a/src/intertyper.js b/src/intertyper.js
new file mode 100644
index 00000000..aeae6c7b
--- /dev/null
+++ b/src/intertyper.js
@@ -0,0 +1,615 @@
+// llvm => internal intermediate representation
+
+function intertyper(data) {
+ // Substrate
+
+ substrate = new Substrate('Intertyper');
+
+ // Line splitter.
+ substrate.addZyme('LineSplitter', {
+ processItem: function(item) {
+ var lines = item.llvmText.split('\n');
+ var ret = [];
+ var inContinual = false;
+ for (var i = 0; i < lines.length; i++) {
+ var line = lines[i];
+ if (inContinual || new RegExp(/^\ +to.*/g).test(line)) {
+ // to after invoke
+ ret.slice(-1)[0].lineText += line;
+ if (new RegExp(/^\ +\]/g).test(line)) { // end of llvm switch
+ inContinual = false;
+ }
+ } else {
+ ret.push({
+ lineText: line,
+ lineNum: i + 1,
+ });
+ if (new RegExp(/^\ +switch\ .*/g).test(line)) {
+ // beginning of llvm switch
+ inContinual = true;
+ }
+ }
+ }
+ this.forwardItems(ret.filter(function(item) { return item.lineText; }), 'Tokenizer');
+ },
+ });
+
+ // Line tokenizer
+ substrate.addZyme('Tokenizer', {
+ processItem: function(item, inner) {
+ var lineText = item.lineText + " ";
+ var tokens = [];
+ var tokenStart = -1;
+ var indent = -1;
+ var quotes = 0;
+ var lastToken = null;
+ var i = 0;
+ // Note: '{' is not an encloser, as its use in functions is split over many lines
+ var enclosers = {
+ '[': 0,
+ ']': '[',
+ '(': 0,
+ ')': '(',
+ '<': 0,
+ '>': '<',
+ };
+ function notEnclosed() {
+ if (enclosers['['] > 0 || enclosers['('] > 0 || enclosers['<'] > 0)
+ return false;
+ return true;
+ }
+ var that = this;
+ function tryStartToken() {
+ if (tokenStart == -1 && notEnclosed() && quotes == 0) {
+ //print("try START " + tokenStart + ',' + JSON.stringify(enclosers));
+ tokenStart = i;
+ }
+ }
+ function tryFinishToken(includeThis) {
+ if (tokenStart >= 0 && notEnclosed() && quotes == 0) {
+ //print("try finish " + tokenStart + ',' + JSON.stringify(enclosers));
+ var token = {
+ text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)),
+ };
+ if (token.text[0] in enclosers) {
+ token.item = that.processItem({
+ lineText: token.text.substr(1, token.text.length-2)
+ }, true);
+ token.type = token.text[0];
+ }
+ if (indent == -1) {
+ indent = tokenStart;
+ }
+ // merge certain tokens
+ if ( (lastToken && lastToken.text == '%' && token.text[0] == '"' ) ||
+ (lastToken && token.text.replace(/\*/g, '') == '') ) {
+ lastToken.text += token.text;
+ } else if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
+ lastToken.text += ' ' + token.text;
+ } else if (lastToken && token.text[token.text.length-1] == '}') {
+ var openBrace = tokens.length-1;
+ while (tokens[openBrace].text != '{') openBrace --;
+ token = combineTokens(tokens.slice(openBrace+1));
+ tokens.splice(openBrace, tokens.length-openBrace+1);
+ tokens.push(token);
+ token.type = '{';
+ lastToken = token;
+ } else {
+ tokens.push(token);
+ lastToken = token;
+ }
+ // print("new token: " + dump(lastToken));
+ tokenStart = -1;
+ }
+ }
+ for (; i < lineText.length; i++) {
+ var letter = lineText[i];
+ //print("letter: " + letter);
+ switch (letter) {
+ case ' ':
+ tryFinishToken();
+ break;
+ case '"':
+ tryFinishToken();
+ tryStartToken();
+ quotes = 1-quotes;
+ break;
+ case ',':
+ tryFinishToken();
+ if (notEnclosed() && quotes == 0) {
+ tokens.push({ text: ',' });
+ }
+ break;
+ default:
+ if (letter in enclosers && quotes == 0) {
+ if (typeof enclosers[letter] === 'number') {
+ tryFinishToken();
+ tryStartToken();
+ enclosers[letter]++;
+ } else {
+ enclosers[enclosers[letter]]--;
+ tryFinishToken(true);
+ }
+ //print(' post-enclosers: ' + JSON.stringify(enclosers));
+ } else {
+ tryStartToken();
+ }
+ }
+ }
+ var item = {
+ tokens: tokens,
+ indent: indent,
+ lineNum: item.lineNum,
+ };
+ if (inner) {
+ return [item];
+ } else {
+ this.forwardItem(item, 'Triager');
+ }
+ },
+ });
+
+ substrate.addZyme('Triager', {
+ processItem: function(item) {
+ function triage() {
+ if (!item.intertype) {
+ if (item.tokens[0].text in searchable(';', 'target'))
+ return '/dev/null';
+ if (item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=')
+ return 'Global';
+ if (item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' &&
+ item.tokens.slice(-1)[0].text == '{')
+ return 'FuncHeader';
+ if (item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':')
+ return 'Label';
+ if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 &&
+ !item.intertype)
+ return 'Assign';
+ if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load')
+ return 'Load';
+ if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast')
+ return 'Bitcast';
+ if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr')
+ return 'GEP';
+ if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype)
+ return 'Call';
+ if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype)
+ return 'Invoke';
+ if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca')
+ return 'Alloca';
+ if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'phi')
+ return 'Phi';
+ if (item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
+ ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr'].indexOf(item.tokens[0].text) != -1 && !item.intertype)
+ return 'Mathops';
+ if (item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' &&
+ !item.intertype)
+ return 'Store';
+ if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' &&
+ !item.intertype)
+ return 'Branch';
+ if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' &&
+ !item.intertype)
+ return 'Return';
+ if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'switch' &&
+ !item.intertype)
+ return 'Switch';
+ if (item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype)
+ return 'FuncEnd';
+ if (item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' &&
+ !item.intertype)
+ return 'External';
+ if (item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' &&
+ !item.intertype)
+ return 'Unreachable';
+ } else {
+ // Already intertyped
+ if (item.parentSlot)
+ return 'Reintegrator';
+ }
+ throw 'Invalid token, cannot triage: ' + dump(item);
+ }
+ this.forwardItem(item, triage(item));
+ },
+ });
+
+ // Line parsers to intermediate form
+
+ // globals: type or variable
+ substrate.addZyme('Global', {
+ processItem: function(item) {
+ if (item.tokens[2].text == 'type') {
+ //dprint('type/const linenum: ' + item.lineNum + ':' + dump(item));
+ var fields = [];
+ if (item.tokens[3].text != 'opaque') {
+ if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
+ item.tokens[3] = item.tokens[3].item[0];
+ }
+ var subTokens = item.tokens[3].tokens;
+ subTokens.push({text:','});
+ while (subTokens[0]) {
+ var stop = 1;
+ while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
+ fields.push(combineTokens(subTokens.slice(0, stop)).text);
+ subTokens.splice(0, stop+1);
+ }
+ }
+ return [{
+ __result__: true, // XXX can remove these
+ intertype: 'type',
+ name_: item.tokens[0].text,
+ fields: fields,
+ lineNum: item.lineNum,
+ }]
+ } else {
+ // variable
+ var ident = item.tokens[0].text;
+ while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 })
+ item.tokens.splice(2, 1);
+ var ret = {
+ __result__: true,
+ intertype: 'globalVariable',
+ ident: ident,
+ type: item.tokens[2],
+ lineNum: item.lineNum,
+ };
+ if (ident == '@llvm.global_ctors') {
+ ret.ctors = [];
+ var subTokens = item.tokens[3].item[0].tokens;
+ splitTokenList(subTokens).forEach(function(segment) {
+ ret.ctors.push(segment[1].tokens.slice(-1)[0].text);
+ });
+ } else {
+ if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
+ item.tokens[3] = item.tokens[3].item[0].tokens;
+ }
+
+ if (item.tokens[3].text == 'c')
+ item.tokens.splice(3, 1);
+ ret.value = item.tokens[3];
+ }
+ return [ret];
+ }
+ },
+ });
+ // function header
+ substrate.addZyme('FuncHeader', {
+ processItem: function(item) {
+ item.tokens = item.tokens.filter(function(token) {
+ return ['internal', 'signext', 'zeroext', 'nounwind', 'define', 'linkonce_odr', 'inlinehint', '{'].indexOf(token.text) == -1;
+ });
+ return [{
+ __result__: true,
+ intertype: 'function',
+ ident: item.tokens[1].text,
+ returnType: item.tokens[0],
+ params: item.tokens[2],
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // label
+ substrate.addZyme('Label', {
+ processItem: function(item) {
+ return [{
+ __result__: true,
+ intertype: 'label',
+ ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1),
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+
+ // assignment
+ substrate.addZyme('Assign', {
+ processItem: function(item) {
+ var opIndex = findTokenText(item, '=');
+ var pair = splitItem({
+ intertype: 'assign',
+ ident: combineTokens(item.tokens.slice(0, opIndex)).text,
+ lineNum: item.lineNum,
+ }, 'value');
+ this.forwardItem(pair.parent, 'Reintegrator');
+ this.forwardItem(mergeInto(pair.child, { // Additional token, to be triaged and later re-integrated
+ indent: -1,
+ tokens: item.tokens.slice(opIndex+1),
+ }), 'Triager');
+ },
+ });
+ // reintegration - find intermediate representation-parsed items and
+ // place back in parents TODO: Optimize this code to optimal O(..)
+ substrate.addZyme('Reintegrator', makeReintegrator(function(parent, child) {
+ // Special re-integration behaviors
+ if (child.intertype == 'fastgetelementptrload') {
+ parent.intertype = 'fastgetelementptrload';
+ }
+ this.forwardItem(parent, '/dev/stdout');
+ }));
+
+ // 'load'
+ substrate.addZyme('Load', {
+ processItem: function(item) {
+ item.pointerType = item.tokens[1];
+ item.type = { text: removePointing(item.pointerType.text) };
+ if (item.tokens[2].text == 'getelementptr') {
+ var last = getTokenIndexByText(item.tokens, ';');
+ var gepTokens = item.tokens.slice(1, last); // without 'load'
+ var segment = [ gepTokens[2], gepTokens[0], null ].concat(gepTokens.slice(3));
+ var data = parseGetElementPtr(segment);
+ item.intertype = 'fastgetelementptrload';
+ item.type = data.type;
+ item.params = data.params;
+ item.pointer = { text: data.ident };
+ item.value = data.value;
+ } else {
+ item.intertype = 'load';
+ if (item.tokens[2].text == 'bitcast') {
+ item.pointer = item.tokens[3].item[0].tokens[1];
+ item.originalType = item.tokens[3].item[0].tokens[0];
+ } else {
+ item.pointer = item.tokens[2];
+ }
+ }
+ item.ident = item.pointer.text;
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'bitcast'
+ substrate.addZyme('Bitcast', {
+ processItem: function(item) {
+ item.intertype = 'bitcast';
+ item.type = item.tokens[1];
+ item.ident = item.tokens[2].text;
+ item.type2 = item.tokens[4];
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'getelementptr'
+ substrate.addZyme('GEP', {
+ processItem: function(item) {
+ var last = getTokenIndexByText(item.tokens, ';');
+ var segment = [ item.tokens[1], { text: null }, null, { item: [ {
+ tokens: item.tokens.slice(2, last)
+ } ] } ];
+ var data = parseGetElementPtr(segment);
+ item.intertype = 'getelementptr';
+ item.type = data.type;
+ item.params = data.params;
+ item.ident = data.ident;
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'call'
+ substrate.addZyme('Call', {
+ processItem: function(item) {
+ item.intertype = 'call';
+ if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) {
+ item.tokens.splice(1, 1);
+ }
+ item.type = item.tokens[1];
+ item.functionType = '';
+ while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
+ item.functionType += item.tokens[2].text;
+ item.tokens.splice(2, 1);
+ }
+ item.ident = item.tokens[2].text;
+ if (item.ident.substr(-2) == '()') {
+ // See comment in isStructType()
+ item.ident = item.ident.substr(0, item.ident.length-2);
+ // Also, we remove some spaces which might occur.
+ while (item.ident[item.ident.length-1] == ' ') {
+ item.ident = item.ident.substr(0, item.ident.length-1);
+ }
+ item.params = [];
+ } else {
+ item.params = parseParamTokens(item.tokens[3].item[0].tokens);
+ }
+ if (item.indent == 2) {
+ // standalone call - not in assign
+ item.standalone = true;
+ item.__result__ = true;
+ return [item];
+ }
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'invoke'
+ substrate.addZyme('Invoke', {
+ processItem: function(item) {
+ item.intertype = 'invoke';
+ item.type = item.tokens[1];
+ item.functionType = '';
+ while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
+ item.functionType += item.tokens[2].text;
+ item.tokens.splice(2, 1);
+ }
+ cleanOutTokens(['alignstack', 'alwaysinline', 'inlinehint', 'naked', 'noimplicitfloat', 'noinline', 'alwaysinline attribute.', 'noredzone', 'noreturn', 'nounwind', 'optsize', 'readnone', 'readonly', 'ssp', 'sspreq'], item.tokens, 4);
+ item.ident = item.tokens[2].text;
+ item.params = parseParamTokens(item.tokens[3].item[0].tokens);
+ item.toLabel = toNiceIdent(item.tokens[6].text);
+ item.unwindLabel = toNiceIdent(item.tokens[9].text);
+ if (item.indent == 2) {
+ // standalone call - not in assign
+ item.standalone = true;
+ item.__result__ = true;
+ return [item];
+ }
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'alloca'
+ substrate.addZyme('Alloca', {
+ processItem: function(item) {
+ item.intertype = 'alloca';
+ item.allocatedType = item.tokens[1];
+ item.type = { text: addPointing(item.tokens[1].text) }; // type of pointer we will get
+ item.type2 = { text: item.tokens[1].text }; // value we will create, and get a pointer to
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'phi'
+ substrate.addZyme('Phi', {
+ processItem: function(item) {
+ item.intertype = 'phi';
+ item.type = { text: item.tokens[1].text }
+ item.label1 = item.tokens[2].item[0].tokens[2].text;
+ item.value1 = item.tokens[2].item[0].tokens[0].text;
+ item.label2 = item.tokens[4].item[0].tokens[2].text;
+ item.value2 = item.tokens[4].item[0].tokens[0].text;
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // mathops
+ substrate.addZyme('Mathops', {
+ processItem: function(item) {
+ item.intertype = 'mathop';
+ item.op = item.tokens[0].text;
+ item.variant = null;
+ if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1);
+ if (['icmp', 'fcmp'].indexOf(item.op) != -1) {
+ item.variant = item.tokens[1].text;
+ item.tokens.splice(1, 1);
+ }
+ item.type = item.tokens[1];
+ item.ident = item.tokens[2].text;
+ item.ident2 = item.tokens[4].text;
+ item.ident3 = item.tokens[5] ? item.tokens[5].text : null;
+ item.ident4 = item.tokens[8] ? item.tokens[8].text : null;
+ dprint('mathop', item.op + ',' + item.variant + ',' + item.ident + ',' + item.value);
+ this.forwardItem(item, 'Reintegrator');
+ },
+ });
+ // 'store'
+ substrate.addZyme('Store', {
+ processItem: function(item) {
+ if (item.tokens[3].text != ',') {
+ assertEq(item.tokens[2].text, 'getelementptr');
+ // complex input - likely getelementptr
+ var commaIndex = 4;
+ while (item.tokens[commaIndex].text != ',') commaIndex ++;
+ return [{
+ __result__: true,
+ intertype: 'store',
+ valueType: item.tokens[1],
+ value: parseGetElementPtr(item.tokens.slice(1, commaIndex)),
+ pointerType: item.tokens[commaIndex+1],
+ pointer: item.tokens[commaIndex+2],
+ ident: item.tokens[commaIndex+2].text,
+ lineNum: item.lineNum,
+ }];
+ }
+ return [{
+ __result__: true,
+ intertype: 'store',
+ valueType: item.tokens[1],
+ value: addIdent(item.tokens[2]),
+ pointerType: item.tokens[4],
+ pointer: item.tokens[5],
+ ident: item.tokens[5].text,
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // 'br'
+ substrate.addZyme('Branch', {
+ processItem: function(item) {
+ if (item.tokens[1].text == 'label') {
+ return [{
+ __result__: true,
+ intertype: 'branch',
+ label: toNiceIdent(item.tokens[2].text),
+ lineNum: item.lineNum,
+ }];
+ } else {
+ return [{
+ __result__: true,
+ intertype: 'branch',
+ ident: item.tokens[2].text,
+ labelTrue: toNiceIdent(item.tokens[5].text),
+ labelFalse: toNiceIdent(item.tokens[8].text),
+ lineNum: item.lineNum,
+ }];
+ }
+ },
+ });
+ // 'ret'
+ substrate.addZyme('Return', {
+ processItem: function(item) {
+ return [{
+ __result__: true,
+ intertype: 'return',
+ type: item.tokens[1].text,
+ value: item.tokens[2] ? item.tokens[2].text : null,
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // 'switch'
+ substrate.addZyme('Switch', {
+ processItem: function(item) {
+ function parseSwitchLabels(item) {
+ var ret = [];
+ var tokens = item.item[0].tokens;
+ while (tokens.length > 0) {
+ ret.push({
+ value: tokens[1].text,
+ label: toNiceIdent(tokens[4].text),
+ });
+ tokens = tokens.slice(5);
+ }
+ return ret;
+ }
+ return [{
+ __result__: true,
+ intertype: 'switch',
+ type: item.tokens[1].text,
+ ident: item.tokens[2].text,
+ defaultLabel: item.tokens[5].text,
+ switchLabels: parseSwitchLabels(item.tokens[6]),
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // function end
+ substrate.addZyme('FuncEnd', {
+ processItem: function(item) {
+ return [{
+ __result__: true,
+ intertype: 'functionEnd',
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // external function stub
+ substrate.addZyme('External', {
+ processItem: function(item) {
+ return [{
+ __result__: true,
+ intertype: 'functionStub',
+ ident: item.tokens[2].text,
+ returnType: item.tokens[1],
+ params: item.tokens[3],
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // 'unreachable'
+ substrate.addZyme('Unreachable', {
+ processItem: function(item) {
+ return [{
+ __result__: true,
+ intertype: 'unreachable',
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+
+ // Input
+
+ substrate.addItem({
+ llvmText: data,
+ }, 'LineSplitter');
+
+ return substrate.solve();
+}
+
diff --git a/src/jsifier.js b/src/jsifier.js
new file mode 100644
index 00000000..e99917e7
--- /dev/null
+++ b/src/jsifier.js
@@ -0,0 +1,663 @@
+// Convert analyzed data to javascript
+
+function JSify(data) {
+ substrate = new Substrate('JSifyer');
+
+ var TYPES = data.types;
+
+ // type
+ substrate.addZyme('Type', {
+ processItem: function(item) {
+ var type = TYPES[item.name_];
+ if (type.needsFlattening) {
+ item.JS = 'var ' + toNiceIdent(item.name_) + '___FLATTENER = ' + JSON.stringify(TYPES[item.name_].flatIndexes) + ';';
+ } else {
+ item.JS = '// type: ' + item.name_;
+ }
+ item.__result__ = true;
+ return [item];
+ },
+ });
+
+ function makePointer(slab, pos) {
+ // XXX hardcoded ptr impl
+ if (slab == 'HEAP') return pos;
+ if (slab[0] != '[') {
+ slab = '[' + slab + ']';
+ }
+ return 'Pointer_make(' + slab + ', ' + (pos ? pos : 0) + ')';
+ // return '{ slab: ' + slab + ', pos: ' + (pos ? pos : 0) + ' }';
+ // return '[' + slab + ', ' + (pos ? pos : 0) + ']';
+ }
+
+ function makeGetSlab(ptr) {
+ // XXX hardcoded ptr impl
+ // return ptr + '.slab';
+ return 'HEAP';
+ }
+
+ function makeGetPos(ptr) {
+ // XXX hardcoded ptr impl
+ // return ptr + '.pos';
+ return ptr;
+ }
+
+ function makeGetValue(ptr, pos, noNeedFirst) {
+ return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + ']';
+ }
+
+ function makeSetValue(ptr, pos, value, noNeedFirst) {
+ if (SAFE_HEAP) {
+ return 'SAFE_HEAP_STORE(' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + ', ' + value + ')';
+ } else {
+ return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + '] = ' + value;
+ }
+ }
+
+ function makeEmptyStruct(type) {
+ dprint('types', '??makeemptystruct?? ' + dump(type));
+ // XXX hardcoded ptr impl
+ var ret = [];
+ var typeData = TYPES[type];
+ assertTrue(typeData);
+ for (var i = 0; i < typeData.flatSize; i++) {
+ ret.push(0);
+ }
+ return ret;
+ }
+
+ // Gets an entire constant expression
+ function parseConst(value, type) {
+ dprint('gconst', '//yyyyy ' + JSON.stringify(value) + ',' + type + '\n');
+ if (isNumberType(type) || pointingLevels(type) == 1) {
+ return makePointer(parseNumerical(value.text));
+ } else if (value.text == 'zeroinitializer') {
+ return makePointer(JSON.stringify(makeEmptyStruct(type)));
+ } else if (value.text && value.text[0] == '"') {
+ value.text = value.text.substr(1, value.text.length-2);
+ return makePointer(JSON.stringify(parseLLVMString(value.text)) + ' /* ' + value.text + '*/');
+ } else {
+ // Gets an array of constant items, separated by ',' tokens
+ function handleSegments(tokens) {
+ // Handle a single segment (after comma separation)
+ function handleSegment(segment) {
+ dprint('gconst', '// seggg: ' + JSON.stringify(segment) + '\n' + '\n')
+ if (segment[1].text == 'null') {
+ return '0';
+ } else if (segment[1].text == 'zeroinitializer') {
+ return JSON.stringify(makeEmptyStruct(segment[0].text));
+ } else if (segment[1].text == 'getelementptr') {
+ return finalizeGetElementPtr(parseGetElementPtr(segment));
+ } else if (segment[1].text in searchable('bitcast', 'inttoptr', 'ptrtoint')) {
+ var type = segment[2].item[0].tokens.slice(-1)[0].text; // TODO: Use this?
+ return handleSegment(segment[2].item[0].tokens.slice(0, -2));
+ } else if (segment[1].text == 'add') {
+ var subSegments = splitTokenList(segment[2].item[0].tokens);
+ return '(' + handleSegment(subSegments[0]) + ' + ' + handleSegment(subSegments[1]) + ')';
+ } else if (segment[1].type == '{') {
+ return '[' + handleSegments(segment[1].tokens) + ']';
+ } else if (segment.length == 2) {
+ return parseNumerical(toNiceIdent(segment[1].text));
+ } else {
+ throw 'Invalid segment: ' + dump(segment);
+ }
+ };
+ return splitTokenList(tokens).map(handleSegment).map(parseNumerical).join(', ');
+ }
+ if (value.item) {
+ // list of items
+ return makePointer('[ ' + handleSegments(value.item[0].tokens) + ' ]');
+ } else if (value.type == '{') {
+ // struct
+ return makePointer('[ ' + handleSegments(value.tokens) + ' ]');
+ } else if (value[0]) {
+ return makePointer('[ ' + handleSegments(value[0].tokens) + ' ]');
+ } else {
+ throw '// failzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value);
+ }
+ }
+ }
+
+ // globalVariablw
+ substrate.addZyme('GlobalVariable', {
+ processItem: function(item) {
+ dprint('gconst', '// zz global Cons: ' + dump(item) + ' :: ' + dump(item.value));
+ if (item.ident == '_llvm_global_ctors') {
+ item.JS = '\n__globalConstructor__ = function() {\n' +
+ item.ctors.map(function(ctor) { return ' ' + toNiceIdent(ctor) + '();' }).join('\n') +
+ '\n}\n';
+ } else if (item.type.text == 'external') {
+ item.JS = 'var ' + item.ident + ' = ' + '0; /* external value? */';
+ } else {
+ // GETTER - lazy loading, fixes issues with ordering.
+ item.JS = 'this.__defineGetter__("' + item.ident + '", function() { delete ' + item.ident + '; ' + item.ident + ' = ' + parseConst(item.value, item.type.text) + '; return ' + item.ident + ' });';
+ }
+ item.__result__ = true;
+ return [item];
+ },
+ });
+
+ // functionStub
+ substrate.addZyme('FunctionStub', {
+ processItem: function(item) {
+ var shortident = item.ident.substr(1);
+ if (shortident in Snippets) {
+ item.JS = item.ident + ' = ' + Snippets[shortident].toString();
+ } else {
+ item.JS = '// stub for ' + item.ident;
+ }
+ item.__result__ = true;
+ return [item];
+ },
+ });
+
+ // function splitter
+ substrate.addZyme('FunctionSplitter', {
+ processItem: function(item) {
+ var ret = [item];
+ item.splitItems = 0;
+ item.labels.forEach(function(label) {
+ label.lines.forEach(function(line) {
+ line.func = item.ident;
+ line.funcData = item;
+ line.parentLabel = label.ident;
+ ret.push(line);
+ item.splitItems ++;
+ });
+ });
+
+ this.forwardItems(ret, 'FuncLineTriager');
+ },
+ });
+ // function reconstructor & post-JS optimizer
+ substrate.addZyme('FunctionReconstructor', {
+ funcs: {},
+ seen: {},
+ processItem: function(item) {
+ if (this.seen[item.__uid__]) return;
+ if (item.intertype == 'function') {
+ this.funcs[item.ident] = item;
+ item.relines = {};
+ this.seen[item.__uid__] = true;
+ return;
+ }
+ var line = item;
+ var func = this.funcs[line.func];
+ if (!func) return;
+
+ // Re-insert our line
+ this.seen[item.__uid__] = true;
+ var label = func.labels.filter(function(label) { return label.ident == line.parentLabel })[0];
+ label.lines = label.lines.map(function(line2) {
+ return (line2.lineNum !== line.lineNum) ? line2 : line;
+ });
+ func.splitItems --;
+ // OLD delete line.funcData; // clean up
+ if (func.splitItems > 0) return;
+
+ // We have this function all reconstructed, go and finalize it's JS!
+
+ var hasVarArgs = false, hasPhi = false;
+ var params = parseParamTokens(func.params.item[0].tokens).map(function(param) {
+ hasPhi = hasPhi || param.intertype == 'phi';
+ if (param.intertype == 'varargs') {
+ hasVarArgs = true;
+ return null;
+ }
+ return toNiceIdent(param.ident);
+ }).filter(function(param) { return param != null });;
+
+ func.JS = '\nfunction ' + func.ident + '(' + params.join(', ') + ') {\n';
+ if (LABEL_DEBUG) func.JS += " print(INDENT + ' Entering: " + func.ident + "'); INDENT += ' ';\n";
+
+ if (hasVarArgs) {
+ func.JS += ' __numArgs__ = ' + params.length + ';\n';
+ }
+
+ // Walk function blocks and generate JS
+ function walkBlock(block, indent) {
+ if (!block) return '';
+ function getLabelLines(label, indent) {
+ var ret = '';
+ if (LABEL_DEBUG) {
+ ret += indent + " print(INDENT + '" + func.ident + ":" + label.ident + "');\n";
+ }
+ return ret + label.lines.map(function(line) { return indent + line.JS + (line.comment ? ' // ' + line.comment : '') }).join('\n');
+ }
+ var ret = '';
+ if (block.type == 'emulated' || block.type == 'simple') {
+ if (block.labels.length > 1) {
+ ret += indent + 'var __label__ = ' + getLabelId(block.entry) + '; /* ' + block.entry + ' */\n';
+ ret += indent + 'while(1) switch(__label__) {\n';
+ ret += block.labels.map(function(label) {
+ return indent + ' case ' + getLabelId(label.ident) + ': // ' + label.ident + '\n'
+ + getLabelLines(label, indent + ' ');
+ }).join('\n');
+ ret += '\n' + indent + '}';
+ } else {
+ ret += getLabelLines(block.labels[0], indent);
+ }
+ ret += '\n';
+ } else if (block.type == 'loop') {
+// if (mustGetTo(first.outLabels[0], [first.ident, first.outLabels[1]])) { /* left branch must return here, or go to right branch */
+ ret += indent + block.entry + ': while(1) {\n';
+ ret += walkBlock(block.inc, indent + ' ');
+ ret += walkBlock(block.rest, indent + ' ');
+ ret += indent + '}\n';
+ } else if (block.type == 'breakingif') {
+ ret += walkBlock(block.check, indent);
+ ret += indent + block.entry + ': do { if (' + block.ifVar + ') {\n';
+ ret += walkBlock(block.ifTrue, indent + ' ');
+ ret += indent + '} } while(0);\n';
+ } else if (block.type == 'if') {
+ ret += walkBlock(block.check, indent);
+ ret += indent + 'if (' + block.ifVar + ') {\n';
+ ret += walkBlock(block.ifTrue, indent + ' ');
+ ret += indent + '}\n';
+ } else {
+ ret = 'XXXXXXXXX!';
+ }
+ return ret + walkBlock(block.next, indent);
+ }
+ func.JS += walkBlock(func.block, ' ');
+ // Finalize function
+ if (LABEL_DEBUG) func.JS += " INDENT = INDENT.substr(0, INDENT.length-2);\n";
+ func.JS += '}\n';
+ func.__result__ = true;
+ return func;
+ },
+ });
+
+ function getVarData(funcData, ident) {
+ if (funcData.variables[ident]) {
+ return funcData.variables[ident].impl;
+ } else {
+ return 'emulated'; // All global are emulated
+ }
+ }
+
+ substrate.addZyme('FuncLineTriager', {
+ processItem: function(item) {
+ if (item.intertype == 'function') {
+ this.forwardItem(item, 'FunctionReconstructor');
+ } else if (item.JS) {
+ if (item.parentLineNum) {
+ this.forwardItem(item, 'AssignReintegrator');
+ } else {
+ this.forwardItem(item, 'FunctionReconstructor');
+ }
+ } else {
+ this.forwardItem(item, 'Intertype:' + item.intertype);
+ }
+ },
+ });
+
+ // assignment
+ substrate.addZyme('Intertype:assign', {
+ processItem: function(item) {
+ var pair = splitItem(item, 'value', ['funcData']);
+ this.forwardItem(pair.parent, 'AssignReintegrator');
+ this.forwardItem(pair.child, 'FuncLineTriager');
+ },
+ });
+ substrate.addZyme('AssignReintegrator', makeReintegrator(function(item, child) {
+ // 'var', since this is SSA - first assignment is the only assignment, and where it is defined
+ item.JS = (item.overrideSSA ? '' : 'var ') + toNiceIdent(item.ident);
+
+ var type = item.value.type.text;
+ var value = parseNumerical(item.value.JS);
+ //print("zz var: " + item.JS);
+ var impl = getVarData(item.funcData, item.ident);
+ switch (impl) {
+ case VAR_NATIVE: {
+ break;
+ }
+ case VAR_NATIVIZED: {
+ // SSA, so this must be the alloca. No need for a value
+ if (!item.overrideSSA) value = '';
+ break;
+ }
+ case VAR_EMULATED: {
+ break;
+ }
+ default: print('zz unknown impl: ' + impl);
+ }
+ if (value)
+ item.JS += ' = ' + value;
+ item.JS += ';';
+
+ this.forwardItem(item, 'FunctionReconstructor');
+ }));
+
+ // Function lines
+ function makeFuncLineZyme(intertype, func) {
+ substrate.addZyme('Intertype:' + intertype, {
+ processItem: function(item) {
+ item.JS = func(item);
+ if (!item.JS) throw "XXX - no JS generated for " + dump(item);
+ this.forwardItem(item, 'FuncLineTriager');
+ },
+ });
+ }
+ makeFuncLineZyme('store', function(item) {
+ //print('// zzqqzz ' + dump(item.value) + ' :::: ' + dump(item.pointer) + ' :::: ');
+ var ident = toNiceIdent(item.ident);
+ var value;
+ if (item.value.intertype == 'getelementptr') {
+ value = finalizeGetElementPtr(item.value);
+ } else {
+ value = toNiceIdent(item.value.ident);
+ }
+ if (pointingLevels(item.pointerType.text) == 1) {
+ value = parseNumerical(value, removePointing(item.pointerType.text));
+ }
+ //print("// zz seek " + ident + ',' + dump(item));
+ var impl = getVarData(item.funcData, item.ident);
+ var ret;
+ switch (impl) {
+ case VAR_NATIVIZED: ret = ident + ' = ' + value + ';'; break; // We have the actual value here
+ case VAR_EMULATED: ret = makeSetValue(ident, 0, value) + ';'; break;
+ default: print('zz unknown [store] impl: ' + impl);
+ }
+/*
+ if (LINEDEBUG && value) {
+ ret += '\nprint(INDENT + "' + ident + ' == " + JSON.stringify(' + ident + '));';
+ ret += '\nprint(INDENT + "' + ident + ' == " + (' + ident + ' && ' + ident + '.toString ? ' + ident + '.toString() : ' + ident + '));';
+ }
+*/
+ return ret;
+ });
+
+ var LABEL_IDs = {};
+ var LABEL_ID_COUNTER = 0;
+ function getLabelId(label) {
+ //print('needs id: ' + label + ' : ' + JSON.stringify(LABEL_IDs));
+ label = toNiceIdent(label);
+ if (label in LABEL_IDs) return LABEL_IDs[label];
+ return LABEL_IDs[label] = LABEL_ID_COUNTER ++;
+ }
+
+ function makeBranch(label) {
+ if (label[0] == 'B') {
+ if (label[1] == 'R') {
+ return 'break ' + label.substr(5) + ';';
+ } else if (label[1] == 'C') {
+ return 'continue ' + label.substr(5) + ';';
+ } else { // NOPP
+ return ';'; // Returning no text might confuse this parser
+ }
+ } else {
+ // FIXME: We should use __lastLabel__ only if we actually need
+ // it, which is in the case of phi being used on the label.
+ return '__lastLabel__ = __label__; __label__ = ' + getLabelId(label) + '; break;';
+ }
+ }
+
+ makeFuncLineZyme('branch', function(item) {
+ //print('branch: ' + dump(item));
+ if (!item.ident) {
+ return makeBranch(item.label);
+ } else {
+ var labelTrue = makeBranch(item.labelTrue);
+ var labelFalse = makeBranch(item.labelFalse);
+ if (labelTrue == ';' && labelFalse == ';') return ';';
+ var head = 'if (' + item.ident + ') { ';
+ var head2 = 'if (!(' + item.ident + ')) { ';
+ var else_ = ' } else { ';
+ var tail = ' }';
+ if (labelTrue == ';') {
+ return head2 + labelFalse + tail;
+ } else if (labelFalse == ';') {
+ return head + labelTrue + tail;
+ } else {
+ return head + labelTrue + else_ + labelFalse + tail;
+ }
+ }
+ });
+ makeFuncLineZyme('switch', function(item) {
+ var ret = '';
+ var first = true;
+ item.switchLabels.forEach(function(switchLabel) {
+ if (!first) {
+ ret += 'else ';
+ } else {
+ first = false;
+ }
+ ret += 'if (' + item.ident + ' == ' + switchLabel.value + ') {\n';
+ ret += ' ' + makeBranch(switchLabel.label) + '\n';
+ ret += '}\n';
+ });
+ ret += 'else {\n';
+ ret += makeBranch(item.defaultLabel) + '\n';
+ ret += '}\n';
+ if (item.value) {
+ ret += ' ' + toNiceIdent(item.value);
+ }
+ return ret;
+ });
+ makeFuncLineZyme('return', function(item) {
+ var ret = '';
+ if (LABEL_DEBUG) ret += "INDENT = INDENT.substr(0, INDENT.length-2);\n";
+ ret += 'return';
+ if (item.value) {
+ ret += ' ' + toNiceIdent(item.value);
+ }
+ return ret + ';';
+ });
+ makeFuncLineZyme('invoke', function(item) {
+ // Wrapping in a function lets us easily return values if we are
+ // in an assignment
+ var ret = '(function() { try { return '
+ + makeFunctionCall(item.ident, item.params) + '; '
+ + '__THREW__ = false } catch(e) { '
+ + '__THREW__ = true; '
+ + '} })(); if (!__THREW__) { ' + makeBranch(item.toLabel) + ' } else { ' + makeBranch(item.unwindLabel) + ' }';
+ return ret;
+ });
+ makeFuncLineZyme('load', function(item) {
+ //print('// zz LOAD ' + dump(item) + ' :: ' + dump(item.tokens));
+ var ident = toNiceIdent(item.ident);
+ var impl = getVarData(item.funcData, item.ident);
+ switch (impl) {
+ case VAR_NATIVIZED: {
+ return ident; // We have the actual value here
+ }
+ case VAR_EMULATED: return makeGetValue(ident);
+ default: return "unknown [load] impl: " + impl;
+ }
+ });
+ makeFuncLineZyme('alloca', function(item) {
+ dprint('alloca', dump(item));
+ if (pointingLevels(item.allocatedType.text) == 0 && isStructType(item.allocatedType.text)) {
+ // TODO: allocate on a stack, not on the heap (we currently leak all this)
+ return makePointer(JSON.stringify(makeEmptyStruct(item.allocatedType.text)));
+ } else {
+ return makePointer('[0]');
+ }
+ });
+ makeFuncLineZyme('phi', function(item) {
+ dprint('phi', dump(item));
+ return '__lastLabel__ == ' + getLabelId(item.label1) + ' ? ' + toNiceIdent(item.value1) + ' : ' + toNiceIdent(item.value2);
+ });
+ makeFuncLineZyme('mathop', function(item) { with(item) {
+ dprint('mathop', 'mathop: ' + dump(item));
+ switch (item.op) {
+ case 'add': return ident + ' + ' + ident2;
+ case 'sub': return ident + ' - ' + ident2;
+ case 'sdiv': case 'udiv': return 'Math.floor(' + ident + ' / ' + ident2 + ')';
+ case 'mul': return ident + ' * ' + ident2;
+ case 'urem': case 'srem': return 'Math.floor(' + ident + ' % ' + ident2 + ')';
+ case 'or': return ident + ' | ' + ident2;
+ case 'and': return ident + ' & ' + ident2;
+ case 'xor': return ident + ' ^ ' + ident2;
+ case 'shl': case 'ashl': case 'lshl': return ident + ' << ' + ident2;
+ case 'shr': case 'ashr': case 'lshr': return ident + ' >> ' + ident2;
+ case 'fadd': return ident + ' + ' + ident2;
+ case 'fsub': return ident + ' - ' + ident2;
+ case 'fdiv': return ident + ' / ' + ident2;
+ case 'fmul': return ident + ' * ' + ident2;
+ case 'uitofp': case 'sitofp': return ident;
+ case 'fptoui': case 'fptosi': return 'Math.floor(' + ident + ')';
+ case 'icmp': {
+ switch (variant) {
+ case 'uge': case 'sge': return '0+(' + ident + ' >= ' + ident2 + ')';
+ case 'ule': case 'sle': return '0+(' + ident + ' <= ' + ident2 + ')';
+ case 'ugt': case 'sgt': return '0+(' + ident + ' > ' + ident2 + ')';
+ case 'ult': case 'slt': return '0+(' + ident + ' < ' + ident2 + ')';
+ case 'ne': case 'une': return '0+(' + ident + ' != ' + ident2 + ')';
+ case 'eq': return '0+(' + ident + ' == ' + ident2 + ')';
+ default: throw 'Unknown icmp variant: ' + variant
+ }
+ }
+ case 'fcmp': {
+ switch (variant) {
+ case 'uge': case 'oge': return '0+(' + ident + ' >= ' + ident2 + ')';
+ case 'ule': case 'ole': return '0+(' + ident + ' <= ' + ident2 + ')';
+ case 'ugt': case 'ogt': return '0+(' + ident + ' > ' + ident2 + ')';
+ case 'ult': case 'olt': return '0+(' + ident + ' < ' + ident2 + ')';
+ case 'une': case 'one': return '0+(' + ident + ' != ' + ident2 + ')';
+ case 'ueq': case 'oeq': return '0+(' + ident + ' == ' + ident2 + ')';
+ default: throw 'Unknown fcmp variant: ' + variant
+ }
+ }
+ case 'zext': case 'fpext': case 'trunc': case 'sext': case 'fptrunc': return ident;
+ case 'select': return '(' + ident + ' ? ' + ident3 + ' : ' + ident4 + ')';
+ case 'ptrtoint': {
+ if (type.text != 'i8*') print('// XXX Warning: Risky ptrtoint operation on line ' + lineNum);
+ return ident;
+ }
+ case 'inttoptr': {
+ print('// XXX Warning: inttoptr operation on line ' + lineNum);
+ return ident;
+ }
+ default: throw 'Unknown mathcmp op: ' + item.op
+ }
+ } });
+/*
+ return [{
+ __result__: true,
+ intertype: 'mathop',
+ op: op,
+ variant: variant,
+ type: item.tokens[1].text,
+ ident: item.tokens[2].text,
+ value: item.tokens[4].text,
+ lineNum: item.lineNum,
+ }];
+*/
+
+ function getGetElementPtrIndexes(item) {
+ var ident = item.ident;
+ var type = item.params[0].type.text; // param 0 == type
+ // struct pointer, struct*, and getting a ptr to an element in that struct. Param 1 is which struct, then we have items in that
+ // struct, and possibly further substructures, all embedded
+ // can also be to 'blocks': [8 x i32]*, not just structs
+ type = removePointing(type);
+ var indexes = [makeGetPos(ident)];
+ var offset = toNiceIdent(item.params[1].ident);
+ if (offset != 0) {
+ indexes.push((isStructType(type) && TYPES[type].flatSize != 1 ? TYPES[type].flatSize + '*' : '') + offset);
+ }
+ item.params.slice(2, item.params.length).forEach(function(arg) {
+ var curr = toNiceIdent(arg.ident);
+ // TODO: If index is constant, optimize
+ var typeData = TYPES[type];
+ if (isStructType(type) && typeData.needsFlattening) {
+ if (typeData.flatFactor) {
+ indexes.push(curr + '*' + typeData.flatFactor);
+ } else {
+ indexes.push(toNiceIdent(type) + '___FLATTENER[' + curr + ']');
+ }
+ } else {
+ if (curr != 0) {
+ indexes.push(curr);
+ }
+ }
+ type = TYPES[type] ? TYPES[type].fields[curr] : '';
+ });
+ return indexes.join('+');
+ }
+
+ function finalizeGetElementPtr(item) {
+ //print('//zz finalize: ' + dump(item.params));
+ // TODO: statically combine indexes here if consts
+ return makePointer(makeGetSlab(item.ident), getGetElementPtrIndexes(item));
+ }
+
+ function finalizeBitcast(item) {
+ //print('//zz finalizeBC: ' + dump(item));
+ return item.ident;
+ }
+
+ makeFuncLineZyme('bitcast', function(item) {
+ // XXX Don't we need to copy ptr - i.e. create new ones (at least if uses > just the next line)?
+ // XXX hardcoded ptr impl - as ptrs are ints, we don't need to copy
+ var ident = toNiceIdent(item.ident);
+ return ident;
+ });
+ function makeFunctionCall(ident, params) {
+ // Special cases
+ if (ident == '_llvm_va_start') {
+ var args = 'Array.prototype.slice.call(arguments, __numArgs__)';
+ var data = 'Pointer_make([' + args + '.length].concat(' + args + '), 0)';
+ if (SAFE_HEAP) {
+ return 'SAFE_HEAP_STORE(' + params[0].ident + ', ' + data + ', 0)';
+ } else {
+ return 'HEAP[' + params[0].ident + '] = ' + data;
+ }
+ } else if (ident == '_llvm_va_end') {
+ return ';'
+ }
+
+ var params = params.map(function(param) {
+ if (param.intertype === 'getelementptr') {
+ return finalizeGetElementPtr(param);
+ } else if (param.intertype === 'bitcast') {
+ return finalizeBitcast(param);
+ } else {
+ return toNiceIdent(param.ident); //.value;//'??arg ' + param.intertype + '??';
+ }
+ });
+ return ident + '(' + params.join(', ') + ')';
+ }
+ makeFuncLineZyme('getelementptr', function(item) { return finalizeGetElementPtr(item) });
+ makeFuncLineZyme('call', function(item) {
+ return makeFunctionCall(item.ident, item.params) + (item.standalone ? ';' : '');
+ });
+
+ // Optimzed intertypes
+
+ makeFuncLineZyme('fastgetelementptrload', function(item) {
+//print('// FAST ' + dump(item));
+ return 'var ' + item.ident + ' = ' + makeGetValue(item.value.ident, getGetElementPtrIndexes(item.value), true) + ';';
+ });
+ makeFuncLineZyme('fastgetelementptrstore', function(item) {
+//print('// FAST ' + dump(item));
+ return makeSetValue(item.value.ident, getGetElementPtrIndexes(item.value), item.ident, true) + ';';
+ });
+
+ makeFuncLineZyme('unreachable', function(item) { return '// unreachable' });
+
+ // Final combiner
+
+ function finalCombiner(items) {
+ var ret = items.filter(function(item) { return item.intertype == 'type' });
+ ret = ret.concat(items.filter(function(item) { return item.intertype == 'globalVariable' }));
+ ret.push('\n');
+ ret = ret.concat(items.filter(function(item) { return item.intertype == 'functionStub' }));
+ ret.push('\n');
+ ret = ret.concat(items.filter(function(item) { return item.intertype == 'function' }));
+ return ret.map(function(item) { return item.JS }).join('\n');
+ }
+
+ // Data
+
+ substrate.addItems(values(data.types).filter(function(type) { return type.lineNum != '?' }), 'Type');
+ substrate.addItems(data.globalVariables, 'GlobalVariable');
+ substrate.addItems(data.functions, 'FunctionSplitter');
+ substrate.addItems(data.functionStubs, 'FunctionStub');
+
+ return preprocess(read('preamble.js')) + finalCombiner(substrate.solve()) + read('postamble.js');
+// return finalCombiner(substrate.solve());
+}
+
diff --git a/src/parseTools.js b/src/parseTools.js
new file mode 100644
index 00000000..2a689889
--- /dev/null
+++ b/src/parseTools.js
@@ -0,0 +1,368 @@
+// Various tools for parsing llvm
+
+// Simple #if/else/endif preprocessing for a file. Checks if the
+// ident checked is true in our global.
+function preprocess(text) {
+ var lines = text.split('\n');
+ var ret = '';
+ var show = true;
+ for (var i = 0; i < lines.length; i++) {
+ var line = lines[i];
+ if (line[0] != '#') {
+ if (show) {
+ ret += line + '\n';
+ }
+ } else {
+ if (line[1] == 'i') { // if
+ var ident = line.substr(4);
+ show = !!this[ident];
+ } else if (line[2] == 'l') { // else
+ show = !show;
+ } else if (line[2] == 'n') { // endif
+ show = true;
+ } else {
+ throw "Unclear preprocessor command: " + line;
+ }
+ }
+ }
+ return ret;
+}
+
+function addPointing(type) { return type + '*' }
+function removePointing(type, num) {
+ if (num === 0) return type;
+ return type.substr(0, type.length-(num ? num : 1))
+}
+
+function pointingLevels(type) {
+ if (!type) return 0;
+ var ret = 0;
+ var len1 = type.length - 1;
+ while (type[len1-ret] === '*') {
+ ret ++;
+ }
+ return ret;
+}
+
+function toNiceIdent(ident) {
+ if (parseFloat(ident) == ident) return ident;
+ if (ident == 'null') return '0'; // see parseNumerical
+ return ident.replace(/[" \.@%:<>,\*]/g, '_');
+}
+
+function isNumberType(type) {
+ var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double'];
+ return types.indexOf(type) != -1;
+}
+
+function isStructPointerType(type) {
+ // This test is necessary for clang - in llvm-gcc, we
+ // could check for %struct. The downside is that %1 can
+ // be either a variable or a structure, and we guess it is
+ // a struct, which can lead to |call i32 %5()| having
+ // |%5()| as a function call (like |i32 (i8*)| etc.). So
+ // we must check later on, in call(), where we have more
+ // context, to differentiate such cases.
+ // A similar thing happns in isStructType()
+ return !isNumberType(type) && type[0] == '%';
+}
+
+function isStructType(type) {
+ if (isPointerType(type)) return false;
+ if (new RegExp(/^\[\d+\ x\ (.*)\]/g).test(type)) return true; // [15 x ?] blocks. Like structs
+ // See comment in isStructPointerType()
+ return !isNumberType(type) && type[0] == '%';
+}
+
+function isPointerType(type) { // TODO!
+ return pointingLevels(type) > 0;
+}
+
+function isVoidType(type) {
+ return type == 'void';
+}
+
+function isType(type) { // TODO!
+ return isVoidType(type) || isNumberType(type) || isStructType(type) || isPointerType(type);
+}
+
+// Detects a function definition, ([...|type,[type,...]])
+function isFunctionDef(token) {
+ var text = token.text;
+ var pointing = pointingLevels(text);
+ var nonPointing = removePointing(text, pointing);
+ if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')')
+ return false;
+ if (nonPointing == '(...)') return true;
+ if (!token.item) return false;
+ var fail = false;
+ splitTokenList(token.item[0].tokens).forEach(function(segment) {
+ var subtoken = segment[0];
+ fail = fail || !isType(subtoken.text) || segment.length > 1;
+ });
+ return !fail;
+}
+
+function addIdent(token) {
+ token.ident = token.text;
+ return token;
+}
+
+function combineTokens(tokens) {
+ var ret = {
+ lineNum: tokens[0].lineNum,
+ text: '',
+ tokens: [],
+ };
+ tokens.forEach(function(token) {
+ ret.text += token.text;
+ ret.tokens.push(token);
+ });
+ return ret;
+}
+
+function compareTokens(a, b) {
+ var aId = a.__uid__;
+ var bId = b.__uid__;
+ a.__uid__ = 0;
+ b.__uid__ = 0;
+ var ret = JSON.stringify(a) == JSON.stringify(b);
+ a.__uid__ = aId;
+ b.__uid__ = bId;
+ return ret;
+}
+
+function getTokenIndexByText(tokens, text) {
+ var i = 0;
+ while (tokens[i].text != ';') i++;
+ return i;
+}
+
+function findTokenText(item, text) {
+ for (var i = 0; i < item.tokens.length; i++) {
+ if (item.tokens[i].text == text) return i;
+ }
+ return -1;
+}
+
+// Splits a list of tokens separated by commas. For example, a list of arguments in a function call
+function splitTokenList(tokens) {
+ if (tokens.length == 0) return [];
+ if (tokens.slice(-1)[0].text != ',') tokens.push({text:','});
+ var ret = [];
+ var seg = [];
+ tokens.forEach(function(token) {
+ if (token.text == ',') {
+ ret.push(seg);
+ seg = [];
+ } else {
+ seg.push(token);
+ }
+ });
+ return ret;
+}
+
+// Splits an item, with the intent of later reintegration
+function splitItem(parent, childSlot, copySlots) {
+ if (!copySlots) copySlots = [];
+ if (!parent[childSlot]) parent[childSlot] = {};
+ var child = parent[childSlot];
+ parent[childSlot] = null;
+ child.parentUid = parent.__uid__;
+ child.parentSlot = childSlot;
+ child.parentLineNum = child.lineNum = parent.lineNum;
+ copySlots.forEach(function(slot) { child[slot] = parent[slot] });
+ return {
+ parent: parent,
+ child: child,
+ };
+}
+
+function makeReintegrator(afterFunc) {
+ // reintegration - find intermediate representation-parsed items and
+ // place back in parents TODO: Optimize this code to optimal O(..)
+ return {
+ process: function(items) {
+ var ret = [];
+ for (var i = 0; i < items.length; i++) {
+ var found = false;
+ if (items[i] && items[i].parentSlot) {
+ var child = items[i];
+ for (var j = 0; j < items.length; j++) {
+ if (items[j] && items[j].lineNum == items[i].parentLineNum) {
+ var parent = items[j];
+ // process the pair
+ parent[child.parentSlot] = child;
+ delete child.parentLineNum;
+ afterFunc.call(this, parent, child);
+
+ items[i] = null;
+ items[j] = null;
+ found = true;
+ break;
+ }
+ }
+ }
+ }
+ this.forwardItems(items.filter(function(item) { return !!item }), this.name_); // next time hopefully
+ return ret;
+ }
+ };
+}
+
+function parseParamTokens(params) {
+ if (params.length === 0) return [];
+ var ret = [];
+ if (params[params.length-1].text != ',') {
+ params.push({ text: ',' });
+ }
+ var absIndex = 0;
+ while (params.length > 0) {
+ var i = 0;
+ while (params[i].text != ',') i++;
+ var segment = params.slice(0, i);
+ params = params.slice(i+1);
+ segment = cleanSegment(segment);
+ if (segment.length == 1) {
+ if (segment[0].text == '...') {
+ ret.push({
+ intertype: 'varargs',
+ });
+ } else {
+ // Clang sometimes has a parameter with just a type,
+ // no name... the name is implied to be %{the index}
+ ret.push({
+ intertype: 'value',
+ type: segment[0],
+ value: null,
+ ident: '_' + absIndex,
+ });
+ }
+ } else if (segment[1].text === 'getelementptr') {
+ ret.push(parseGetElementPtr(segment));
+ } else if (segment[1].text === 'bitcast') {
+ ret.push(parseBitcast(segment));
+ } else {
+ if (segment[2] && segment[2].text == 'to') { // part of bitcast params
+ segment = segment.slice(0, 2);
+ }
+ while (segment.length > 2) {
+ segment[0].text += segment[1].text;
+ segment.splice(1, 1); // TODO: merge tokens nicely
+ }
+ ret.push({
+ intertype: 'value',
+ type: segment[0],
+ value: segment[1],
+ ident: segment[1].text,
+ });
+ // } else {
+ // throw "what is this params token? " + JSON.stringify(segment);
+ }
+ absIndex ++;
+ }
+ return ret;
+}
+
+function cleanSegment(segment) {
+ if (segment.length == 1) return segment;
+ while (['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'].indexOf(segment[1].text) != -1) {
+ segment.splice(1, 1);
+ }
+ return segment;
+}
+
+// Expects one of the several LVM getelementptr formats:
+// a qualifier, a type, a null, then an () item with tokens
+function parseGetElementPtr(segment) {
+//print("Parse GTP: " + dump(segment));
+ segment = segment.slice(0);
+ segment = cleanSegment(segment);
+ assertTrue(['inreg', 'byval'].indexOf(segment[1].text) == -1);
+ //dprint('// zz: ' + dump(segment) + '\n\n\n');
+ var ret = {
+ intertype: 'getelementptr',
+ type: segment[0],
+ params: parseParamTokens(segment[3].item[0].tokens),
+ };
+ ret.ident = toNiceIdent(ret.params[0].ident);
+ return ret;
+}
+
+// TODO: use this
+function parseBitcast(segment) {
+ //print('zz parseBC pre: ' + dump(segment));
+ var ret = {
+ intertype: 'bitcast',
+ type: segment[0],
+ params: parseParamTokens(segment[2].item[0].tokens),
+ };
+ ret.ident = toNiceIdent(ret.params[0].ident);
+//print('zz parseBC: ' + dump(ret));
+ return ret;
+}
+
+function cleanOutTokens(filterOut, tokens, index) {
+ while (filterOut.indexOf(tokens[index].text) != -1) {
+ tokens.splice(index, 1);
+ }
+}
+
+function _HexToInt(stringy) {
+ var ret = 0;
+ var mul = 1;
+ var base;
+ for (var i = (stringy.length - 1); i >= 0; i = i - 1) {
+ if (stringy.charCodeAt(i) >= "A".charCodeAt(0)) {
+ base = "A".charCodeAt(0) - 10;
+ } else {
+ base = "0".charCodeAt(0);
+ }
+ ret = ret + (mul*(stringy.charCodeAt(i) - base));
+ mul = mul * 16;
+ }
+ return ret;
+}
+
+function IEEEUnHex(stringy) {
+ var a = _HexToInt(stringy.substr(2, 8));
+ var b = _HexToInt(stringy.substr(10));
+ var e = (a >> ((52 - 32) & 0x7ff)) - 1023;
+ return ((((a & 0xfffff | 0x100000) * 1.0) / Math.pow(2,52-32)) * Math.pow(2, e)) + (((b * 1.0) / Math.pow(2, 52)) * Math.pow(2, e));
+}
+
+function parseNumerical(value, type) {
+ if ((!type || type == 'double' || type == 'float') && value.substr(0,2) == '0x') {
+ // Hexadecimal double value, as the llvm docs say,
+ // "The one non-intuitive notation for constants is the hexadecimal form of floating point constants."
+ return IEEEUnHex(value);
+ }
+ if (value == 'null') {
+ // NULL *is* 0, in C/C++. No JS null! (null == 0 is false, etc.)
+ return '0';
+ }
+ return value;
+}
+
+// \0Dsometext is really '\r', then sometext
+// This function returns an array of int values
+function parseLLVMString(str) {
+ var ret = [];
+ var i = 0;
+ while (i < str.length) {
+ var chr = str[i];
+ if (chr != '\\') {
+ ret.push(chr.charCodeAt(0));
+ i++;
+ } else {
+ ret.push(_HexToInt(str[i+1]+str[i+2]));
+ i += 3;
+ }
+ }
+ return ret;
+}
+
+function getLabelIds(labels) {
+ return labels.map(function(label) { return label.ident });
+}
+
diff --git a/src/parser.js b/src/parser.js
deleted file mode 100644
index c904cde1..00000000
--- a/src/parser.js
+++ /dev/null
@@ -1,2572 +0,0 @@
-// LLVM parser
-//============
-
-/*
- * TODO:
- * * Re-use variables (of the same kind, native/nativized vs. emulated).
- */
-
-// Prep - allow this to run in both SpiderMonkey and V8
-
-if (!this['load']) {
- load = function(f) { eval(snarf(f)) }
-}
-if (!this['read']) {
- read = function(f) { snarf(f) }
-}
-
-load('settings.js');
-if (LABEL_DEBUG && RELOOP) throw "Cannot debug labels if they have been relooped!";
-
-load('utility.js');
-load('enzymatic.js');
-load('snippets.js');
-
-// Tools
-
-// Simple #if/else/endif preprocessing for a file. Checks if the
-// ident checked is true in our global.
-function preprocess(text) {
- var lines = text.split('\n');
- var ret = '';
- var show = true;
- for (var i = 0; i < lines.length; i++) {
- var line = lines[i];
- if (line[0] != '#') {
- if (show) {
- ret += line + '\n';
- }
- } else {
- if (line[1] == 'i') { // if
- var ident = line.substr(4);
- show = !!this[ident];
- } else if (line[2] == 'l') { // else
- show = !show;
- } else if (line[2] == 'n') { // endif
- show = true;
- } else {
- throw "Unclear preprocessor command: " + line;
- }
- }
- }
- return ret;
-}
-
-function addPointing(type) { return type + '*' }
-function removePointing(type, num) {
- if (num === 0) return type;
- return type.substr(0, type.length-(num ? num : 1))
-}
-
-function pointingLevels(type) {
- if (!type) return 0;
- var ret = 0;
- var len1 = type.length - 1;
- while (type[len1-ret] === '*') {
- ret ++;
- }
- return ret;
-}
-
-function toNiceIdent(ident) {
- if (parseFloat(ident) == ident) return ident;
- if (ident == 'null') return '0'; // see parseNumerical
- return ident.replace(/[" \.@%:<>,\*]/g, '_');
-}
-
-function isNumberType(type) {
- var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double'];
- return types.indexOf(type) != -1;
-}
-
-function isStructPointerType(type) {
- // This test is necessary for clang - in llvm-gcc, we
- // could check for %struct. The downside is that %1 can
- // be either a variable or a structure, and we guess it is
- // a struct, which can lead to |call i32 %5()| having
- // |%5()| as a function call (like |i32 (i8*)| etc.). So
- // we must check later on, in call(), where we have more
- // context, to differentiate such cases.
- // A similar thing happns in isStructType()
- return !isNumberType(type) && type[0] == '%';
-}
-
-function isStructType(type) {
- if (isPointerType(type)) return false;
- if (new RegExp(/^\[\d+\ x\ (.*)\]/g).test(type)) return true; // [15 x ?] blocks. Like structs
- // See comment in isStructPointerType()
- return !isNumberType(type) && type[0] == '%';
-}
-
-function isPointerType(type) { // TODO!
- return pointingLevels(type) > 0;
-}
-
-function isVoidType(type) {
- return type == 'void';
-}
-
-function isType(type) { // TODO!
- return isVoidType(type) || isNumberType(type) || isStructType(type) || isPointerType(type);
-}
-
-// Detects a function definition, ([...|type,[type,...]])
-function isFunctionDef(token) {
- var text = token.text;
- var pointing = pointingLevels(text);
- var nonPointing = removePointing(text, pointing);
- if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')')
- return false;
- if (nonPointing == '(...)') return true;
- if (!token.item) return false;
- var fail = false;
- splitTokenList(token.item[0].tokens).forEach(function(segment) {
- var subtoken = segment[0];
- fail = fail || !isType(subtoken.text) || segment.length > 1;
- });
- return !fail;
-}
-
-function addIdent(token) {
- token.ident = token.text;
- return token;
-}
-
-function combineTokens(tokens) {
- var ret = {
- lineNum: tokens[0].lineNum,
- text: '',
- tokens: [],
- };
- tokens.forEach(function(token) {
- ret.text += token.text;
- ret.tokens.push(token);
- });
- return ret;
-}
-
-function compareTokens(a, b) {
- var aId = a.__uid__;
- var bId = b.__uid__;
- a.__uid__ = 0;
- b.__uid__ = 0;
- var ret = JSON.stringify(a) == JSON.stringify(b);
- a.__uid__ = aId;
- b.__uid__ = bId;
- return ret;
-}
-
-function getTokenIndexByText(tokens, text) {
- var i = 0;
- while (tokens[i].text != ';') i++;
- return i;
-}
-
-function findTokenText(item, text) {
- for (var i = 0; i < item.tokens.length; i++) {
- if (item.tokens[i].text == text) return i;
- }
- return -1;
-}
-
-// Splits a list of tokens separated by commas. For example, a list of arguments in a function call
-function splitTokenList(tokens) {
- if (tokens.length == 0) return [];
- if (tokens.slice(-1)[0].text != ',') tokens.push({text:','});
- var ret = [];
- var seg = [];
- tokens.forEach(function(token) {
- if (token.text == ',') {
- ret.push(seg);
- seg = [];
- } else {
- seg.push(token);
- }
- });
- return ret;
-}
-
-// Splits an item, with the intent of later reintegration
-function splitItem(parent, childSlot, copySlots) {
- if (!copySlots) copySlots = [];
- if (!parent[childSlot]) parent[childSlot] = {};
- var child = parent[childSlot];
- parent[childSlot] = null;
- child.parentUid = parent.__uid__;
- child.parentSlot = childSlot;
- child.parentLineNum = child.lineNum = parent.lineNum;
- copySlots.forEach(function(slot) { child[slot] = parent[slot] });
- return {
- parent: parent,
- child: child,
- };
-}
-
-function makeReintegrator(afterFunc) {
- // reintegration - find intermediate representation-parsed items and
- // place back in parents TODO: Optimize this code to optimal O(..)
- return {
- process: function(items) {
- var ret = [];
- for (var i = 0; i < items.length; i++) {
- var found = false;
- if (items[i] && items[i].parentSlot) {
- var child = items[i];
- for (var j = 0; j < items.length; j++) {
- if (items[j] && items[j].lineNum == items[i].parentLineNum) {
- var parent = items[j];
- // process the pair
- parent[child.parentSlot] = child;
- delete child.parentLineNum;
- afterFunc.call(this, parent, child);
-
- items[i] = null;
- items[j] = null;
- found = true;
- break;
- }
- }
- }
- }
- this.forwardItems(items.filter(function(item) { return !!item }), this.name_); // next time hopefully
- return ret;
- }
- };
-}
-
-function parseParamTokens(params) {
- if (params.length === 0) return [];
- var ret = [];
- if (params[params.length-1].text != ',') {
- params.push({ text: ',' });
- }
- var absIndex = 0;
- while (params.length > 0) {
- var i = 0;
- while (params[i].text != ',') i++;
- var segment = params.slice(0, i);
- params = params.slice(i+1);
- segment = cleanSegment(segment);
- if (segment.length == 1) {
- if (segment[0].text == '...') {
- ret.push({
- intertype: 'varargs',
- });
- } else {
- // Clang sometimes has a parameter with just a type,
- // no name... the name is implied to be %{the index}
- ret.push({
- intertype: 'value',
- type: segment[0],
- value: null,
- ident: '_' + absIndex,
- });
- }
- } else if (segment[1].text === 'getelementptr') {
- ret.push(parseGetElementPtr(segment));
- } else if (segment[1].text === 'bitcast') {
- ret.push(parseBitcast(segment));
- } else {
- if (segment[2] && segment[2].text == 'to') { // part of bitcast params
- segment = segment.slice(0, 2);
- }
- while (segment.length > 2) {
- segment[0].text += segment[1].text;
- segment.splice(1, 1); // TODO: merge tokens nicely
- }
- ret.push({
- intertype: 'value',
- type: segment[0],
- value: segment[1],
- ident: segment[1].text,
- });
- // } else {
- // throw "what is this params token? " + JSON.stringify(segment);
- }
- absIndex ++;
- }
- return ret;
-}
-
-function cleanSegment(segment) {
- if (segment.length == 1) return segment;
- while (['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'].indexOf(segment[1].text) != -1) {
- segment.splice(1, 1);
- }
- return segment;
-}
-
-// Expects one of the several LVM getelementptr formats:
-// a qualifier, a type, a null, then an () item with tokens
-function parseGetElementPtr(segment) {
-//print("Parse GTP: " + dump(segment));
- segment = segment.slice(0);
- segment = cleanSegment(segment);
- assertTrue(['inreg', 'byval'].indexOf(segment[1].text) == -1);
- //dprint('// zz: ' + dump(segment) + '\n\n\n');
- var ret = {
- intertype: 'getelementptr',
- type: segment[0],
- params: parseParamTokens(segment[3].item[0].tokens),
- };
- ret.ident = toNiceIdent(ret.params[0].ident);
- return ret;
-}
-
-// TODO: use this
-function parseBitcast(segment) {
- //print('zz parseBC pre: ' + dump(segment));
- var ret = {
- intertype: 'bitcast',
- type: segment[0],
- params: parseParamTokens(segment[2].item[0].tokens),
- };
- ret.ident = toNiceIdent(ret.params[0].ident);
-//print('zz parseBC: ' + dump(ret));
- return ret;
-}
-
-function cleanOutTokens(filterOut, tokens, index) {
- while (filterOut.indexOf(tokens[index].text) != -1) {
- tokens.splice(index, 1);
- }
-}
-
-function _HexToInt(stringy) {
- var ret = 0;
- var mul = 1;
- var base;
- for (var i = (stringy.length - 1); i >= 0; i = i - 1) {
- if (stringy.charCodeAt(i) >= "A".charCodeAt(0)) {
- base = "A".charCodeAt(0) - 10;
- } else {
- base = "0".charCodeAt(0);
- }
- ret = ret + (mul*(stringy.charCodeAt(i) - base));
- mul = mul * 16;
- }
- return ret;
-}
-
-function IEEEUnHex(stringy) {
- var a = _HexToInt(stringy.substr(2, 8));
- var b = _HexToInt(stringy.substr(10));
- var e = (a >> ((52 - 32) & 0x7ff)) - 1023;
- return ((((a & 0xfffff | 0x100000) * 1.0) / Math.pow(2,52-32)) * Math.pow(2, e)) + (((b * 1.0) / Math.pow(2, 52)) * Math.pow(2, e));
-}
-
-function parseNumerical(value, type) {
- if ((!type || type == 'double' || type == 'float') && value.substr(0,2) == '0x') {
- // Hexadecimal double value, as the llvm docs say,
- // "The one non-intuitive notation for constants is the hexadecimal form of floating point constants."
- return IEEEUnHex(value);
- }
- if (value == 'null') {
- // NULL *is* 0, in C/C++. No JS null! (null == 0 is false, etc.)
- return '0';
- }
- return value;
-}
-
-// \0Dsometext is really '\r', then sometext
-// This function returns an array of int values
-function parseLLVMString(str) {
- var ret = [];
- var i = 0;
- while (i < str.length) {
- var chr = str[i];
- if (chr != '\\') {
- ret.push(chr.charCodeAt(0));
- i++;
- } else {
- ret.push(_HexToInt(str[i+1]+str[i+2]));
- i += 3;
- }
- }
- return ret;
-}
-
-function getLabelIds(labels) {
- return labels.map(function(label) { return label.ident });
-}
-
-// =======================
-
-// llvm => intertypes
-function intertyper(data) {
- // Substrate
-
- substrate = new Substrate('Intertyper');
-
- // Line splitter.
- substrate.addZyme('LineSplitter', {
- processItem: function(item) {
- var lines = item.llvmText.split('\n');
- var ret = [];
- var inContinual = false;
- for (var i = 0; i < lines.length; i++) {
- var line = lines[i];
- if (inContinual || new RegExp(/^\ +to.*/g).test(line)) {
- // to after invoke
- ret.slice(-1)[0].lineText += line;
- if (new RegExp(/^\ +\]/g).test(line)) { // end of llvm switch
- inContinual = false;
- }
- } else {
- ret.push({
- lineText: line,
- lineNum: i + 1,
- });
- if (new RegExp(/^\ +switch\ .*/g).test(line)) {
- // beginning of llvm switch
- inContinual = true;
- }
- }
- }
- this.forwardItems(ret.filter(function(item) { return item.lineText; }), 'Tokenizer');
- },
- });
-
- // Line tokenizer
- substrate.addZyme('Tokenizer', {
- processItem: function(item, inner) {
- var lineText = item.lineText + " ";
- var tokens = [];
- var tokenStart = -1;
- var indent = -1;
- var quotes = 0;
- var lastToken = null;
- var i = 0;
- // Note: '{' is not an encloser, as its use in functions is split over many lines
- var enclosers = {
- '[': 0,
- ']': '[',
- '(': 0,
- ')': '(',
- '<': 0,
- '>': '<',
- };
- function notEnclosed() {
- if (enclosers['['] > 0 || enclosers['('] > 0 || enclosers['<'] > 0)
- return false;
- return true;
- }
- var that = this;
- function tryStartToken() {
- if (tokenStart == -1 && notEnclosed() && quotes == 0) {
- //print("try START " + tokenStart + ',' + JSON.stringify(enclosers));
- tokenStart = i;
- }
- }
- function tryFinishToken(includeThis) {
- if (tokenStart >= 0 && notEnclosed() && quotes == 0) {
- //print("try finish " + tokenStart + ',' + JSON.stringify(enclosers));
- var token = {
- text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)),
- };
- if (token.text[0] in enclosers) {
- token.item = that.processItem({
- lineText: token.text.substr(1, token.text.length-2)
- }, true);
- token.type = token.text[0];
- }
- if (indent == -1) {
- indent = tokenStart;
- }
- // merge certain tokens
- if ( (lastToken && lastToken.text == '%' && token.text[0] == '"' ) ||
- (lastToken && token.text.replace(/\*/g, '') == '') ) {
- lastToken.text += token.text;
- } else if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
- lastToken.text += ' ' + token.text;
- } else if (lastToken && token.text[token.text.length-1] == '}') {
- var openBrace = tokens.length-1;
- while (tokens[openBrace].text != '{') openBrace --;
- token = combineTokens(tokens.slice(openBrace+1));
- tokens.splice(openBrace, tokens.length-openBrace+1);
- tokens.push(token);
- token.type = '{';
- lastToken = token;
- } else {
- tokens.push(token);
- lastToken = token;
- }
- // print("new token: " + dump(lastToken));
- tokenStart = -1;
- }
- }
- for (; i < lineText.length; i++) {
- var letter = lineText[i];
- //print("letter: " + letter);
- switch (letter) {
- case ' ':
- tryFinishToken();
- break;
- case '"':
- tryFinishToken();
- tryStartToken();
- quotes = 1-quotes;
- break;
- case ',':
- tryFinishToken();
- if (notEnclosed() && quotes == 0) {
- tokens.push({ text: ',' });
- }
- break;
- default:
- if (letter in enclosers && quotes == 0) {
- if (typeof enclosers[letter] === 'number') {
- tryFinishToken();
- tryStartToken();
- enclosers[letter]++;
- } else {
- enclosers[enclosers[letter]]--;
- tryFinishToken(true);
- }
- //print(' post-enclosers: ' + JSON.stringify(enclosers));
- } else {
- tryStartToken();
- }
- }
- }
- var item = {
- tokens: tokens,
- indent: indent,
- lineNum: item.lineNum,
- };
- if (inner) {
- return [item];
- } else {
- this.forwardItem(item, 'Triager');
- }
- },
- });
-
- substrate.addZyme('Triager', {
- processItem: function(item) {
- function triage() {
- if (!item.intertype) {
- if (item.tokens[0].text in searchable(';', 'target'))
- return '/dev/null';
- if (item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=')
- return 'Global';
- if (item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' &&
- item.tokens.slice(-1)[0].text == '{')
- return 'FuncHeader';
- if (item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':')
- return 'Label';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 &&
- !item.intertype)
- return 'Assign';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load')
- return 'Load';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast')
- return 'Bitcast';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr')
- return 'GEP';
- if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype)
- return 'Call';
- if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype)
- return 'Invoke';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca')
- return 'Alloca';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'phi')
- return 'Phi';
- if (item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
- ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr'].indexOf(item.tokens[0].text) != -1 && !item.intertype)
- return 'Mathops';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' &&
- !item.intertype)
- return 'Store';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' &&
- !item.intertype)
- return 'Branch';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' &&
- !item.intertype)
- return 'Return';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'switch' &&
- !item.intertype)
- return 'Switch';
- if (item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype)
- return 'FuncEnd';
- if (item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' &&
- !item.intertype)
- return 'External';
- if (item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' &&
- !item.intertype)
- return 'Unreachable';
- } else {
- // Already intertyped
- if (item.parentSlot)
- return 'Reintegrator';
- }
- throw 'Invalid token, cannot triage: ' + dump(item);
- }
- this.forwardItem(item, triage(item));
- },
- });
-
- // Line parsers to intermediate form
-
- // globals: type or variable
- substrate.addZyme('Global', {
- processItem: function(item) {
- if (item.tokens[2].text == 'type') {
- //dprint('type/const linenum: ' + item.lineNum + ':' + dump(item));
- var fields = [];
- if (item.tokens[3].text != 'opaque') {
- if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
- item.tokens[3] = item.tokens[3].item[0];
- }
- var subTokens = item.tokens[3].tokens;
- subTokens.push({text:','});
- while (subTokens[0]) {
- var stop = 1;
- while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
- fields.push(combineTokens(subTokens.slice(0, stop)).text);
- subTokens.splice(0, stop+1);
- }
- }
- return [{
- __result__: true, // XXX can remove these
- intertype: 'type',
- name_: item.tokens[0].text,
- fields: fields,
- lineNum: item.lineNum,
- }]
- } else {
- // variable
- var ident = item.tokens[0].text;
- while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 })
- item.tokens.splice(2, 1);
- var ret = {
- __result__: true,
- intertype: 'globalVariable',
- ident: ident,
- type: item.tokens[2],
- lineNum: item.lineNum,
- };
- if (ident == '@llvm.global_ctors') {
- ret.ctors = [];
- var subTokens = item.tokens[3].item[0].tokens;
- splitTokenList(subTokens).forEach(function(segment) {
- ret.ctors.push(segment[1].tokens.slice(-1)[0].text);
- });
- } else {
- if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
- item.tokens[3] = item.tokens[3].item[0].tokens;
- }
-
- if (item.tokens[3].text == 'c')
- item.tokens.splice(3, 1);
- ret.value = item.tokens[3];
- }
- return [ret];
- }
- },
- });
- // function header
- substrate.addZyme('FuncHeader', {
- processItem: function(item) {
- item.tokens = item.tokens.filter(function(token) {
- return ['internal', 'signext', 'zeroext', 'nounwind', 'define', 'linkonce_odr', 'inlinehint', '{'].indexOf(token.text) == -1;
- });
- return [{
- __result__: true,
- intertype: 'function',
- ident: item.tokens[1].text,
- returnType: item.tokens[0],
- params: item.tokens[2],
- lineNum: item.lineNum,
- }];
- },
- });
- // label
- substrate.addZyme('Label', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'label',
- ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1),
- lineNum: item.lineNum,
- }];
- },
- });
-
- // assignment
- substrate.addZyme('Assign', {
- processItem: function(item) {
- var opIndex = findTokenText(item, '=');
- var pair = splitItem({
- intertype: 'assign',
- ident: combineTokens(item.tokens.slice(0, opIndex)).text,
- lineNum: item.lineNum,
- }, 'value');
- this.forwardItem(pair.parent, 'Reintegrator');
- this.forwardItem(mergeInto(pair.child, { // Additional token, to be triaged and later re-integrated
- indent: -1,
- tokens: item.tokens.slice(opIndex+1),
- }), 'Triager');
- },
- });
- // reintegration - find intermediate representation-parsed items and
- // place back in parents TODO: Optimize this code to optimal O(..)
- substrate.addZyme('Reintegrator', makeReintegrator(function(parent, child) {
- // Special re-integration behaviors
- if (child.intertype == 'fastgetelementptrload') {
- parent.intertype = 'fastgetelementptrload';
- }
- this.forwardItem(parent, '/dev/stdout');
- }));
-
- // 'load'
- substrate.addZyme('Load', {
- processItem: function(item) {
- item.pointerType = item.tokens[1];
- item.type = { text: removePointing(item.pointerType.text) };
- if (item.tokens[2].text == 'getelementptr') {
- var last = getTokenIndexByText(item.tokens, ';');
- var gepTokens = item.tokens.slice(1, last); // without 'load'
- var segment = [ gepTokens[2], gepTokens[0], null ].concat(gepTokens.slice(3));
- var data = parseGetElementPtr(segment);
- item.intertype = 'fastgetelementptrload';
- item.type = data.type;
- item.params = data.params;
- item.pointer = { text: data.ident };
- item.value = data.value;
- } else {
- item.intertype = 'load';
- if (item.tokens[2].text == 'bitcast') {
- item.pointer = item.tokens[3].item[0].tokens[1];
- item.originalType = item.tokens[3].item[0].tokens[0];
- } else {
- item.pointer = item.tokens[2];
- }
- }
- item.ident = item.pointer.text;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'bitcast'
- substrate.addZyme('Bitcast', {
- processItem: function(item) {
- item.intertype = 'bitcast';
- item.type = item.tokens[1];
- item.ident = item.tokens[2].text;
- item.type2 = item.tokens[4];
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'getelementptr'
- substrate.addZyme('GEP', {
- processItem: function(item) {
- var last = getTokenIndexByText(item.tokens, ';');
- var segment = [ item.tokens[1], { text: null }, null, { item: [ {
- tokens: item.tokens.slice(2, last)
- } ] } ];
- var data = parseGetElementPtr(segment);
- item.intertype = 'getelementptr';
- item.type = data.type;
- item.params = data.params;
- item.ident = data.ident;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'call'
- substrate.addZyme('Call', {
- processItem: function(item) {
- item.intertype = 'call';
- if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) {
- item.tokens.splice(1, 1);
- }
- item.type = item.tokens[1];
- item.functionType = '';
- while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
- item.functionType += item.tokens[2].text;
- item.tokens.splice(2, 1);
- }
- item.ident = item.tokens[2].text;
- if (item.ident.substr(-2) == '()') {
- // See comment in isStructType()
- item.ident = item.ident.substr(0, item.ident.length-2);
- // Also, we remove some spaces which might occur.
- while (item.ident[item.ident.length-1] == ' ') {
- item.ident = item.ident.substr(0, item.ident.length-1);
- }
- item.params = [];
- } else {
- item.params = parseParamTokens(item.tokens[3].item[0].tokens);
- }
- if (item.indent == 2) {
- // standalone call - not in assign
- item.standalone = true;
- item.__result__ = true;
- return [item];
- }
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'invoke'
- substrate.addZyme('Invoke', {
- processItem: function(item) {
- item.intertype = 'invoke';
- item.type = item.tokens[1];
- item.functionType = '';
- while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
- item.functionType += item.tokens[2].text;
- item.tokens.splice(2, 1);
- }
- cleanOutTokens(['alignstack', 'alwaysinline', 'inlinehint', 'naked', 'noimplicitfloat', 'noinline', 'alwaysinline attribute.', 'noredzone', 'noreturn', 'nounwind', 'optsize', 'readnone', 'readonly', 'ssp', 'sspreq'], item.tokens, 4);
- item.ident = item.tokens[2].text;
- item.params = parseParamTokens(item.tokens[3].item[0].tokens);
- item.toLabel = toNiceIdent(item.tokens[6].text);
- item.unwindLabel = toNiceIdent(item.tokens[9].text);
- if (item.indent == 2) {
- // standalone call - not in assign
- item.standalone = true;
- item.__result__ = true;
- return [item];
- }
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'alloca'
- substrate.addZyme('Alloca', {
- processItem: function(item) {
- item.intertype = 'alloca';
- item.allocatedType = item.tokens[1];
- item.type = { text: addPointing(item.tokens[1].text) }; // type of pointer we will get
- item.type2 = { text: item.tokens[1].text }; // value we will create, and get a pointer to
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'phi'
- substrate.addZyme('Phi', {
- processItem: function(item) {
- item.intertype = 'phi';
- item.type = { text: item.tokens[1].text }
- item.label1 = item.tokens[2].item[0].tokens[2].text;
- item.value1 = item.tokens[2].item[0].tokens[0].text;
- item.label2 = item.tokens[4].item[0].tokens[2].text;
- item.value2 = item.tokens[4].item[0].tokens[0].text;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // mathops
- substrate.addZyme('Mathops', {
- processItem: function(item) {
- item.intertype = 'mathop';
- item.op = item.tokens[0].text;
- item.variant = null;
- if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1);
- if (['icmp', 'fcmp'].indexOf(item.op) != -1) {
- item.variant = item.tokens[1].text;
- item.tokens.splice(1, 1);
- }
- item.type = item.tokens[1];
- item.ident = item.tokens[2].text;
- item.ident2 = item.tokens[4].text;
- item.ident3 = item.tokens[5] ? item.tokens[5].text : null;
- item.ident4 = item.tokens[8] ? item.tokens[8].text : null;
- dprint('mathop', item.op + ',' + item.variant + ',' + item.ident + ',' + item.value);
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'store'
- substrate.addZyme('Store', {
- processItem: function(item) {
- if (item.tokens[3].text != ',') {
- assertEq(item.tokens[2].text, 'getelementptr');
- // complex input - likely getelementptr
- var commaIndex = 4;
- while (item.tokens[commaIndex].text != ',') commaIndex ++;
- return [{
- __result__: true,
- intertype: 'store',
- valueType: item.tokens[1],
- value: parseGetElementPtr(item.tokens.slice(1, commaIndex)),
- pointerType: item.tokens[commaIndex+1],
- pointer: item.tokens[commaIndex+2],
- ident: item.tokens[commaIndex+2].text,
- lineNum: item.lineNum,
- }];
- }
- return [{
- __result__: true,
- intertype: 'store',
- valueType: item.tokens[1],
- value: addIdent(item.tokens[2]),
- pointerType: item.tokens[4],
- pointer: item.tokens[5],
- ident: item.tokens[5].text,
- lineNum: item.lineNum,
- }];
- },
- });
- // 'br'
- substrate.addZyme('Branch', {
- processItem: function(item) {
- if (item.tokens[1].text == 'label') {
- return [{
- __result__: true,
- intertype: 'branch',
- label: toNiceIdent(item.tokens[2].text),
- lineNum: item.lineNum,
- }];
- } else {
- return [{
- __result__: true,
- intertype: 'branch',
- ident: item.tokens[2].text,
- labelTrue: toNiceIdent(item.tokens[5].text),
- labelFalse: toNiceIdent(item.tokens[8].text),
- lineNum: item.lineNum,
- }];
- }
- },
- });
- // 'ret'
- substrate.addZyme('Return', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'return',
- type: item.tokens[1].text,
- value: item.tokens[2] ? item.tokens[2].text : null,
- lineNum: item.lineNum,
- }];
- },
- });
- // 'switch'
- substrate.addZyme('Switch', {
- processItem: function(item) {
- function parseSwitchLabels(item) {
- var ret = [];
- var tokens = item.item[0].tokens;
- while (tokens.length > 0) {
- ret.push({
- value: tokens[1].text,
- label: toNiceIdent(tokens[4].text),
- });
- tokens = tokens.slice(5);
- }
- return ret;
- }
- return [{
- __result__: true,
- intertype: 'switch',
- type: item.tokens[1].text,
- ident: item.tokens[2].text,
- defaultLabel: item.tokens[5].text,
- switchLabels: parseSwitchLabels(item.tokens[6]),
- lineNum: item.lineNum,
- }];
- },
- });
- // function end
- substrate.addZyme('FuncEnd', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'functionEnd',
- lineNum: item.lineNum,
- }];
- },
- });
- // external function stub
- substrate.addZyme('External', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'functionStub',
- ident: item.tokens[2].text,
- returnType: item.tokens[1],
- params: item.tokens[3],
- lineNum: item.lineNum,
- }];
- },
- });
- // 'unreachable'
- substrate.addZyme('Unreachable', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'unreachable',
- lineNum: item.lineNum,
- }];
- },
- });
-
- // Input
-
- substrate.addItem({
- llvmText: data,
- }, 'LineSplitter');
-
- return substrate.solve();
-}
-
-// Analyze intertype data
-
-VAR_NATIVE = 'native';
-VAR_NATIVIZED = 'nativized';
-VAR_EMULATED = 'emulated';
-
-function cleanFunc(func) {
- func.lines = func.lines.filter(function(line) { return line.intertype !== null });
- func.labels.forEach(function(label) {
- label.lines = label.lines.filter(function(line) { return line.intertype !== null });
- });
-}
-
-function analyzer(data) {
-//print('zz analaz')
- substrate = new Substrate('Analyzer');
-
- // Sorter
- substrate.addZyme('Sorter', {
- processItem: function(item) {
- item.items.sort(function (a, b) { return a.lineNum - b.lineNum });
- this.forwardItem(item, 'Gatherer');
- },
- });
-
- // Gatherer
- substrate.addZyme('Gatherer', {
- processItem: function(item) {
- // Single-liners
- ['globalVariable', 'functionStub', 'type'].forEach(function(intertype) {
- var temp = splitter(item.items, function(item) { return item.intertype == intertype });
- item[intertype + 's'] = temp.splitOut;
- item.items = temp.leftIn;
- });
- // Functions & labels
- item.functions = []
- for (var i = 0; i < item.items.length; i++) {
- var subItem = item.items[i];
- if (subItem.intertype == 'function') {
- item.functions.push(subItem);
- subItem.endLineNum = null;
- subItem.lines = [];
- subItem.labels = [];
- } else if (subItem.intertype == 'functionEnd') {
- item.functions.slice(-1)[0].endLineNum = subItem.lineNum;
- } else if (subItem.intertype == 'label') {
- item.functions.slice(-1)[0].labels.push(subItem);
- subItem.lines = [];
- } else if (item.functions.slice(-1)[0].endLineNum === null) {
- // Internal line
- item.functions.slice(-1)[0].lines.push(subItem);
- item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem);
- } else {
- print("ERROR: what is this? " + JSON.stringify(subItem));
- }
- }
- delete item.items;
- this.forwardItem(item, 'Identinicer');
- },
- });
-
- // IdentiNicer
- substrate.addZyme('Identinicer', {
- processItem: function(output) {
- walkJSON(output, function(item) {
- ['', '2', '3', '4', '5'].forEach(function(ext) {
- if (item && item['ident' + ext])
- item['ident' + ext] = toNiceIdent(item['ident' + ext]);
- });
- });
- this.forwardItem(output, 'Typevestigator');
- }
- });
-
- function addType(type, data) {
- if (type.length == 1) return;
- if (data.types[type]) return;
- if (['internal', 'inbounds', 'void'].indexOf(type) != -1) return;
- dprint('types', '// addType: ' + type);
- var check = new RegExp(/^\[(\d+)\ x\ (.*)\]$/g).exec(type);
- // 'blocks': [14 x %struct.X] etc.
- if (check) {
- var num = parseInt(check[1]);
- var subType = check[2];
- data.types[type] = {
- name_: type,
- fields: range(num).map(function() { return subType }),
- lineNum: '?',
- };
- return;
- }
- if (['['].indexOf(type) != -1) return;
- if (isNumberType(type) || isPointerType(type)) return;
- data.types[type] = {
- name_: type,
- fields: [ 'int32' ], // XXX
- flatSize: 1,
- lineNum: '?',
- };
- }
-
- // Typevestigator
- substrate.addZyme('Typevestigator', {
- processItem: function(data) {
- // Convert types list to dict
- var old = data.types;
- data.types = {};
- old.forEach(function(type) { data.types[type.name_] = type });
-
- // Find additional types
- walkJSON(data, function(item) {
- if (!item) return;
- if (item.type) {
- addType(!item.type.text ? item.type : item.type.text, data);
- }
- if (item.type2) {
- addType(!item.type2.text ? item.type2 : item.type2.text, data);
- }
- });
- this.forwardItem(data, 'Typeanalyzer');
- }
- });
-
- // Type analyzer
- substrate.addZyme('Typeanalyzer', {
- processItem: function(item) {
- // 'fields' is the raw list of LLVM fields. However, we embed
- // child structures into parent structures, basically like C.
- // So { int, { int, int }, int } would be represented as
- // an Array of 4 ints. getelementptr on the parent would take
- // values 0, 1, 2, where 2 is the entire middle structure.
- // We also need to be careful with getelementptr to child
- // structures - we return a pointer to the same slab, just
- // a different offset. Likewise, need to be careful for
- // getelementptr of 2 (the last int) - it's real index is 4.
- // The benefit of this approach is inheritance -
- // { { ancestor } , etc. } = descendant
- // In this case it is easy to bitcast ancestor to descendant
- // pointers - nothing needs to be done. If the ancestor were
- // a new slab, it would need some pointer to the outer one
- // for casting in that direction.
- // TODO: bitcasts of non-inheritance cases of embedding (not at start)
- var more = true;
- while (more) {
- more = false;
- values(item.types).forEach(function(type) {
- if (type.flatIndexes) return;
- var ready = true;
- type.fields.forEach(function(field) {
- //print('// zz getT: ' + type.name_ + ' : ' + field);
- if (isStructType(field)) {
- if (!item.types[field]) {
- addType(field, item);
- ready = false;
- } else {
- if (!item.types[field].flatIndexes) {
- ready = false;
- }
- }
- }
- });
- if (!ready) {
- more = true;
- return;
- }
- type.flatSize = 0;
- var sizes = [];
- type.flatIndexes = type.fields.map(function(field) {
- var soFar = type.flatSize;
- var size = 1;
- if (isStructType(field)) {
- size = item.types[field].flatSize;
- }
- type.flatSize += size;
- sizes.push(size);
- return soFar;
- });
- if (dedup(sizes).length == 1) {
- type.flatFactor = sizes[0];
- }
- type.needsFlattening = (this.flatFactor != 1);
- dprint('types', 'type: ' + type.name_ + ' : ' + JSON.stringify(type.fields));
- dprint('types', ' has final size of ' + type.flatSize + ', flatting: ' + type.needsFlattening + ' ? ' + (type.flatFactor ? type.flatFactor : JSON.stringify(type.flatIndexes)));
- });
- }
-
- this.forwardItem(item, 'VariableAnalyzer');
- },
- });
-
- // Variable analyzer
- substrate.addZyme('VariableAnalyzer', {
- processItem: function(item) {
- item.functions.forEach(function(func) {
- func.variables = {};
-
- // LLVM is SSA, so we always have a single assignment/write. We care about
- // the reads/other uses.
- walkJSON(func.lines, function(item) {
- if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr', 'fastgetelementptrload'].indexOf(item.value.intertype) != -1) {
- func.variables[item.ident] = {
- ident: item.ident,
- type: item.value.type.text,
- origin: item.value.intertype,
- uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]),
- };
- }
- });
-
- for (vname in func.variables) {
- var variable = func.variables[vname];
-
- // Whether the value itself is used. For an int, always yes. For a pointer,
- // we might never use the pointer's value - we might always just store to it /
- // read from it. If so, then we can optimize away the pointer.
- variable.hasValueTaken = false;
- // Whether our address was used. If not, then we do not need to bother with
- // implementing this variable in a way that other functions can access it.
- variable.hasAddrTaken = false;
-
- variable.pointingLevels = pointingLevels(variable.type);
-
- // Analysis!
-
- if (variable.pointingLevels > 0) {
- // Pointers
- variable.loads = 0;
- variable.stores = 0;
-
- func.lines.forEach(function(line) {
- //print(dump(line))
- if (line.intertype == 'store' && line.ident == vname) {
- variable.stores ++;
- } else if ((line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) ||
- (line.intertype == 'fastgetelementptrload' && line.ident == vname)) {
- variable.loads ++;
- }
- });
-
- variable.otherUses = variable.uses - variable.loads - variable.stores;
- if (variable.otherUses > 0)
- variable.hasValueTaken = true;
- }
-
- // Decision time
-
- if (variable.origin == 'getelementptr') {
- // Use our implementation that emulates pointers etc.
- variable.impl = VAR_EMULATED;
- } else if ( variable.pointingLevels === 0 && !variable.hasAddrTaken ) {
- // A simple int value, can be implemented as a native variable
- variable.impl = VAR_NATIVE;
- } else if ( variable.pointingLevels === 1 && variable.origin === 'alloca' && !isStructPointerType(variable.type) && !variable.hasAddrTaken && !variable.hasValueTaken ) {
- // A pointer to a value which is only accessible through this pointer. Basically
- // a local value on the stack, which nothing fancy is done on. So we can
- // optimize away the pointing altogether, and just have a native variable
- variable.impl = VAR_NATIVIZED;
- } else {
- variable.impl = VAR_EMULATED;
- }
- dprint('vars', '// var ' + vname + ': ' + JSON.stringify(variable));
- }
- });
- this.forwardItem(item, 'Relooper');
- },
- });
-
- // ReLooper - reconstruct nice loops, as much as possible
- substrate.addZyme('Relooper', {
- processItem: function(item) {
- var that = this;
- function finish() {
- that.forwardItem(item, 'Optimizer');
- }
-
- // Tools
-
- function replaceLabels(line, labelId, toLabelId) {
- //print('// XXX replace ' + labelId + ' with ' + toLabelId);
- function process(item) {
- ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel', 'defaultLabel'].forEach(function(id) {
- if (item[id] && item[id] == labelId) {
- item[id] = toLabelId;
- //print(' replaced!');
- }
- });
- }
- if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
- process(line);
- } else if (line.intertype == 'switch') {
- process(line);
- line.switchLabels.forEach(process);
- }
- }
-
- function replaceLabelLabels(label, labelId, toLabelId) {
- label.lines.forEach(function(line) { replaceLabels(line, labelId, toLabelId) });
- return label;
- }
-
- function replaceInLabels(labels, toReplace, replaceWith) {
- //print('// XXX replaceIn ' + toReplace + ' with ' + replaceWith);
- assertEq(!replaceWith || toReplace.length == 1, true); // TODO: implement other case
- labels.forEach(function(label) {
- ['inLabels'].forEach(function(l) {
- label[l] = label[l].map(function(labelId) { return toReplace.indexOf(labelId) == -1 ? labelId : replaceWith})
- .filter(function(labelId) { return !!labelId });
- });
- });
- return labels;
- }
-
- function calcLabelBranchingData(labels, labelsDict) {
- item.functions.forEach(function(func) {
- labels.forEach(function(label) {
- label.outLabels = [];
- label.inLabels = [];
- label.hasReturn = false;
- label.hasBreak = false;
- });
- });
- // Find direct branchings
- labels.forEach(function(label) {
- dprint('find direct branchings at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
- label.lines.forEach(function(line) {
- function process(item) {
- ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel'].forEach(function(id) {
- if (item[id]) {
- if (item[id][0] == 'B') { // BREAK, BCONT, BNOPP
- label.hasBreak = true;
- } else {
- label.outLabels.push(item[id]);
- labelsDict[item[id]].inLabels.push(label.ident);
- }
- }
- });
- }
- if (['branch', 'invoke'].indexOf(line.intertype) != -1) {
- process(line);
- } else if (line.intertype == 'switch') {
- process(line);
- line.switchLabels.forEach(process);
- }
-
- label.hasReturn |= line.intertype == 'return';
- });
- });
- // Find all incoming and all outgoing - recursively
- labels.forEach(function(label) {
- label.allInLabels = [];
- label.allOutLabels = [];
- //! MustGetTo ignores return - |if (x) return; else Y| must get to Y.
- label.mustGetHere = [];
- });
-
- var worked = true;
- while (worked) {
- worked = false;
- labels.forEach(function(label) {
- //print('zz at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels);
- function inout(s, l) {
- var temp = label[s].slice(0);
- label[s].forEach(function(label2Id) {
- temp = temp.concat(labelsDict[label2Id][l]);
- });
- temp = dedup(temp);
- temp.sort();
- if (JSON.stringify(label[l]) != JSON.stringify(temp)) {
- //print('zz noticed ' + label.ident + ' ? ' + s + ',' + l + ' : ' + label[s] + ' | ' + label[l]);
- label[l] = temp;
- worked = true;
- }
- }
- inout('inLabels', 'allInLabels');
- inout('outLabels', 'allOutLabels');
- });
- }
-
- // Find all mustGetTos
- labels.forEach(function(label) {
- //print('path for: ' + label.ident + ',' + dump(label));
- function walk(path, label) {
- //print('path is: ' + getLabelIds(path.concat([label])));
- // If all we are is a break/return - then stop here. Otherwise, continue to other path
- //print('??? ' + label.hasReturn + ' : ' + label.hasBreak + ' : ' + label.outLabels.length);
- if (label.hasReturn || (label.hasBreak && label.outLabels.length == 0)) {
- //print('path done.');
- return [path.concat([label])]
- };
- if (path.indexOf(label) != -1) {
- //print('looping path - abort it.');
- return []; // loop - drop this path
- }
- path = path.concat([label]);
- return label.outLabels.map(function(outLabelId) { return walk(path, labelsDict[outLabelId]) })
- .reduce(function(a, b) { return a.concat(b) }, [])
- .filter(function(path) { return path.length > 0 });
- }
- var paths = walk([], label).map(function(path) { return getLabelIds(path) });
- //print('XXX paths: ' + JSON.stringify(paths));
- var possibles = dedup(paths.reduce(function(a,b) { return a.concat(b) }, []));
- label.mustGetTo = possibles.filter(function(possible) {
- return paths.filter(function(path) { return path.indexOf(possible) == -1 }) == 0;
- }).filter(function(possible) { return possible != label.ident });
- //print('XXX must get to: ' + JSON.stringify(label.mustGetTo));
- });
-
- labels.forEach(function(label) {
- label.mustGetTo.forEach(function (mustGetTo) {
- labelsDict[mustGetTo].mustGetHere.push(label.ident);
- });
- });
-
- if (dcheck('labelbranching')) {
- labels.forEach(function(label) {
- print('// label: ' + label.ident + ' :out : ' + JSON.stringify(label.outLabels));
- print('// ' + label.ident + ' :in : ' + JSON.stringify(label.inLabels));
- print('// ' + label.ident + ' :ALL out : ' + JSON.stringify(label.allOutLabels));
- print('// ' + label.ident + ' :ALL in : ' + JSON.stringify(label.allInLabels));
- print('// ZZZZZZ ' + label.ident + ' must get to all of ' + JSON.stringify(label.mustGetTo));
- });
- }
- }
-
-/* // Disabled merging as it seems it just removes a return now and then.
- function mergeLabels(label1Ident, label2Ident) {
- var label1 = func.labelsDict[label1Ident];
- var label2 = func.labelsDict[label2Ident];
- label1.lines.pop();
- label1.lines = label1.lines.concat(label2.lines);
- label1.outLabels = label2.outLabels;
- label2.lines = null;
- func.labels = func.labels.filter(function(label) { return !!label.lines });
-print('// zz Merged away! ' + label2.ident + ' into ' + label1.ident);
- delete func.labelsDict[label2.ident];
- replaceInLabels(func.labels, [label2.ident], label1.ident);
- }
-//print('Labels pre merge : ' + getLabelIds(func.labels));
-
- // Merge trivial labels
- var worked = true;
- while (worked) {
- worked = false;
- func.labels.forEach(function(label) {
- if (label.lines === null) return; // We were deleted
-//print("// Consider: " + label.ident + ' : out/in: ' + label.outLabels.length + ',' + label.inLabels.length);// + dump(label.lines.slice(-1)[0]));
- if (label.outLabels.length == 1 &&
- label.lines.slice(-1)[0].intertype == 'branch' &&
-// label.lines.slice(-1)[0].label && // nonconditional branch
- label.lines.slice(-1)[0].label == label.outLabels[0] &&
- func.labelsDict[label.outLabels[0]].inLabels.length == 1) {
-//print("// and target: " + func.labelsDict[label.outLabels[0]].inLabels);
- mergeLabels(label.ident, label.outLabels[0]);
- worked = true;
- }
- });
- }
-*/
-
- // 'block': A self-enclosed part of the program, for example a loop or an if
- function makeBlock(labels, entry, labelsDict) {
- var def = {
- type: 'emulated', // a block we cannot map to a nicer structure like a loop. We emulate it with a barbaric switch
- labels: labels,
- entry: entry,
- };
- if (!RELOOP) return def;
-
- if (!entry) {
- assertTrue(labels.length == 0);
- return null; // Empty block - not even an entry
- }
-
- function getLastLine(block) {
- //dprint('get last line at: ' + block.labels[0].ident);
- if (block.next) return getLastLine(block.next);
- switch(block.type) {
- case 'loop':
- return getLastLine(block.rest);
- case 'if':
- case 'breakingif':
- return getLastLine(block.ifTrue);
- case 'emulated':
- case 'simple':
- if (block.labels.length == 1) {
- return block.labels[0].lines.slice(-1)[0];
- } else {
- //for (var i = 0; i < block.labels.length; i++)
- //print('Remaining label is at line: ' + block.labels[i].lines[0].lineNum);
- return null; // throw "Not clear what the last line is."
- }
- }
- }
- function getAll(fromId, beforeIds) {
- beforeIds = beforeIds ? beforeIds : [];
- //print("//getAll : " + fromId + ' : ' + beforeIds);
- if (beforeIds && beforeIds.indexOf(fromId) != -1) return [];
- //print("getAll proceeding");
- var from = labelsDict[fromId];
- return dedup([from].concat(
- from.outLabels.map(function(outLabel) { return getAll(outLabel, beforeIds.concat(fromId)) })
- .reduce(function(a,b) { return a.concat(b) }, [])
- ), 'ident');
- }
- function isolate(label) {
- label.inLabels = [];
- label.outLabels = [];
- return label;
- }
- dprint("\n\n// XXX MAKEBLOCK " + entry + ', num labels: ' + labels.length + ' and they are: ' + getLabelIds(labels));
- if (labels.length == 0 || !entry) {
- print('//empty labels or entry');
- return;
- }
- function forLabelLines(labels, func) {
- labels.forEach(function(label) {
- label.lines.forEach(function(line) { func(line, label) });
- });
- }
-
- // Begin
-
- calcLabelBranchingData(labels, labelsDict);
-
- var split = splitter(labels, function(label) { return label.ident == entry });
- var first = split.splitOut[0];
- if (!first) {
- print("//no first line");
- return;
- }
- var others = split.leftIn;
- var lastLine = first.lines.slice(-1)[0];
- dprint("// makeBlock " + entry + ' : ' + getLabelIds(labels) + ' IN: ' + first.inLabels + ' OUT: ' + first.outLabels);
- // If we have one outgoing, and none incoming - make this a block of 1,
- // and move on the others (forgetting ourself, so they are now also
- // totally self-enclosed, once we start them)
- if (first.inLabels.length == 0 && first.outLabels.length == 1) {
- dprint(' Creating simple emulated');
- assertEq(lastLine.intertype, 'branch');
-// assertEq(!!lastLine.label, true);
- return {
- type: 'simple',
- labels: [replaceLabelLabels(first, first.outLabels[0], 'BNOPP')],
- entry: entry,
- next: makeBlock(replaceInLabels(others, entry), first.outLabels[0], labelsDict),
- };
- }
- //print('// loop ? a');
- // Looping structures - in some way, we can get back to here
- if (first.outLabels.length > 0 && first.allInLabels.indexOf(entry) != -1) {
- //print('// loop ? b');
- // Look for outsiders - labels no longer capable of getting here. Those must be
- // outside the loop. Insiders are those that can get back to the entry
- var split2 = splitter(others, function(label) { return label.allOutLabels.indexOf(entry) == -1 });
- var outsiders = split2.splitOut;
- var insiders = split2.leftIn;
- //print('// potential loop : in/out : ' + getLabelIds(insiders) + ' to ' + getLabelIds(outsiders));
- // Hopefully exactly one of the outsiders is a 'pivot' - a label to which all those leaving
- // the loop must go. Then even some |if (falala) { ... break; }| will get ...
- // as an outsider, but it will actually still be in the loop
- var pivots =
- outsiders.filter(function(outsider) {
- return insiders.filter(function(insider) {
- return insider.mustGetTo.indexOf(outsider.ident) == -1;
- }) == 0;
- });
- // Find the earliest pivot. They must be staggered, each leading to another,
- // as all insiders must go through *all* of these. So we seek a pivot that
- // is never reached by another pivot. That must be the one with fewest
- // mustGetTo
- //print("//pivots: " + pivots.length + ',' + JSON.stringify(getLabelIds(pivots)));
- if (pivots.length >= 1) { // We have ourselves a loop
- pivots.sort(function(a, b) { return b.mustGetTo.length - a.mustGetTo.length });
- var pivot = pivots[0];
- dprint(' Creating LOOP : ' + entry + ' insiders: ' + getLabelIds(insiders) + ' to pivot: ' + pivot.ident);
- var otherLoopLabels = insiders;
- var loopLabels = insiders.concat([first]);
- var nextLabels = outsiders;
- // Rework branches out of the loop into new 'break' labels
- forLabelLines(loopLabels, function(line) {
- replaceLabels(line, pivot.ident, 'BREAK' + entry);
- });
- // Rework branches to the inc part of the loop into |continues|
- forLabelLines(loopLabels, function(line, label) {
- if (0) {// XXX - make this work :label.outLabels.length == 1 && label.outLabels[0] == entry && !label.hasBreak && !label.hasReturn) {
- // it can remove unneeded continues (but does too much right now, as the continue might have been
- // placed into an emulated while(1) switch { }
- replaceLabels(line, entry, 'BNOPP'); print("// zeezee " + line.lineNum);
- } else {
- replaceLabels(line, entry, 'BCONT' + entry);
- }
- });
- // Fix inc branch into rest
- var nextEntry = null;
- first.outLabels.forEach(function(outLabel) {
- if (outLabel != pivot.ident) {
- replaceLabels(lastLine, outLabel, 'BNOPP');
- nextEntry = outLabel;
- }
- });
- if (nextEntry == entry) { // We loop back to ourselves, the entire loop is just us
- nextEntry = null;
- }
- var ret = {
- type: 'loop',
- labels: loopLabels,
- entry: entry,
- inc: makeBlock([isolate(first)], entry, labelsDict),
- rest: makeBlock(replaceInLabels(otherLoopLabels, entry), nextEntry, labelsDict),
- };
- dprint(' getting last line for block starting with ' + entry + ' and nextEntry: ' + nextEntry);
- var lastLoopLine = getLastLine(nextEntry ? ret.rest : ret.inc);
- if (lastLoopLine) {
- replaceLabels(lastLoopLine, 'BCONT' + entry, 'BNOPP'); // Last line will feed back into the loop anyhow
- }
- ret.next = makeBlock(replaceInLabels(nextLabels, getLabelIds(loopLabels)), pivot.ident, labelsDict);
- return ret;
- }
- }
-
- // Try an 'if' structure
- if (first.outLabels.length == 2) {
- if (labelsDict[first.outLabels[1]].mustGetTo.indexOf(first.outLabels[0]) != -1) {
- first.outLabels.push(first.outLabels.shift()); // Reverse order - normalize. Very fast check anyhow
- }
- //print('// if? labels are ' + JSON.stringify(first.outLabels));
- if (labelsDict[first.outLabels[0]].mustGetTo.indexOf(first.outLabels[1]) != -1) {
- var ifLabelId = first.outLabels[0];
- var outLabelId = first.outLabels[1];
- // 0 - the if area. 1 - where we all exit to later
- var ifTrueLabels = getAll(ifLabelId, [outLabelId]);
- var ifLabels = ifTrueLabels.concat([first]);
- var nextLabels = getAll(outLabelId);
- // If we can get to the outside in more than 2 ways (one from if, one from True clause) - have breaks
- var breaking = labelsDict[outLabelId].allInLabels.length > 2;
- dprint(' Creating XXX IF: ' + getLabelIds(ifTrueLabels) + ' to ' + outLabelId + ' ==> ' + getLabelIds(nextLabels) + ' breaking: ' + breaking);
- //print('// if separation: ' + labels.length + ' = ' + ifLabels.length + ' + ' + nextLabels.length + ' (' + ifTrueLabels.length + ')');
- if (breaking) {
- // Rework branches out of the if into new 'break' labels
- forLabelLines(ifTrueLabels, function(line) {
- replaceLabels(line, outLabelId, 'BREAK' + entry);
- });
- }
- // Remove branching op - we will do it manually
- replaceLabels(lastLine, ifLabelId, 'BNOPP');
- replaceLabels(lastLine, outLabelId, 'BNOPP');
- return {
- type: (breaking ? 'breaking' : '') + 'if',
- labels: ifLabels,
- entry: entry,
- ifVar: lastLine.ident,
- check: makeBlock([isolate(first)], entry, labelsDict),
- ifTrue: makeBlock(replaceInLabels(ifTrueLabels, entry), ifLabelId, labelsDict),
- next: makeBlock(replaceInLabels(nextLabels, getLabelIds(ifLabels)), outLabelId, labelsDict),
- };
- }
- }
-
- // Give up on this structure - emulate it
- dprint(' Creating complex emulated');
- return def;
- }
-
- // TODO: each of these can be run in parallel
- item.functions.forEach(function(func) {
- dprint('relooping', "// relooping function: " + func.ident);
- func.labelsDict = {};
- func.labels.forEach(function(label) {
- func.labelsDict[label.ident] = label;
- });
- func.block = makeBlock(func.labels, toNiceIdent('%entry'), func.labelsDict);
- });
-
- return finish();
- },
- });
-
- // Optimizer
- substrate.addZyme('Optimizer', {
- processItem: function(item) {
- var that = this;
- function finish() {
- item.__finalResult__ = true;
- return [item];
- }
- if (!OPTIMIZE) return finish();
-
- // Check if a line has side effects *aside* from an explicit assign if it has one
- function isLineSideEffecting(line) {
- if (line.intertype == 'assign' && line.value.intertype !== 'call') return false;
- if (['fastgetelementptrload'].indexOf(line.intertype) != -1) return false;
- return true;
- }
-
- function replaceVars(line, ident, replaceWith) {
- if (!replaceWith) {
- print('// Not replacing ' + dump(ident) + ' : ' + dump(replaceWith));
- return false;
- }
- var found = false;
- // assigns, loads, mathops
- var POSSIBLE_VARS = ['ident', 'ident2'];
- for (var i = 0; i < POSSIBLE_VARS.length; i++) {
- var possible = POSSIBLE_VARS[i];
- if (line[possible] == ident) {
- line[possible] = replaceWith;
- found = true;
- }
- if (line.value && line.value[possible] == ident) {
- line.value[possible] = replaceWith;
- found = true;
- }
- }
- // getelementptr, call params
- [line, line.value].forEach(function(element) {
- if (!element || !element.params) return;
- var params = element.params;
- for (var j = 0; j < params.length; j++) {
- var param = params[j];
- if (param.intertype == 'value' && param.ident == ident) {
- param.ident = replaceWith;
- found = true;
- }
- }
- });
- return found;
- }
-
- // Fast getelementptr loads
- item.functions.forEach(function(func) {
- for (var i = 0; i < func.lines.length-1; i++) {
- var a = func.lines[i];
- var b = func.lines[i+1];
- if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
- b.intertype == 'assign' && b.value.intertype == 'load' &&
- a.ident == b.value.ident && func.variables[a.ident].uses == 1) {
-// print("// LOADSUSPECT: " + i + ',' + (i+1) + ':' + a.ident + ':' + b.value.ident);
- a.intertype = 'fastgetelementptrload';
- a.ident = b.ident;
- b.intertype = null;
- i++;
- }
- }
- cleanFunc(func);
- });
-
- // Fast getelementptr stores
- item.functions.forEach(function(func) {
- for (var i = 0; i < func.lines.length-1; i++) {
- var a = func.lines[i];
- var b = func.lines[i+1];
- if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' &&
- b.intertype == 'store' && b.value.text &&
- a.ident == b.ident && func.variables[a.ident].uses == 1) {
- //print("// STORESUSPECT: " + a.lineNum + ',' + b.lineNum);
- a.intertype = 'fastgetelementptrstore';
- a.ident = toNiceIdent(b.value.text);
- b.intertype = null;
- i++;
- }
- }
- cleanFunc(func);
- });
-
- // TODO: Use for all that can
- function optimizePairs(worker, minSlice, maxSlice) {
- minSlice = minSlice ? minSlice : 2;
- maxSlice = maxSlice ? maxSlice : 2;
- item.functions.forEach(function(func) {
- func.labels.forEach(function(label) {
- for (var i = 0; i < label.lines.length-1; i++) {
- for (var j = i+minSlice-1; j < Math.min(i+maxSlice+1, label.lines.length); j++) {
- if (worker(func, label.lines.slice(i, j+1))) {
- i += j-i;
- break; // stop working on this i
- }
- }
- }
- });
- cleanFunc(func);
- });
- }
-
- // Fast bitcast&something after them
- optimizePairs(function(func, lines) {
- var a = lines[0], b = lines[1];
- if (a.intertype == 'assign' && a.value.intertype == 'bitcast' &&
- func.variables[a.ident].uses == 1 && replaceVars(b, a.ident, a.value.ident)) {
- a.intertype = null;
- return true;
- }
- });
-
-/*
- // Remove unnecessary branches
- item.functions.forEach(function(func) {
- for (var i = 0; i < func.labels.length-1; i++) {
- var a = func.labels[i].lines.slice(-1)[0];
- var b = func.labels[i+1];
- if (a.intertype == 'branch' && a.label == b.ident) {
- a.intertype = null;
- }
- }
- cleanFunc(func);
- });
-*/
-
- // Remove temp variables around nativized
- item.functions.forEach(function(func) {
- // loads, mathops
- var worked = true;
- while (worked) {
- worked = false;
- for (var i = 0; i < func.lines.length-1; i++) {
- var a = func.lines[i];
- var b = func.lines[i+1];
- if (a.intertype == 'assign' && a.value.intertype == 'load' &&
- func.variables[a.value.ident] && // Not global
- func.variables[a.value.ident].impl === VAR_NATIVIZED) {
- //print('// ??zzzz ' + dump(a) + ',\n // ??zzbb' + dump(b));
- // If target is only used on next line - do not need it.
- if (func.variables[a.ident].uses == 1 &&
- replaceVars(b, a.ident, a.value.ident)) {
- a.intertype = null;
- i ++;
- worked = true;
- }
- }
- }
- cleanFunc(func);
- }
-
- // stores
- for (var i = 0; i < func.lines.length-1; i++) {
- var a = func.lines[i];
- var b = func.lines[i+1];
- if (b.intertype == 'store' &&
- func.variables[b.ident] && // Not global
- func.variables[b.ident].impl === VAR_NATIVIZED) {
- // If target is only used on prev line - do not need it.
- if (func.variables[b.value.ident] && func.variables[b.value.ident].uses == 1 &&
- ['assign', 'fastgetelementptrload'].indexOf(a.intertype) != -1 && a.ident == b.value.ident) {
- a.ident = b.ident;
- a.overrideSSA = true;
- b.intertype = null;
- i ++;
- }
- }
- }
- cleanFunc(func);
- });
-
- // Remove redundant vars - SLOW! XXX
- optimizePairs(function(func, lines) {
- // a - a line defining a var
- // b - a line defining a var that is identical to a
- // c - the only line using b, hopefully
- var a = lines[0], b = lines[lines.length-2], c = lines[lines.length-1];
- if (a.intertype == 'assign' && b.intertype == 'assign' &&
- func.variables[b.ident] && func.variables[b.ident].uses == 1 &&
- compareTokens(a.value, b.value) &&
- lines.slice(0,-1).filter(isLineSideEffecting).length == 0 &&
- replaceVars(c, b.ident, a.ident)) {
- b.intertype = null;
- return true;
- }
- }, 3, 12);
-
- return finish();
- },
- });
-
- substrate.addItem({
- items: data,
- }, 'Sorter');
-
- return substrate.solve();
-}
-
-// Convert analyzed data to javascript
-function JSify(data) {
- substrate = new Substrate('JSifyer');
-
- var TYPES = data.types;
-
- // type
- substrate.addZyme('Type', {
- processItem: function(item) {
- var type = TYPES[item.name_];
- if (type.needsFlattening) {
- item.JS = 'var ' + toNiceIdent(item.name_) + '___FLATTENER = ' + JSON.stringify(TYPES[item.name_].flatIndexes) + ';';
- } else {
- item.JS = '// type: ' + item.name_;
- }
- item.__result__ = true;
- return [item];
- },
- });
-
- function makePointer(slab, pos) {
- // XXX hardcoded ptr impl
- if (slab == 'HEAP') return pos;
- if (slab[0] != '[') {
- slab = '[' + slab + ']';
- }
- return 'Pointer_make(' + slab + ', ' + (pos ? pos : 0) + ')';
- // return '{ slab: ' + slab + ', pos: ' + (pos ? pos : 0) + ' }';
- // return '[' + slab + ', ' + (pos ? pos : 0) + ']';
- }
-
- function makeGetSlab(ptr) {
- // XXX hardcoded ptr impl
- // return ptr + '.slab';
- return 'HEAP';
- }
-
- function makeGetPos(ptr) {
- // XXX hardcoded ptr impl
- // return ptr + '.pos';
- return ptr;
- }
-
- function makeGetValue(ptr, pos, noNeedFirst) {
- return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + ']';
- }
-
- function makeSetValue(ptr, pos, value, noNeedFirst) {
- if (SAFE_HEAP) {
- return 'SAFE_HEAP_STORE(' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + ', ' + value + ')';
- } else {
- return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + '] = ' + value;
- }
- }
-
- function makeEmptyStruct(type) {
- dprint('types', '??makeemptystruct?? ' + dump(type));
- // XXX hardcoded ptr impl
- var ret = [];
- var typeData = TYPES[type];
- assertTrue(typeData);
- for (var i = 0; i < typeData.flatSize; i++) {
- ret.push(0);
- }
- return ret;
- }
-
- // Gets an entire constant expression
- function parseConst(value, type) {
- dprint('gconst', '//yyyyy ' + JSON.stringify(value) + ',' + type + '\n');
- if (isNumberType(type) || pointingLevels(type) == 1) {
- return makePointer(parseNumerical(value.text));
- } else if (value.text == 'zeroinitializer') {
- return makePointer(JSON.stringify(makeEmptyStruct(type)));
- } else if (value.text && value.text[0] == '"') {
- value.text = value.text.substr(1, value.text.length-2);
- return makePointer(JSON.stringify(parseLLVMString(value.text)) + ' /* ' + value.text + '*/');
- } else {
- // Gets an array of constant items, separated by ',' tokens
- function handleSegments(tokens) {
- // Handle a single segment (after comma separation)
- function handleSegment(segment) {
- dprint('gconst', '// seggg: ' + JSON.stringify(segment) + '\n' + '\n')
- if (segment[1].text == 'null') {
- return '0';
- } else if (segment[1].text == 'zeroinitializer') {
- return JSON.stringify(makeEmptyStruct(segment[0].text));
- } else if (segment[1].text == 'getelementptr') {
- return finalizeGetElementPtr(parseGetElementPtr(segment));
- } else if (segment[1].text in searchable('bitcast', 'inttoptr', 'ptrtoint')) {
- var type = segment[2].item[0].tokens.slice(-1)[0].text; // TODO: Use this?
- return handleSegment(segment[2].item[0].tokens.slice(0, -2));
- } else if (segment[1].text == 'add') {
- var subSegments = splitTokenList(segment[2].item[0].tokens);
- return '(' + handleSegment(subSegments[0]) + ' + ' + handleSegment(subSegments[1]) + ')';
- } else if (segment[1].type == '{') {
- return '[' + handleSegments(segment[1].tokens) + ']';
- } else if (segment.length == 2) {
- return parseNumerical(toNiceIdent(segment[1].text));
- } else {
- throw 'Invalid segment: ' + dump(segment);
- }
- };
- return splitTokenList(tokens).map(handleSegment).map(parseNumerical).join(', ');
- }
- if (value.item) {
- // list of items
- return makePointer('[ ' + handleSegments(value.item[0].tokens) + ' ]');
- } else if (value.type == '{') {
- // struct
- return makePointer('[ ' + handleSegments(value.tokens) + ' ]');
- } else if (value[0]) {
- return makePointer('[ ' + handleSegments(value[0].tokens) + ' ]');
- } else {
- throw '// failzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value);
- }
- }
- }
-
- // globalVariablw
- substrate.addZyme('GlobalVariable', {
- processItem: function(item) {
- dprint('gconst', '// zz global Cons: ' + dump(item) + ' :: ' + dump(item.value));
- if (item.ident == '_llvm_global_ctors') {
- item.JS = '\n__globalConstructor__ = function() {\n' +
- item.ctors.map(function(ctor) { return ' ' + toNiceIdent(ctor) + '();' }).join('\n') +
- '\n}\n';
- } else if (item.type.text == 'external') {
- item.JS = 'var ' + item.ident + ' = ' + '0; /* external value? */';
- } else {
- // GETTER - lazy loading, fixes issues with ordering.
- item.JS = 'this.__defineGetter__("' + item.ident + '", function() { delete ' + item.ident + '; ' + item.ident + ' = ' + parseConst(item.value, item.type.text) + '; return ' + item.ident + ' });';
- }
- item.__result__ = true;
- return [item];
- },
- });
-
- // functionStub
- substrate.addZyme('FunctionStub', {
- processItem: function(item) {
- var shortident = item.ident.substr(1);
- if (shortident in Snippets) {
- item.JS = item.ident + ' = ' + Snippets[shortident].toString();
- } else {
- item.JS = '// stub for ' + item.ident;
- }
- item.__result__ = true;
- return [item];
- },
- });
-
- // function splitter
- substrate.addZyme('FunctionSplitter', {
- processItem: function(item) {
- var ret = [item];
- item.splitItems = 0;
- item.labels.forEach(function(label) {
- label.lines.forEach(function(line) {
- line.func = item.ident;
- line.funcData = item;
- line.parentLabel = label.ident;
- ret.push(line);
- item.splitItems ++;
- });
- });
-
- this.forwardItems(ret, 'FuncLineTriager');
- },
- });
- // function reconstructor & post-JS optimizer
- substrate.addZyme('FunctionReconstructor', {
- funcs: {},
- seen: {},
- processItem: function(item) {
- if (this.seen[item.__uid__]) return;
- if (item.intertype == 'function') {
- this.funcs[item.ident] = item;
- item.relines = {};
- this.seen[item.__uid__] = true;
- return;
- }
- var line = item;
- var func = this.funcs[line.func];
- if (!func) return;
-
- // Re-insert our line
- this.seen[item.__uid__] = true;
- var label = func.labels.filter(function(label) { return label.ident == line.parentLabel })[0];
- label.lines = label.lines.map(function(line2) {
- return (line2.lineNum !== line.lineNum) ? line2 : line;
- });
- func.splitItems --;
- // OLD delete line.funcData; // clean up
- if (func.splitItems > 0) return;
-
- // We have this function all reconstructed, go and finalize it's JS!
-
- var hasVarArgs = false, hasPhi = false;
- var params = parseParamTokens(func.params.item[0].tokens).map(function(param) {
- hasPhi = hasPhi || param.intertype == 'phi';
- if (param.intertype == 'varargs') {
- hasVarArgs = true;
- return null;
- }
- return toNiceIdent(param.ident);
- }).filter(function(param) { return param != null });;
-
- func.JS = '\nfunction ' + func.ident + '(' + params.join(', ') + ') {\n';
- if (LABEL_DEBUG) func.JS += " print(INDENT + ' Entering: " + func.ident + "'); INDENT += ' ';\n";
-
- if (hasVarArgs) {
- func.JS += ' __numArgs__ = ' + params.length + ';\n';
- }
-
- // Walk function blocks and generate JS
- function walkBlock(block, indent) {
- if (!block) return '';
- function getLabelLines(label, indent) {
- var ret = '';
- if (LABEL_DEBUG) {
- ret += indent + " print(INDENT + '" + func.ident + ":" + label.ident + "');\n";
- }
- return ret + label.lines.map(function(line) { return indent + line.JS + (line.comment ? ' // ' + line.comment : '') }).join('\n');
- }
- var ret = '';
- if (block.type == 'emulated' || block.type == 'simple') {
- if (block.labels.length > 1) {
- ret += indent + 'var __label__ = ' + getLabelId(block.entry) + '; /* ' + block.entry + ' */\n';
- ret += indent + 'while(1) switch(__label__) {\n';
- ret += block.labels.map(function(label) {
- return indent + ' case ' + getLabelId(label.ident) + ': // ' + label.ident + '\n'
- + getLabelLines(label, indent + ' ');
- }).join('\n');
- ret += '\n' + indent + '}';
- } else {
- ret += getLabelLines(block.labels[0], indent);
- }
- ret += '\n';
- } else if (block.type == 'loop') {
-// if (mustGetTo(first.outLabels[0], [first.ident, first.outLabels[1]])) { /* left branch must return here, or go to right branch */
- ret += indent + block.entry + ': while(1) {\n';
- ret += walkBlock(block.inc, indent + ' ');
- ret += walkBlock(block.rest, indent + ' ');
- ret += indent + '}\n';
- } else if (block.type == 'breakingif') {
- ret += walkBlock(block.check, indent);
- ret += indent + block.entry + ': do { if (' + block.ifVar + ') {\n';
- ret += walkBlock(block.ifTrue, indent + ' ');
- ret += indent + '} } while(0);\n';
- } else if (block.type == 'if') {
- ret += walkBlock(block.check, indent);
- ret += indent + 'if (' + block.ifVar + ') {\n';
- ret += walkBlock(block.ifTrue, indent + ' ');
- ret += indent + '}\n';
- } else {
- ret = 'XXXXXXXXX!';
- }
- return ret + walkBlock(block.next, indent);
- }
- func.JS += walkBlock(func.block, ' ');
- // Finalize function
- if (LABEL_DEBUG) func.JS += " INDENT = INDENT.substr(0, INDENT.length-2);\n";
- func.JS += '}\n';
- func.__result__ = true;
- return func;
- },
- });
-
- function getVarData(funcData, ident) {
- if (funcData.variables[ident]) {
- return funcData.variables[ident].impl;
- } else {
- return 'emulated'; // All global are emulated
- }
- }
-
- substrate.addZyme('FuncLineTriager', {
- processItem: function(item) {
- if (item.intertype == 'function') {
- this.forwardItem(item, 'FunctionReconstructor');
- } else if (item.JS) {
- if (item.parentLineNum) {
- this.forwardItem(item, 'AssignReintegrator');
- } else {
- this.forwardItem(item, 'FunctionReconstructor');
- }
- } else {
- this.forwardItem(item, 'Intertype:' + item.intertype);
- }
- },
- });
-
- // assignment
- substrate.addZyme('Intertype:assign', {
- processItem: function(item) {
- var pair = splitItem(item, 'value', ['funcData']);
- this.forwardItem(pair.parent, 'AssignReintegrator');
- this.forwardItem(pair.child, 'FuncLineTriager');
- },
- });
- substrate.addZyme('AssignReintegrator', makeReintegrator(function(item, child) {
- // 'var', since this is SSA - first assignment is the only assignment, and where it is defined
- item.JS = (item.overrideSSA ? '' : 'var ') + toNiceIdent(item.ident);
-
- var type = item.value.type.text;
- var value = parseNumerical(item.value.JS);
- //print("zz var: " + item.JS);
- var impl = getVarData(item.funcData, item.ident);
- switch (impl) {
- case VAR_NATIVE: {
- break;
- }
- case VAR_NATIVIZED: {
- // SSA, so this must be the alloca. No need for a value
- if (!item.overrideSSA) value = '';
- break;
- }
- case VAR_EMULATED: {
- break;
- }
- default: print('zz unknown impl: ' + impl);
- }
- if (value)
- item.JS += ' = ' + value;
- item.JS += ';';
-
- this.forwardItem(item, 'FunctionReconstructor');
- }));
-
- // Function lines
- function makeFuncLineZyme(intertype, func) {
- substrate.addZyme('Intertype:' + intertype, {
- processItem: function(item) {
- item.JS = func(item);
- if (!item.JS) throw "XXX - no JS generated for " + dump(item);
- this.forwardItem(item, 'FuncLineTriager');
- },
- });
- }
- makeFuncLineZyme('store', function(item) {
- //print('// zzqqzz ' + dump(item.value) + ' :::: ' + dump(item.pointer) + ' :::: ');
- var ident = toNiceIdent(item.ident);
- var value;
- if (item.value.intertype == 'getelementptr') {
- value = finalizeGetElementPtr(item.value);
- } else {
- value = toNiceIdent(item.value.ident);
- }
- if (pointingLevels(item.pointerType.text) == 1) {
- value = parseNumerical(value, removePointing(item.pointerType.text));
- }
- //print("// zz seek " + ident + ',' + dump(item));
- var impl = getVarData(item.funcData, item.ident);
- var ret;
- switch (impl) {
- case VAR_NATIVIZED: ret = ident + ' = ' + value + ';'; break; // We have the actual value here
- case VAR_EMULATED: ret = makeSetValue(ident, 0, value) + ';'; break;
- default: print('zz unknown [store] impl: ' + impl);
- }
-/*
- if (LINEDEBUG && value) {
- ret += '\nprint(INDENT + "' + ident + ' == " + JSON.stringify(' + ident + '));';
- ret += '\nprint(INDENT + "' + ident + ' == " + (' + ident + ' && ' + ident + '.toString ? ' + ident + '.toString() : ' + ident + '));';
- }
-*/
- return ret;
- });
-
- var LABEL_IDs = {};
- var LABEL_ID_COUNTER = 0;
- function getLabelId(label) {
- //print('needs id: ' + label + ' : ' + JSON.stringify(LABEL_IDs));
- label = toNiceIdent(label);
- if (label in LABEL_IDs) return LABEL_IDs[label];
- return LABEL_IDs[label] = LABEL_ID_COUNTER ++;
- }
-
- function makeBranch(label) {
- if (label[0] == 'B') {
- if (label[1] == 'R') {
- return 'break ' + label.substr(5) + ';';
- } else if (label[1] == 'C') {
- return 'continue ' + label.substr(5) + ';';
- } else { // NOPP
- return ';'; // Returning no text might confuse this parser
- }
- } else {
- // FIXME: We should use __lastLabel__ only if we actually need
- // it, which is in the case of phi being used on the label.
- return '__lastLabel__ = __label__; __label__ = ' + getLabelId(label) + '; break;';
- }
- }
-
- makeFuncLineZyme('branch', function(item) {
- //print('branch: ' + dump(item));
- if (!item.ident) {
- return makeBranch(item.label);
- } else {
- var labelTrue = makeBranch(item.labelTrue);
- var labelFalse = makeBranch(item.labelFalse);
- if (labelTrue == ';' && labelFalse == ';') return ';';
- var head = 'if (' + item.ident + ') { ';
- var head2 = 'if (!(' + item.ident + ')) { ';
- var else_ = ' } else { ';
- var tail = ' }';
- if (labelTrue == ';') {
- return head2 + labelFalse + tail;
- } else if (labelFalse == ';') {
- return head + labelTrue + tail;
- } else {
- return head + labelTrue + else_ + labelFalse + tail;
- }
- }
- });
- makeFuncLineZyme('switch', function(item) {
- var ret = '';
- var first = true;
- item.switchLabels.forEach(function(switchLabel) {
- if (!first) {
- ret += 'else ';
- } else {
- first = false;
- }
- ret += 'if (' + item.ident + ' == ' + switchLabel.value + ') {\n';
- ret += ' ' + makeBranch(switchLabel.label) + '\n';
- ret += '}\n';
- });
- ret += 'else {\n';
- ret += makeBranch(item.defaultLabel) + '\n';
- ret += '}\n';
- if (item.value) {
- ret += ' ' + toNiceIdent(item.value);
- }
- return ret;
- });
- makeFuncLineZyme('return', function(item) {
- var ret = '';
- if (LABEL_DEBUG) ret += "INDENT = INDENT.substr(0, INDENT.length-2);\n";
- ret += 'return';
- if (item.value) {
- ret += ' ' + toNiceIdent(item.value);
- }
- return ret + ';';
- });
- makeFuncLineZyme('invoke', function(item) {
- // Wrapping in a function lets us easily return values if we are
- // in an assignment
- var ret = '(function() { try { return '
- + makeFunctionCall(item.ident, item.params) + '; '
- + '__THREW__ = false } catch(e) { '
- + '__THREW__ = true; '
- + '} })(); if (!__THREW__) { ' + makeBranch(item.toLabel) + ' } else { ' + makeBranch(item.unwindLabel) + ' }';
- return ret;
- });
- makeFuncLineZyme('load', function(item) {
- //print('// zz LOAD ' + dump(item) + ' :: ' + dump(item.tokens));
- var ident = toNiceIdent(item.ident);
- var impl = getVarData(item.funcData, item.ident);
- switch (impl) {
- case VAR_NATIVIZED: {
- return ident; // We have the actual value here
- }
- case VAR_EMULATED: return makeGetValue(ident);
- default: return "unknown [load] impl: " + impl;
- }
- });
- makeFuncLineZyme('alloca', function(item) {
- dprint('alloca', dump(item));
- if (pointingLevels(item.allocatedType.text) == 0 && isStructType(item.allocatedType.text)) {
- // TODO: allocate on a stack, not on the heap (we currently leak all this)
- return makePointer(JSON.stringify(makeEmptyStruct(item.allocatedType.text)));
- } else {
- return makePointer('[0]');
- }
- });
- makeFuncLineZyme('phi', function(item) {
- dprint('phi', dump(item));
- return '__lastLabel__ == ' + getLabelId(item.label1) + ' ? ' + toNiceIdent(item.value1) + ' : ' + toNiceIdent(item.value2);
- });
- makeFuncLineZyme('mathop', function(item) { with(item) {
- dprint('mathop', 'mathop: ' + dump(item));
- switch (item.op) {
- case 'add': return ident + ' + ' + ident2;
- case 'sub': return ident + ' - ' + ident2;
- case 'sdiv': case 'udiv': return 'Math.floor(' + ident + ' / ' + ident2 + ')';
- case 'mul': return ident + ' * ' + ident2;
- case 'urem': case 'srem': return 'Math.floor(' + ident + ' % ' + ident2 + ')';
- case 'or': return ident + ' | ' + ident2;
- case 'and': return ident + ' & ' + ident2;
- case 'xor': return ident + ' ^ ' + ident2;
- case 'shl': case 'ashl': case 'lshl': return ident + ' << ' + ident2;
- case 'shr': case 'ashr': case 'lshr': return ident + ' >> ' + ident2;
- case 'fadd': return ident + ' + ' + ident2;
- case 'fsub': return ident + ' - ' + ident2;
- case 'fdiv': return ident + ' / ' + ident2;
- case 'fmul': return ident + ' * ' + ident2;
- case 'uitofp': case 'sitofp': return ident;
- case 'fptoui': case 'fptosi': return 'Math.floor(' + ident + ')';
- case 'icmp': {
- switch (variant) {
- case 'uge': case 'sge': return '0+(' + ident + ' >= ' + ident2 + ')';
- case 'ule': case 'sle': return '0+(' + ident + ' <= ' + ident2 + ')';
- case 'ugt': case 'sgt': return '0+(' + ident + ' > ' + ident2 + ')';
- case 'ult': case 'slt': return '0+(' + ident + ' < ' + ident2 + ')';
- case 'ne': case 'une': return '0+(' + ident + ' != ' + ident2 + ')';
- case 'eq': return '0+(' + ident + ' == ' + ident2 + ')';
- default: throw 'Unknown icmp variant: ' + variant
- }
- }
- case 'fcmp': {
- switch (variant) {
- case 'uge': case 'oge': return '0+(' + ident + ' >= ' + ident2 + ')';
- case 'ule': case 'ole': return '0+(' + ident + ' <= ' + ident2 + ')';
- case 'ugt': case 'ogt': return '0+(' + ident + ' > ' + ident2 + ')';
- case 'ult': case 'olt': return '0+(' + ident + ' < ' + ident2 + ')';
- case 'une': case 'one': return '0+(' + ident + ' != ' + ident2 + ')';
- case 'ueq': case 'oeq': return '0+(' + ident + ' == ' + ident2 + ')';
- default: throw 'Unknown fcmp variant: ' + variant
- }
- }
- case 'zext': case 'fpext': case 'trunc': case 'sext': case 'fptrunc': return ident;
- case 'select': return '(' + ident + ' ? ' + ident3 + ' : ' + ident4 + ')';
- case 'ptrtoint': {
- if (type.text != 'i8*') print('// XXX Warning: Risky ptrtoint operation on line ' + lineNum);
- return ident;
- }
- case 'inttoptr': {
- print('// XXX Warning: inttoptr operation on line ' + lineNum);
- return ident;
- }
- default: throw 'Unknown mathcmp op: ' + item.op
- }
- } });
-/*
- return [{
- __result__: true,
- intertype: 'mathop',
- op: op,
- variant: variant,
- type: item.tokens[1].text,
- ident: item.tokens[2].text,
- value: item.tokens[4].text,
- lineNum: item.lineNum,
- }];
-*/
-
- function getGetElementPtrIndexes(item) {
- var ident = item.ident;
- var type = item.params[0].type.text; // param 0 == type
- // struct pointer, struct*, and getting a ptr to an element in that struct. Param 1 is which struct, then we have items in that
- // struct, and possibly further substructures, all embedded
- // can also be to 'blocks': [8 x i32]*, not just structs
- type = removePointing(type);
- var indexes = [makeGetPos(ident)];
- var offset = toNiceIdent(item.params[1].ident);
- if (offset != 0) {
- indexes.push((isStructType(type) && TYPES[type].flatSize != 1 ? TYPES[type].flatSize + '*' : '') + offset);
- }
- item.params.slice(2, item.params.length).forEach(function(arg) {
- var curr = toNiceIdent(arg.ident);
- // TODO: If index is constant, optimize
- var typeData = TYPES[type];
- if (isStructType(type) && typeData.needsFlattening) {
- if (typeData.flatFactor) {
- indexes.push(curr + '*' + typeData.flatFactor);
- } else {
- indexes.push(toNiceIdent(type) + '___FLATTENER[' + curr + ']');
- }
- } else {
- if (curr != 0) {
- indexes.push(curr);
- }
- }
- type = TYPES[type] ? TYPES[type].fields[curr] : '';
- });
- return indexes.join('+');
- }
-
- function finalizeGetElementPtr(item) {
- //print('//zz finalize: ' + dump(item.params));
- // TODO: statically combine indexes here if consts
- return makePointer(makeGetSlab(item.ident), getGetElementPtrIndexes(item));
- }
-
- function finalizeBitcast(item) {
- //print('//zz finalizeBC: ' + dump(item));
- return item.ident;
- }
-
- makeFuncLineZyme('bitcast', function(item) {
- // XXX Don't we need to copy ptr - i.e. create new ones (at least if uses > just the next line)?
- // XXX hardcoded ptr impl - as ptrs are ints, we don't need to copy
- var ident = toNiceIdent(item.ident);
- return ident;
- });
- function makeFunctionCall(ident, params) {
- // Special cases
- if (ident == '_llvm_va_start') {
- var args = 'Array.prototype.slice.call(arguments, __numArgs__)';
- var data = 'Pointer_make([' + args + '.length].concat(' + args + '), 0)';
- if (SAFE_HEAP) {
- return 'SAFE_HEAP_STORE(' + params[0].ident + ', ' + data + ', 0)';
- } else {
- return 'HEAP[' + params[0].ident + '] = ' + data;
- }
- } else if (ident == '_llvm_va_end') {
- return ';'
- }
-
- var params = params.map(function(param) {
- if (param.intertype === 'getelementptr') {
- return finalizeGetElementPtr(param);
- } else if (param.intertype === 'bitcast') {
- return finalizeBitcast(param);
- } else {
- return toNiceIdent(param.ident); //.value;//'??arg ' + param.intertype + '??';
- }
- });
- return ident + '(' + params.join(', ') + ')';
- }
- makeFuncLineZyme('getelementptr', function(item) { return finalizeGetElementPtr(item) });
- makeFuncLineZyme('call', function(item) {
- return makeFunctionCall(item.ident, item.params) + (item.standalone ? ';' : '');
- });
-
- // Optimzed intertypes
-
- makeFuncLineZyme('fastgetelementptrload', function(item) {
-//print('// FAST ' + dump(item));
- return 'var ' + item.ident + ' = ' + makeGetValue(item.value.ident, getGetElementPtrIndexes(item.value), true) + ';';
- });
- makeFuncLineZyme('fastgetelementptrstore', function(item) {
-//print('// FAST ' + dump(item));
- return makeSetValue(item.value.ident, getGetElementPtrIndexes(item.value), item.ident, true) + ';';
- });
-
- makeFuncLineZyme('unreachable', function(item) { return '// unreachable' });
-
- // Final combiner
-
- function finalCombiner(items) {
- var ret = items.filter(function(item) { return item.intertype == 'type' });
- ret = ret.concat(items.filter(function(item) { return item.intertype == 'globalVariable' }));
- ret.push('\n');
- ret = ret.concat(items.filter(function(item) { return item.intertype == 'functionStub' }));
- ret.push('\n');
- ret = ret.concat(items.filter(function(item) { return item.intertype == 'function' }));
- return ret.map(function(item) { return item.JS }).join('\n');
- }
-
- // Data
-
- substrate.addItems(values(data.types).filter(function(type) { return type.lineNum != '?' }), 'Type');
- substrate.addItems(data.globalVariables, 'GlobalVariable');
- substrate.addItems(data.functions, 'FunctionSplitter');
- substrate.addItems(data.functionStubs, 'FunctionStub');
-
- return preprocess(read('preamble.js')) + finalCombiner(substrate.solve()) + read('postamble.js');
-// return finalCombiner(substrate.solve());
-}
-
-//===============================
-// Main
-//===============================
-
-// Override settings
-var settings = JSON.parse(readline());
-for (setting in settings) {
- this[setting] = settings[setting];
-}
-
-// Read llvm
-var lines = [];
-var line;
-do {
- line = readline();
- if (line == null) break;
- lines.push(line);
-} while(true);
-var data = lines.join("\n");
-
-// Do it
-print(JSify(analyzer(intertyper(data))));
-
diff --git a/src/settings.js b/src/settings.js
index e5eca3e2..32db3739 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -3,3 +3,5 @@ RELOOP = 0;
SAFE_HEAP = 0;
LABEL_DEBUG = 0;
+DEBUG_TAGS_SHOWING = ['labelbranching', 'enzymatic'];
+
diff --git a/src/utility.js b/src/utility.js
index 28d3fa93..2cce3f4c 100644
--- a/src/utility.js
+++ b/src/utility.js
@@ -1,3 +1,4 @@
+// General JS utilities
function dump(item) {
function lineify(text) {
@@ -134,7 +135,6 @@ function splitter(array, filter) {
return { leftIn: leftIn, splitOut: splitOut };
}
-DEBUG_TAGS_SHOWING = ['labelbranching', 'enzymatic'];
function dcheck(tag) {
return DEBUG_TAGS_SHOWING.indexOf(arguments[0]) != -1;
}