aboutsummaryrefslogtreecommitdiff
path: root/src/parser.js
diff options
context:
space:
mode:
authoralon@honor <none@none>2010-08-25 21:01:10 -0700
committeralon@honor <none@none>2010-08-25 21:01:10 -0700
commita9256705ada4ae335870cdb60ae7f9c8373038e3 (patch)
tree2c7aeabbdf38a9fea035d6680f8ad31b2a7e0d46 /src/parser.js
parentf6d98e5d038ee80177b9414e5e34ddc05857627b (diff)
the code
Diffstat (limited to 'src/parser.js')
-rw-r--r--src/parser.js2286
1 files changed, 2286 insertions, 0 deletions
diff --git a/src/parser.js b/src/parser.js
new file mode 100644
index 00000000..ba0a36e4
--- /dev/null
+++ b/src/parser.js
@@ -0,0 +1,2286 @@
+// LLVM parser
+//============
+
+/*
+ * TODO:
+ * * Re-use variables (of the same kind, native/nativized vs. emulated).
+ */
+
+// Options
+
+// Global switches. They are assigned without `var`, so they become properties
+// of the global object and are visible to every script loaded afterwards.
+// NOTE(review): the code that reads them is not in this part of the file —
+// confirm what each flag toggles before changing a default.
+OPTIMIZE = 1;
+RELOOP = 1;
+
+LINEDEBUG = 0;
+
+// Prep - allow this to run in both SpiderMonkey and V8
+
+// Fallbacks for shells that lack load()/read(); both are built on snarf().
+if (!this['load']) {
+  // NOTE(review): this is a direct eval inside a function, so `var` and
+  // `function` declarations of the loaded script stay local to this wrapper
+  // instead of becoming globals — confirm the fallback is ever relied on.
+  load = function(f) { eval(snarf(f)) }
+}
+if (!this['read']) {
+  // Return the file contents; without the `return` this fallback always
+  // produced undefined.
+  read = function(f) { return snarf(f) }
+}
+
+load('utility.js');
+load('enzymatic.js');
+
+// Tools
+
+// Builds the pointer-to-`type` type string by appending a single '*'.
+function addPointing(type) {
+  var pointerType = type + '*';
+  return pointerType;
+}
+// Drops the last character of `type`. Meant for types ending in '*'; no
+// check is made that the removed character really is a '*'.
+function removePointing(type) {
+  return type.slice(0, -1);
+}
+
+// Counts the '*' characters at the end of a type string, i.e. its levels of
+// pointer indirection ('i8**' -> 2, 'i32' -> 0).
+function pointingLevels(type) {
+  var ret = 0;
+  // Bound the scan by the string length: substr() clamps an out-of-range
+  // negative start to 0, so a string made only of '*' used to loop forever.
+  while (ret < type.length && type.charAt(type.length - ret - 1) === '*') {
+    ret++;
+  }
+  return ret;
+}
+
+// Makes an LLVM identifier usable as a plain name: quote, space, '.', '@'
+// and '%' each become '_'. Values that compare equal to their parseFloat()
+// result (numeric literals) are returned untouched.
+function toNiceIdent(ident) {
+  var looksNumeric = parseFloat(ident) == ident;
+  if (!looksNumeric) {
+    return ident.replace(/[" \.@%]/g, '_');
+  }
+  return ident;
+}
+
+// True for the scalar types this parser treats as numbers.
+function isNumberType(type) {
+  switch (type) {
+    case 'i1':
+    case 'i8':
+    case 'i32':
+    case 'i64':
+    case 'float':
+    case 'double':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// True when `type` starts with '%struct'.
+// NOTE(review): despite the name, nothing here checks for a trailing '*' —
+// a plain '%struct.x' also passes. Confirm against the callers.
+function isStructPointerType(type) {
+  return type.indexOf('%struct') === 0;
+}
+
+// True for aggregate types: '[N x T]' blocks, and '%struct...' names that
+// are not pointers.
+function isStructType(type) {
+  // No 'g' flag: test() on a global regex resumes from lastIndex, so on an
+  // engine that shares one object per regex literal a second call could miss
+  // a match that the first call found.
+  if (/^\[\d+\ x\ (.*)\]/.test(type)) return true; // [15 x ?] blocks. Like structs
+  var proof = '%struct';
+  return type.substr(0, proof.length) == proof && !isPointerType(type);
+}
+
+// True when `type` ends in at least one '*'. (Marked TODO by the author.)
+function isPointerType(type) { // TODO!
+  var levels = pointingLevels(type);
+  return 0 < levels;
+}
+
+// True when `type` is a number type, a struct/array type or a pointer type,
+// checked in that order. (Marked TODO by the author.)
+function isType(type) { // TODO!
+  if (isNumberType(type)) return true;
+  if (isStructType(type)) return true;
+  return isPointerType(type);
+}
+
+// Decides whether a token is a parenthesised parameter-type list such as
+// '(i32, i8*)' or '(...)', optionally followed by '*'s.
+// Every sub-token of token.item[0] must be a type for the answer to be true.
+function isFunctionDef(token) {
+  var stripped = token.text;
+  // Peel the trailing '*'s so only the '( ... )' part is inspected.
+  var stars = pointingLevels(stripped);
+  while (stars > 0) {
+    stripped = removePointing(stripped);
+    stars--;
+  }
+  var parenthesised = stripped[0] == '(' && stripped.substr(-1) == ')';
+  if (!parenthesised) return false;
+  if (stripped == '(...)') return true;
+  if (!token.item) return false;
+  return token.item[0].tokens.every(function(subtoken) {
+    return isType(subtoken.text);
+  });
+}
+
+// Copies token.text into token.ident and hands back the same token object.
+function addIdent(token) {
+  var text = token.text;
+  token.ident = text;
+  return token;
+}
+
+// Splits out items that pass filter. Returns also the original sans the filtered
+// Partitions `array` by `filter`: `splitOut` holds the elements that pass,
+// `original` the ones that do not. The input array is left untouched.
+function splitter(array, filter) {
+  var matching = array.filter(filter);
+  var rest = array.filter(function(element) {
+    return !filter(element);
+  });
+  return { original: rest, splitOut: matching };
+}
+
+// Fuses a non-empty run of tokens into one token: the texts are concatenated
+// without a separator, the originals are kept in `tokens`, and the line
+// number comes from the first token.
+function combineTokens(tokens) {
+  var combined = {
+    lineNum: tokens[0].lineNum,
+    text: '',
+    tokens: [],
+  };
+  for (var i = 0; i < tokens.length; i++) {
+    combined.text += tokens[i].text;
+    combined.tokens.push(tokens[i]);
+  }
+  return combined;
+}
+
+// Structural equality of two tokens, ignoring their __uid__ values: both
+// uids are zeroed while the JSON forms are compared, then put back.
+function compareTokens(a, b) {
+  var savedUids = [a.__uid__, b.__uid__];
+  a.__uid__ = 0;
+  b.__uid__ = 0;
+  var same = JSON.stringify(a) == JSON.stringify(b);
+  a.__uid__ = savedUids[0];
+  b.__uid__ = savedUids[1];
+  return same;
+}
+
+// Splits a token list on ',' tokens and returns the segments in order.
+// A trailing ',' does not produce an extra empty segment.
+// The input array is not modified (it used to get a ',' token appended).
+function splitTokenList(tokens) {
+  var ret = [];
+  var seg = [];
+  tokens.forEach(function(token) {
+    if (token.text == ',') {
+      ret.push(seg);
+      seg = [];
+    } else {
+      seg.push(token);
+    }
+  });
+  // Flush the final segment when the list does not end with a ','.
+  if (seg.length > 0) ret.push(seg);
+  return ret;
+}
+
+// Builds a zyme that detaches a child object from its parent item.
+// A parent qualifies when parent[parentSlot] == parentSlotValue, it does not
+// have parentUnrequiredSlot set, and parent[childSlot] is not null yet.
+// Processing nulls the parent's slot and tags the child with parentUid,
+// parentSlot and lineNum (plus any copySlots) so it can be put back later.
+function makeSplitter(parentSlot, parentSlotValue, parentUnrequiredSlot, childSlot, copySlots) {
+  function isUnsplitParent(item) {
+    return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot] && item[childSlot] !== null;
+  }
+
+  function detachChild(parent) {
+    var child = parent[childSlot];
+    parent[childSlot] = null;
+    child.parentUid = parent.__uid__;
+    child.parentSlot = childSlot;
+    child.lineNum = parent.lineNum; // Debugging
+    var slots = copySlots || [];
+    for (var i = 0; i < slots.length; i++) {
+      child[slots[i]] = parent[slots[i]];
+    }
+    return [parent, child];
+  }
+
+  return {
+    selectItem: isUnsplitParent,
+    processItem: detachChild,
+  };
+}
+
+// Builds a zyme that puts a processed child back into its parent.
+// select() looks for the first qualifying parent that has a child whose
+// childRequiredSlot is set and whose parentUid equals the parent's __uid__;
+// it yields [parent, child], or [] when there is no such pair.
+// process() stores the child in the slot it came from, strips the child's
+// bookkeeping fields and runs finalizeFunc on the parent.
+function makeCombiner(parentSlot, parentSlotValue, parentUnrequiredSlot, childRequiredSlot, finalizeFunc) {
+  function isOpenParent(item) {
+    return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot];
+  }
+
+  function firstChildOf(parent, items) {
+    for (var i = 0; i < items.length; i++) {
+      if (items[i][childRequiredSlot] && items[i].parentUid === parent.__uid__) {
+        return items[i];
+      }
+    }
+    return undefined;
+  }
+
+  return {
+    select: function(items) {
+      var parents = items.filter(function(item) { return isOpenParent(item); });
+      for (var p = 0; p < parents.length; p++) {
+        var child = firstChildOf(parents[p], items);
+        if (child) return [parents[p], child];
+      }
+      return [];
+    },
+    process: function(pair) {
+      var parent = pair[0];
+      var child = pair[1];
+      parent[child.parentSlot] = child;
+      delete child.parentUid;
+      delete child.parentSlot;
+      finalizeFunc(parent);
+      return [parent];
+    },
+  };
+}
+
+// Parses a comma-separated token list (call arguments, getelementptr
+// operands) into intertype items, one per segment:
+//  * '<type> getelementptr (...)' (optionally 'noalias') -> parseGetElementPtr
+//  * '<type> bitcast (...)'                              -> parseBitcast
+//  * anything else -> { intertype: 'value', type, value, ident }, where all
+//    tokens but the last are merged into the type token.
+// Every segment must hold at least two tokens (a type and a value).
+// The caller's array is left alone (it used to get a ',' appended); the
+// token objects themselves may still have their text merged in place.
+function parseParamTokens(params) {
+  if (params.length === 0) return [];
+  var ret = [];
+  // Work on a copy so the terminating ',' never reaches the caller's list.
+  var rest = params.slice(0);
+  if (rest[rest.length-1].text != ',') {
+    rest.push({ text: ',' });
+  }
+  while (rest.length > 0) {
+    // The trailing ',' guarantees this scan stops inside the array.
+    var i = 0;
+    while (rest[i].text != ',') i++;
+    var segment = rest.slice(0, i);
+    rest = rest.slice(i+1);
+    if (segment[1].text === 'getelementptr' || segment[1].text === 'noalias') {
+      ret.push(parseGetElementPtr(segment));
+    } else if (segment[1].text === 'bitcast') {
+      ret.push(parseBitcast(segment));
+    } else {
+      if (segment[2] && segment[2].text == 'to') { // part of bitcast params
+        segment = segment.slice(0, 2);
+      }
+      while (segment.length > 2) {
+        segment[0].text += segment[1].text;
+        segment.splice(1, 1); // TODO: merge tokens nicely
+      }
+      ret.push({
+        intertype: 'value',
+        type: segment[0],
+        value: segment[1],
+        ident: segment[1].text,
+      });
+    }
+  }
+  return ret;
+}
+
+// Parses a '<type> [noalias] getelementptr <x> (<operands>)' segment.
+// Works on a copy of the segment; the operand tokens are read from the
+// fourth entry's item[0].tokens once an optional 'noalias' is dropped.
+// The result's ident is the nice-ified ident of the first operand.
+function parseGetElementPtr(segment) {
+  var parts = segment.slice(0);
+  if (parts[1].text === 'noalias') {
+    parts.splice(1, 1);
+  }
+  var operands = parseParamTokens(parts[3].item[0].tokens);
+  var ret = {
+    intertype: 'getelementptr',
+    type: parts[0],
+    params: operands,
+  };
+  ret.ident = toNiceIdent(operands[0].ident);
+  return ret;
+}
+
+// TODO: use this
+// Parses a '<type> bitcast (<operands>)' segment. The operand tokens are read
+// from the third entry's item[0].tokens; the result's ident is the
+// nice-ified ident of the first operand.
+function parseBitcast(segment) {
+  var operands = parseParamTokens(segment[2].item[0].tokens);
+  var ret = {
+    intertype: 'bitcast',
+    type: segment[0],
+    params: operands,
+  };
+  ret.ident = toNiceIdent(operands[0].ident);
+  return ret;
+}
+
+// Collects the ident of every label, in order.
+function getLabelIds(labels) {
+  var ids = [];
+  for (var i = 0; i < labels.length; i++) {
+    ids.push(labels[i].ident);
+  }
+  return ids;
+}
+
+// =======================
+
+// llvm => intertypes
+function intertyper(data) {
+ // Substrate
+
+ // NOTE(review): no `var` here, so `substrate` is an implicit global;
+ // analyzer() assigns the same global. Confirm the two never interleave.
+ substrate = new Substrate('Intertyper');
+
+ // Input
+
+ // Seed item: the complete LLVM assembly text. The line splitter zyme is the
+ // one that selects items carrying llvmText.
+ substrate.addItem({
+ llvmText: data,
+ });
+
+ // Tools
+
+ // Index of the first token of `item` whose text equals `text`, or -1.
+ function findTokenText(item, text) {
+   var position = 0;
+   while (position < item.tokens.length) {
+     if (item.tokens[position].text == text) return position;
+     position++;
+   }
+   return -1;
+ }
+
+ // Line splitter.
+ substrate.addZyme({
+   // Claims the seed item that still carries the raw LLVM text.
+   selectItem: function(item) { return !!item.llvmText; },
+   // Splits the text into one item per non-empty line; lineNum is 1-based.
+   // A line that starts with spaces followed by 'to' is the second half of
+   // an 'invoke' and is appended to the line before it.
+   processItem: function(item) {
+     var lines = item.llvmText.split('\n');
+     // No 'g' flag: test() on a global regex resumes from lastIndex, which
+     // can make a reused regex object miss a continuation line.
+     var invokeContinuation = /^\ +to.*/;
+     var ret = [];
+     for (var i = 0; i < lines.length; i++) {
+       // Merge only when there is a previous line to merge into; a stray
+       // leading continuation is kept as an ordinary line instead of throwing.
+       if (ret.length > 0 && invokeContinuation.test(lines[i])) {
+         // to after invoke
+         ret[ret.length-1].lineText += lines[i];
+       } else {
+         ret.push({
+           lineText: lines[i],
+           lineNum: i + 1,
+         });
+       }
+     }
+     return ret.filter(function(line) { return line.lineText; });
+   },
+ });
+
+ // Line tokenizer
+ // Turns an item's lineText into { tokens, indent, lineNum }. `indent` is the
+ // column of the first token (-1 when the line yields no token at all).
+ // Tokens are split on spaces and commas outside quotes and brackets; a
+ // bracketed run becomes a single token whose inner text is tokenized again.
+ substrate.addZyme({
+ selectItem: function(item) { return item.lineText; },
+ processItem: function(item) {
+//print("line: " + item.lineText);
+ // The added trailing space makes the ' ' case flush the last token.
+ var lineText = item.lineText + " ";
+ var tokens = [];
+ // Start column of the token being scanned, or -1 when between tokens.
+ var tokenStart = -1;
+ var indent = -1;
+ // 1 while inside a double-quoted string, 0 otherwise.
+ var quotes = 0;
+ var i = 0;
+ // Note: '{' is not an encloser, as its use in functions is split over many lines
+ // Openers map to their current nesting count (a number); closers map to
+ // the opener character they balance.
+ var enclosers = {
+ '[': 0,
+ ']': '[',
+ '(': 0,
+ ')': '(',
+ '<': 0,
+ '>': '<',
+ };
+ function notQuoted() {
+ return quotes == 0;
+ }
+ // True when every opener count is back to zero.
+ function notEnclosed() {
+ for (var i in enclosers) {
+ if (typeof enclosers[i] === 'number' && enclosers[i] > 0)
+ return false;
+ }
+ return true;
+ }
+ // Kept so tryFinishToken can call this zyme's processItem recursively.
+ var that = this;
+ // Marks position i as the start of a token, unless one is already open or
+ // we are inside brackets or quotes.
+ function tryStartToken() {
+ if (tokenStart == -1 && notEnclosed() && notQuoted()) {
+//print("try START " + tokenStart + ',' + JSON.stringify(enclosers));
+ tokenStart = i;
+ }
+ }
+ // Closes the open token at position i (inclusive when includeThis is set,
+ // which the closing-bracket case uses) and appends or merges it.
+ function tryFinishToken(includeThis) {
+ if (tokenStart >= 0 && notEnclosed() && notQuoted()) {
+//print("try finish " + tokenStart + ',' + JSON.stringify(enclosers));
+ var token = {
+ text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)),
+ };
+ // A token that starts with a bracket character gets its inner text (first
+ // and last character dropped) tokenized recursively into token.item.
+ if (token.text[0] in enclosers) {
+ token.item = that.processItem({
+ lineText: token.text.substr(1, token.text.length-2)
+ });
+ token.type = token.text[0];
+ }
+ if (indent == -1) {
+ indent = tokenStart;
+ }
+ // merge certain tokens
+ // 1) a quoted name following a lone '%', or a token made only of '*'s, is
+ //    glued onto the previous token;
+ // 2) a parameter-type list following a type is glued on with a space;
+ // 3) a token ending in '}' collapses everything after the nearest earlier
+ //    '{' token into one combined token of type '{' (the '{' token and the
+ //    closing token itself are not kept);
+ // 4) anything else is appended as a new token.
+ if ( (tokens.length > 0 && tokens.slice(-1)[0].text == '%' && token.text[0] == '"' ) ||
+ (tokens.length > 0 && token.text.replace(/\*/g, '') == '') ) {
+ tokens.slice(-1)[0].text += token.text;
+ } else if (tokens.length > 0 && isType(tokens.slice(-1)[0].text) && isFunctionDef(token)) {
+ tokens.slice(-1)[0].text += ' ' + token.text;
+ } else if (tokens.length > 0 && token.text[token.text.length-1] == '}') {
+ var openBrace = tokens.length-1;
+ // NOTE(review): assumes a '{' token exists earlier on the line; without
+ // one this walks past index 0 and throws.
+ while (tokens[openBrace].text != '{') openBrace --;
+ token = combineTokens(tokens.slice(openBrace+1));
+ tokens.splice(openBrace, tokens.length-openBrace+1);
+ tokens.push(token);
+ tokens.slice(-1)[0].type = '{';
+ } else {
+ tokens.push(token);
+ }
+// print("new token: " + dump(tokens.slice(-1)[0]));
+ tokenStart = -1;
+ }
+ }
+ // Character scan. `i` is shared with the two helpers above.
+ for (; i < lineText.length; i++) {
+ var letter = lineText[i];
+//print("letter: " + letter);
+ switch (letter) {
+ case ' ':
+ tryFinishToken();
+ break;
+ case '"':
+ // On an opening quote this ends the previous token and starts a new one
+ // at the quote; on a closing quote both calls are no-ops because we are
+ // still quoted, and only the toggle below takes effect.
+ tryFinishToken();
+ tryStartToken();
+ quotes = 1-quotes;
+ break;
+ case ',':
+ tryFinishToken();
+ // Commas inside brackets or quotes stay part of the enclosing token.
+ if (notEnclosed() && notQuoted()) {
+ tokens.push({ text: ',' });
+ }
+ break;
+ default:
+ if (letter in enclosers && notQuoted()) {
+ if (typeof enclosers[letter] === 'number') {
+ // Opener: close any pending token, start the bracketed one, go deeper.
+ tryFinishToken();
+ tryStartToken();
+ enclosers[letter]++;
+ } else {
+ // Closer: pop one level; the token only finishes once all levels are closed.
+ enclosers[enclosers[letter]]--;
+ tryFinishToken(true);
+ }
+//print(' post-enclosers: ' + JSON.stringify(enclosers));
+ } else {
+ tryStartToken();
+ }
+ }
+ }
+ return [{
+ tokens: tokens,
+ indent: indent,
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+
+ // Line parsers to intermediate form
+
+ // Comment
+ substrate.addZyme({
+   // Comment lines start with a ';' token. Lines that produced no tokens at
+   // all (whitespace only) are claimed here too, so they are discarded
+   // instead of making a tokens[0] access throw.
+   selectItem: function(item) { return item.tokens && (item.tokens.length == 0 || item.tokens[0].text == ';') },
+   // Nothing to emit for these lines.
+   processItem: function(item) { return [] },
+ });
+ // target
+ substrate.addZyme({
+   // 'target ...' lines are recognised only so they can be discarded.
+   selectItem: function(item) { return item.tokens && item.tokens.length > 0 && item.tokens[0].text == 'target' },
+   processItem: function(item) { return [] },
+ });
+ // globals: type or constant
+ // Handles column-0 lines of the form '<name> = ...': type definitions,
+ // 'global' variables and everything else (treated as a global constant).
+ substrate.addZyme({
+ selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=' },
+ processItem: function(item) {
+ if (item.tokens[2].text == 'type') {
+ // type
+//print('// zz ' + dump(item));
+ var fields = [];
+ if (item.tokens[3].text != 'opaque') {
+ // tokens[3] is the combined '{ ... }' token; its sub-tokens are consumed
+ // in place, one comma-separated field at a time.
+ var subTokens = item.tokens[3].tokens;
+ subTokens.push({text:','});
+ while (subTokens[0]) {
+ // The scan starts at 1, so the first token of a field is never tested
+ // for being a comma.
+ // NOTE(review): an empty field list leaves only the appended ',' and
+ // makes subTokens[1] undefined here — confirm '{}' types cannot occur.
+ var stop = 1;
+ while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
+ fields.push(combineTokens(subTokens.slice(0, stop)).text);
+ subTokens.splice(0, stop+1);
+ }
+ }
+ return [{
+ __result__: true,
+ intertype: 'type',
+ name_: item.tokens[0].text,
+ fields: fields,
+ lineNum: item.lineNum,
+ }]
+ } else if (item.tokens[2].text == 'global') {
+ // variable
+ // Layout read here: <name> = global <type> <value>
+ return [{
+ __result__: true,
+ intertype: 'globalVariable',
+ ident: item.tokens[0].text,
+ type: item.tokens[3].text,
+ value: item.tokens[4],
+ lineNum: item.lineNum,
+ }]
+ } else {
+ // constant
+ var ident = item.tokens[0].text;
+ // Drop the linkage/qualifier keywords so the type lands at index 2.
+ while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 })
+ item.tokens.splice(2, 1);
+ var ret = {
+ __result__: true,
+ intertype: 'globalConstant',
+ ident: ident,
+ type: item.tokens[2],
+ lineNum: item.lineNum,
+ };
+ if (ident == '@llvm.global_ctors') {
+ // For each comma-separated entry, record the text of the last sub-token
+ // of the entry's second token.
+ ret.ctors = [];
+ var subTokens = item.tokens[3].item[0].tokens;
+ splitTokenList(subTokens).forEach(function(segment) {
+ ret.ctors.push(segment[1].tokens.slice(-1)[0].text);
+ });
+ } else {
+ // A standalone 'c' token in front of the value is skipped.
+ if (item.tokens[3].text == 'c')
+ item.tokens.splice(3, 1);
+ ret.value = item.tokens[3];
+ }
+ return [ret];
+ }
+ },
+ });
+ // function header
+ // Handles 'define ... {' lines at column 0 and emits a 'function' item.
+ substrate.addZyme({
+ selectItem: function(item) { return item.tokens && item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' &&
+ item.tokens.slice(-1)[0].text == '{' },
+ processItem: function(item) {
+ // Strip a trailing 'align <n>' pair, then a trailing 'nounwind', both of
+ // which sit just before the final '{'.
+ if (item.tokens.slice(-3,-2)[0].text == 'align')
+ item.tokens.splice(-3,2);
+ if (item.tokens.slice(-2,-1)[0].text == 'nounwind')
+ item.tokens.splice(-2,1);
+ // Remove tokens right after 'define' until five remain:
+ // define, <return type>, <name>, <params>, '{'.
+ while (item.tokens.length > 5)
+ item.tokens.splice(1, 1);
+ return [{
+ __result__: true,
+ intertype: 'function',
+ ident: item.tokens[2].text,
+ returnType: item.tokens[1],
+ params: item.tokens[3],
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // label
+ substrate.addZyme({
+   // A label line sits at column 0 and its first token ends with ':'.
+   selectItem: function(item) {
+     return item.tokens && item.tokens.length >= 1 && item.indent === 0 &&
+            item.tokens[0].text.substr(-1) == ':';
+   },
+   // Emits a finished 'label' item; its ident is '%' plus the label name
+   // without the trailing colon.
+   processItem: function(item) {
+     var labelText = item.tokens[0].text;
+     var label = {
+       __result__: true,
+       intertype: 'label',
+       ident: '%' + labelText.slice(0, -1),
+       lineNum: item.lineNum,
+     };
+     return [label];
+   },
+ });
+ // assignment
+ substrate.addZyme({
+   // Unparsed lines at indent 2 that contain an '=' token.
+   selectItem: function(item) {
+     return item.indent === 2 && item.tokens && item.tokens.length >= 3 &&
+            findTokenText(item, '=') >= 0 && !item.intertype;
+   },
+   // Splits '<ident> = <expr>' in two: an 'assign' item with an empty value
+   // slot, and a token-only item for <expr> (indent -1) that the expression
+   // zymes parse and the reintegration zyme later stores back in `value`.
+   processItem: function(item) {
+     var equalsAt = findTokenText(item, '=');
+     var assign = {
+       intertype: 'assign',
+       ident: combineTokens(item.tokens.slice(0, equalsAt)).text,
+       value: null,
+       lineNum: item.lineNum,
+     };
+     var expression = { // Additional token, to be parsed, and later re-integrated
+       indent: -1,
+       tokens: item.tokens.slice(equalsAt+1),
+       parentLineNum: item.lineNum,
+       parentSlot: 'value',
+     };
+     return [assign, expression];
+   },
+ });
+ // reintegration - find intermediate representation-parsed items and
+ // place back in parents
+ substrate.addZyme({
+   // Finds the first parsed child (it has both parentSlot and intertype) for
+   // which some item's lineNum equals the child's parentLineNum, and returns
+   // [thatItem, child]; [] when no such pair exists.
+   select: function(items) {
+     function ownerOf(child) {
+       for (var j = 0; j < items.length; j++) {
+         if (items[j].lineNum == child.parentLineNum) return items[j];
+       }
+       return null;
+     }
+     for (var i = 0; i < items.length; i++) {
+       var candidate = items[i];
+       if (!(candidate.parentSlot && candidate.intertype)) continue;
+       var owner = ownerOf(candidate);
+       if (owner) return [owner, candidate];
+     }
+     return [];
+   },
+   // Stores the child in the parent's slot and marks the parent as finished.
+   process: function(pair) {
+     var owner = pair[0];
+     var parsed = pair[1];
+     owner[parsed.parentSlot] = parsed;
+     owner.__result__ = true;
+     delete parsed.parentLineNum;
+     return [owner];
+   }
+ });
+ // 'load'
+ substrate.addZyme({
+   // Right-hand sides (indent -1) that start with 'load'.
+   selectItem: function(item) {
+     return item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
+            item.tokens[0].text == 'load';
+   },
+   // Layout read here: load <pointer type> <pointer>. The loaded type is
+   // the pointer type with its last character removed.
+   processItem: function(item) {
+     var pointerType = item.tokens[1];
+     var pointer = item.tokens[2];
+     item.intertype = 'load';
+     item.pointerType = pointerType;
+     item.pointer = pointer;
+     item.ident = pointer.text;
+     item.type = { text: removePointing(pointerType.text) };
+     return [item];
+   },
+ });
+ // 'bitcast'
+ substrate.addZyme({
+   // Right-hand sides (indent -1) that start with 'bitcast'.
+   selectItem: function(item) {
+     return item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
+            item.tokens[0].text == 'bitcast';
+   },
+   // Positions read: [1] source type, [2] operand, [4] destination type.
+   processItem: function(item) {
+     var tokens = item.tokens;
+     item.intertype = 'bitcast';
+     item.type = tokens[1];
+     item.ident = tokens[2].text;
+     item.type2 = tokens[4];
+     return [item];
+   },
+ });
+ // 'getelementptr'
+ substrate.addZyme({
+   // Right-hand sides (indent -1) that start with 'getelementptr'.
+   selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr' },
+   processItem: function(item) {
+     // The operands run up to the trailing ';' comment token. When the line
+     // has no comment, take every remaining token instead of reading past
+     // the end of the list (which used to throw).
+     var last = 0;
+     while (last < item.tokens.length && item.tokens[last].text != ';') last++;
+     // Shape the tokens like a parameter segment so parseGetElementPtr can
+     // be reused: [type, placeholder, unused, { item: [{ tokens }] }].
+     var segment = [ item.tokens[1], { text: null }, null, { item: [ {
+       tokens: item.tokens.slice(2, last)
+     } ] } ];
+     var parsed = parseGetElementPtr(segment);
+     item.intertype = 'getelementptr';
+     item.type = parsed.type;
+     item.params = parsed.params;
+     item.ident = parsed.ident;
+     return [item];
+   },
+ });
+ // 'call'
+ // Handles 'call' both as a right-hand side and as a standalone statement
+ // (there is no indent test in selectItem).
+ substrate.addZyme({
+ selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype },
+ processItem: function(item) {
+ item.intertype = 'call';
+ // Drop a 'signext'/'zeroext' attribute so the return type sits at index 1.
+ if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) {
+ item.tokens.splice(1, 1);
+ }
+ item.type = item.tokens[1];
+ item.functionType = '';
+ // Everything between the return type and the callee name (the first token
+ // starting with '@' or '%') is accumulated into functionType.
+ // NOTE(review): throws if no such token exists on the line.
+ while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
+ item.functionType += item.tokens[2].text;
+ item.tokens.splice(2, 1);
+ }
+ item.ident = item.tokens[2].text;
+ // tokens[3] is the parenthesised argument list, already tokenized into .item.
+ item.params = parseParamTokens(item.tokens[3].item[0].tokens);
+ if (item.indent == 2) {
+ // standalone call - not in assign
+ item.standalone = true;
+ item.__result__ = true;
+ }
+ return [item];
+ },
+ });
+ // 'invoke'
+ // Handles 'invoke'. The line splitter has already appended the following
+ // 'to ...' line, so both labels are on this item's token list.
+ substrate.addZyme({
+ selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype },
+ processItem: function(item) {
+ item.intertype = 'invoke';
+ item.type = item.tokens[1];
+ item.functionType = '';
+ // Accumulate tokens into functionType until the callee name (first token
+ // starting with '@' or '%') is at index 2.
+ while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
+ item.functionType += item.tokens[2].text;
+ item.tokens.splice(2, 1);
+ }
+ item.ident = item.tokens[2].text;
+ item.params = parseParamTokens(item.tokens[3].item[0].tokens);
+ // Fixed positions after the argument list:
+ // [4] to [5] label [6] <normal> [7] unwind [8] label [9] <unwind>
+ item.toLabel = item.tokens[6].text;
+ item.unwindLabel = item.tokens[9].text;
+ item.__result__ = true;
+ return [item];
+ },
+ });
+ // 'alloca'
+ substrate.addZyme({
+   // Right-hand sides (indent -1) that start with 'alloca'.
+   selectItem: function(item) {
+     return item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
+            item.tokens[0].text == 'alloca';
+   },
+   // The result type is a pointer to the allocated type (tokens[1]).
+   processItem: function(item) {
+     var allocated = item.tokens[1];
+     item.intertype = 'alloca';
+     item.allocatedType = allocated;
+     item.type = { text: addPointing(allocated.text) };
+     return [item];
+   },
+ });
+ // mathops
+ // Handles arithmetic, comparison, conversion and 'select' right-hand sides.
+ // All of them are emitted as intertype 'mathop' with the opcode in `op`.
+ substrate.addZyme({
+ selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
+ ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptoui', 'fptosi', 'trunc', 'sext', 'select']
+ .indexOf(item.tokens[0].text) != -1 && !item.intertype },
+ processItem: function(item) {
+ item.intertype = 'mathop';
+ item.op = item.tokens[0].text;
+ item.variant = null;
+ // Drop an 'nsw' flag so the positions below line up.
+ if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1);
+ // For comparisons the token after the opcode is the predicate; it is
+ // stored in `variant` and removed.
+ if (['icmp', 'fcmp'].indexOf(item.op) != -1) {
+ item.variant = item.tokens[1].text;
+ item.tokens.splice(1, 1);
+ }
+ // Purely positional from here: [1] type, [2] first operand, [4] the token
+ // after the first separator, and [5]/[8] when present.
+ // NOTE(review): what lands in ident2..ident4 depends on the opcode's
+ // syntax — verify per opcode; tokens[4] must exist or this throws.
+ item.type = item.tokens[1];
+ item.ident = item.tokens[2].text;
+ item.ident2 = item.tokens[4].text;
+ item.ident3 = item.tokens[5] ? item.tokens[5].text : null;
+ item.ident4 = item.tokens[8] ? item.tokens[8].text : null;
+//print('// zz got maptop ' + item.op + ',' + item.variant + ',' + item.ident + ',' + item.value);
+ return [item];
+ },
+ });
+ // 'store'
+ // Handles 'store <value type> <value>, <pointer type> <pointer>' statements
+ // at indent 2 and emits a finished 'store' item.
+ substrate.addZyme({
+ selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' &&
+ !item.intertype },
+ processItem: function(item) {
+ // In the simple form the comma is the fourth token. Otherwise the value
+ // is a longer expression, which must be a getelementptr.
+ if (item.tokens[3].text != ',') {
+ assertEq(item.tokens[2].text, 'getelementptr');
+ // complex input - likely getelementptr
+ // Find the comma that ends the value expression, searching from index 4.
+ var commaIndex = 4;
+ while (item.tokens[commaIndex].text != ',') commaIndex ++;
+ return [{
+ __result__: true,
+ intertype: 'store',
+ valueType: item.tokens[1],
+ value: parseGetElementPtr(item.tokens.slice(1, commaIndex)),
+ pointerType: item.tokens[commaIndex+1],
+ pointer: item.tokens[commaIndex+2],
+ ident: item.tokens[commaIndex+2].text,
+ lineNum: item.lineNum,
+ }];
+ }
+ // Simple form: the value token itself is stored, with its text copied
+ // into an `ident` field by addIdent.
+ return [{
+ __result__: true,
+ intertype: 'store',
+ valueType: item.tokens[1],
+ value: addIdent(item.tokens[2]),
+ pointerType: item.tokens[4],
+ pointer: item.tokens[5],
+ ident: item.tokens[5].text,
+ lineNum: item.lineNum,
+ }];
+ },
+ });
+ // 'br'
+ substrate.addZyme({
+   // Unparsed 'br' statements at indent 2.
+   selectItem: function(item) {
+     return item.indent === 2 && item.tokens && item.tokens.length >= 3 &&
+            item.tokens[0].text == 'br' && !item.intertype;
+   },
+   // 'br label <x>' gives a branch with a single `label`; any other form is
+   // read as conditional, with the condition at [2] and the two targets at
+   // [5] and [8].
+   processItem: function(item) {
+     var tokens = item.tokens;
+     var unconditional = tokens[1].text == 'label';
+     if (!unconditional) {
+       return [{
+         __result__: true,
+         intertype: 'branch',
+         ident: tokens[2].text,
+         labelTrue: toNiceIdent(tokens[5].text),
+         labelFalse: toNiceIdent(tokens[8].text),
+         lineNum: item.lineNum,
+       }];
+     }
+     return [{
+       __result__: true,
+       intertype: 'branch',
+       label: toNiceIdent(tokens[2].text),
+       lineNum: item.lineNum,
+     }];
+   },
+ });
+ // 'ret'
+ substrate.addZyme({
+   // Unparsed 'ret' statements at indent 2.
+   selectItem: function(item) {
+     return item.indent === 2 && item.tokens && item.tokens.length >= 2 &&
+            item.tokens[0].text == 'ret' && !item.intertype;
+   },
+   // Layout read here: ret <type> [<value>]; value is null when absent.
+   processItem: function(item) {
+     var valueToken = item.tokens[2];
+     var result = {
+       __result__: true,
+       intertype: 'return',
+       type: item.tokens[1].text,
+       value: valueToken ? valueToken.text : null,
+       lineNum: item.lineNum,
+     };
+     return [result];
+   },
+ });
+ // function end
+ substrate.addZyme({
+   // A column-0 line whose first token is '}' closes the current function.
+   selectItem: function(item) {
+     return item.indent === 0 && item.tokens && item.tokens.length >= 1 &&
+            item.tokens[0].text == '}' && !item.intertype;
+   },
+   processItem: function(item) {
+     var marker = {
+       __result__: true,
+       intertype: 'functionEnd',
+       lineNum: item.lineNum,
+     };
+     return [marker];
+   },
+ });
+ // external function stub
+ substrate.addZyme({
+   // Column-0 'declare' lines: functions defined outside this module.
+   selectItem: function(item) {
+     return item.indent === 0 && item.tokens && item.tokens.length >= 4 &&
+            item.tokens[0].text == 'declare' && !item.intertype;
+   },
+   // Layout read here: declare <return type> <name> <params>
+   processItem: function(item) {
+     var tokens = item.tokens;
+     var stub = {
+       __result__: true,
+       intertype: 'functionStub',
+       ident: tokens[2].text,
+       returnType: tokens[1],
+       params: tokens[3],
+       lineNum: item.lineNum,
+     };
+     return [stub];
+   },
+ });
+ // 'unreachable'
+ substrate.addZyme({
+   // Unparsed 'unreachable' statements at indent 2.
+   selectItem: function(item) {
+     return item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' &&
+            !item.intertype;
+   },
+   processItem: function(item) {
+     var marker = {
+       __result__: true,
+       intertype: 'unreachable',
+       lineNum: item.lineNum,
+     };
+     return [marker];
+   },
+ });
+
+ return substrate.solve();
+}
+
+// Analyze intertype data
+
+// String tags for the three kinds of variable implementation named in the
+// TODO at the top of the file (native / nativized vs. emulated). They are
+// assigned without `var`, so they are implicit globals.
+VAR_NATIVE = 'native';
+VAR_NATIVIZED = 'nativized';
+VAR_EMULATED = 'emulated';
+
+// Removes lines whose intertype is exactly null from a function's line list
+// and from each of its labels' line lists. Lines with any other intertype,
+// including undefined, are kept. The lists are replaced, not edited in place.
+function cleanFunc(func) {
+  function isLive(line) {
+    return line.intertype !== null;
+  }
+  func.lines = func.lines.filter(isLive);
+  for (var i = 0; i < func.labels.length; i++) {
+    var label = func.labels[i];
+    label.lines = label.lines.filter(isLive);
+  }
+}
+
+function analyzer(data) {
+//print('zz analaz')
+ substrate = new Substrate('Analyzer');
+
+ substrate.addItem({
+ items: data,
+ });
+
+ // Sorter
+ substrate.addZyme({
+ selectItem: function(item) { return !item.sorted; },
+ processItem: function(item) {
+ item.items.sort(function (a, b) { return a.lineNum - b.lineNum });
+ item.sorted = true;
+ return [item];
+ },
+ });
+
+ // Gatherer
+ substrate.addZyme({
+ selectItem: function(item) { return item.sorted && !item.gathered; },
+ processItem: function(item) {
+ // Single-liners
+ ['globalConstant', 'globalVariable', 'functionStub', 'type'].forEach(function(intertype) {
+ var temp = splitter(item.items, function(item) { return item.intertype == intertype });
+ item[intertype + 's'] = temp.splitOut;
+ item.items = temp.original;
+ });
+ // Functions & labels
+ item.functions = []
+ for (var i = 0; i < item.items.length; i++) {
+ var subItem = item.items[i];
+ if (subItem.intertype == 'function') {
+ item.functions.push(subItem);
+ subItem.endLineNum = null;
+ subItem.lines = [];
+ subItem.labels = [];
+ } else if (subItem.intertype == 'functionEnd') {
+ item.functions.slice(-1)[0].endLineNum = subItem.lineNum;
+ } else if (subItem.intertype == 'label') {
+ item.functions.slice(-1)[0].labels.push(subItem);
+ subItem.lines = [];
+ } else if (item.functions.slice(-1)[0].endLineNum === null) {
+ // Internal line
+ item.functions.slice(-1)[0].lines.push(subItem);
+ item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem);
+ } else {
+ print("ERROR: what is this? " + JSON.stringify(subItem));
+ }
+ }
+ delete item.items;
+ item.gathered = true;
+ return [item];
+ },
+ });
+
+ // IdentiNicer
+ substrate.addZyme({
+ selectItem: function(item) { return item.gathered && !item.identiniced; },
+ processItem: function(output) {
+ walkJSON(output, function(item) {
+ ['', '2', '3', '4', '5'].forEach(function(ext) {
+ if (item && item['ident' + ext])
+ item['ident' + ext] = toNiceIdent(item['ident' + ext]);
+ });
+ });
+ output.identiniced = true;
+ return [output];
+ }
+ });
+
+ function addType(type, data) {
+ if (['<', '(', 'internal', 'inbounds', 'void'].indexOf(type) != -1) return;
+ var check = /^\[(\d+)\ x\ (.*)\]$/g.exec(type);
+ // 'blocks': [14 x %struct.X] etc.
+ if (check) {
+ var num = parseInt(check[1]);
+ var subType = check[2];
+ data.types.push({
+ name_: type,
+ fields: range(num).map(function() { return subType }),
+ lineNum: '?',
+ });
+ return;
+ }
+ if (['['].indexOf(type) != -1) return;
+ if (isNumberType(type) || isPointerType(type)) return;
+ if (!data.types[type]) {
+// print("// New type: " + type);
+ data.types.push({
+ name_: type,
+ fields: [ 'int32' ], // XXX
+ flatSize: 1,
+ lineNum: '?',
+ });
+ }
+ }
+
+ // TypeVestigator
+ substrate.addZyme({
+ selectItem: function(item) { return item.gathered && !item.typevestigated; },
+ processItem: function(data) {
+ walkJSON(data, function(item) {
+ if (!item) return;
+ if (item.type) {
+ addType(!item.type.text ? item.type : item.type.text, data);
+ }
+ if (item.type2) {
+ addType(!item.type2.text ? item.type2 : item.type2.text, data);
+ }
+ });
+ data.typevestigated = true;
+ return [data];
+ }
+ });
+
+ // Type analyzer
+ substrate.addZyme({
+ selectItem: function(item) { return item.typevestigated && !item.typed; },
+ processItem: function(item) {
+//print('zz analaz types')
+ // 'fields' is the raw list of LLVM fields. However, we embed
+ // child structures into parent structures, basically like C.
+ // So { int, { int, int }, int } would be represented as
+ // an Array of 4 ints. getelementptr on the parent would take
+ // values 0, 1, 2, where 2 is the entire middle structure.
+ // We also need to be careful with getelementptr to child
+ // structures - we return a pointer to the same slab, just
+ // a different offset. Likewise, need to be careful for
+ // getelementptr of 2 (the last int) - it's real index is 4.
+ // The benefit of this approach is inheritance -
+ // { { ancestor } , etc. } = descendant
+ // In this case it is easy to bitcast ancestor to descendant
+ // pointers - nothing needs to be done. If the ancestor were
+ // a new slab, it would need some pointer to the outer one
+ // for casting in that direction.
+ // TODO: bitcasts of non-inheritance cases of embedding (not at start)
+ var more = true;
+ while (more) {
+ more = false;
+ function getType(t) {
+ return item.types.filter(function(type) { return type.name_ == t })[0];
+ }
+ item.types.forEach(function(type) {
+ var ready = true;
+ type.fields.forEach(function(field) {
+//print('// zz getT: ' + type.name_ + ' : ' + field);
+ if (isStructType(field)) {
+ if (!getType(field)) {
+ addType(field, item);
+ ready = false;
+ } else {
+ if (!getType(field).flatIndexes) {
+ ready = false;
+ }
+ }
+ }
+ });
+ if (!ready) {
+ more = true;
+ return;
+ }
+ type.flatSize = 0;
+ type.needsFlattening = false;
+ var sizes = [];
+ type.flatIndexes = type.fields.map(function(field) {
+ var curr = type.flatSize;
+ if (isStructType(field)) {
+ var size = getType(field).flatSize;
+ type.flatSize += size;
+ sizes.push(size);
+ type.needsFlattening = true;
+ } else {
+ type.flatSize ++;
+ }
+ return curr;
+ });
+ if (type.needsFlattening && dedup(sizes).length == 1) {
+ type.flatFactor = sizes[0];
+ }
+ });
+ }
+
+ item.types.forEach(function(type) {
+ print('// type: ' + type.name_);// + ' : ' + JSON.stringify(type.fields));
+ });
+ item.typed = true;
+ return [item];
+ },
+ });
+
+ // Variable analyzer
+ substrate.addZyme({
+ selectItem: function(item) { return item.typevestigated && !item.variablized; },
+ processItem: function(item) {
+ item.functions.forEach(function(func) {
+ func.variables = {};
+
+ // LLVM is SSA, so we always have a single assignment/write. We care about
+ // the reads/other uses.
+ walkJSON(func.lines, function(item) {
+//if (item && item.intertype == 'assign') print('zz assign: ' + JSON.stringify(item));
+ if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr'].indexOf(item.value.intertype) != -1) {
+//print("zz add var " + item.ident + ',' + item.intertype);
+ func.variables[item.ident] = {
+ ident: item.ident,
+ type: item.value.type.text,
+ origin: item.value.intertype,
+ uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]),
+ };
+ }
+ });
+
+ for (vname in func.variables) {
+ var variable = func.variables[vname];
+
+ // Whether the value itself is used. For an int, always yes. For a pointer,
+ // we might never use the pointer's value - we might always just store to it /
+ // read from it. If so, then we can optimize away the pointer.
+ variable.hasValueTaken = false;
+ // Whether our address was used. If not, then we do not need to bother with
+ // implementing this variable in a way that other functions can access it.
+ variable.hasAddrTaken = false;
+
+ variable.pointingLevels = pointingLevels(variable.type);
+
+ // Analysis!
+
+ if (variable.pointingLevels > 0) {
+ // Pointers
+ variable.loads = 0;
+ variable.stores = 0;
+
+ func.lines.forEach(function(line) {
+//print(dump(line))
+ if (line.intertype == 'store' && line.ident == vname) {
+ variable.stores ++;
+ } else if (line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) {
+ variable.loads ++;
+ }
+ });
+
+ variable.otherUses = variable.uses - variable.loads - variable.stores;
+ if (variable.otherUses > 0)
+ variable.hasValueTaken = true;
+ }
+
+ // Decision time