diff options
author | alon@honor <none@none> | 2010-08-25 21:01:10 -0700 |
---|---|---|
committer | alon@honor <none@none> | 2010-08-25 21:01:10 -0700 |
commit | a9256705ada4ae335870cdb60ae7f9c8373038e3 (patch) | |
tree | 2c7aeabbdf38a9fea035d6680f8ad31b2a7e0d46 /src/parser.js | |
parent | f6d98e5d038ee80177b9414e5e34ddc05857627b (diff) |
the code
Diffstat (limited to 'src/parser.js')
-rw-r--r-- | src/parser.js | 2286 |
1 files changed, 2286 insertions, 0 deletions
diff --git a/src/parser.js b/src/parser.js new file mode 100644 index 00000000..ba0a36e4 --- /dev/null +++ b/src/parser.js @@ -0,0 +1,2286 @@ +// LLVM parser +//============ + +/* + * TODO: + * * Re-use variables (of the same kind, native/nativized vs. emulated). + */ + +// Options + +OPTIMIZE = 1; +RELOOP = 1; + +LINEDEBUG = 0; + +// Prep - allow this to run in both SpiderMonkey and V8 + +if (!this['load']) { + load = function(f) { eval(snarf(f)) } +} +if (!this['read']) { + read = function(f) { snarf(f) } +} + +load('utility.js'); +load('enzymatic.js'); + +// Tools + +function addPointing(type) { return type + '*' } +function removePointing(type) { return type.substr(0, type.length-1) } + +function pointingLevels(type) { + var ret = 0; + while (type.substr(-ret-1, 1) === '*') { + ret ++; + } + return ret; +} + +function toNiceIdent(ident) { + if (parseFloat(ident) == ident) return ident; + return ident.replace(/[" \.@%]/g, '_'); +} + +function isNumberType(type) { + var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double']; + return types.indexOf(type) != -1; +} + +function isStructPointerType(type) { + var proof = '%struct'; + return type.substr(0, proof.length) == proof; +} + +function isStructType(type) { + if (/^\[\d+\ x\ (.*)\]/g.test(type)) return true; // [15 x ?] blocks. Like structs + var proof = '%struct'; + return type.substr(0, proof.length) == proof && !isPointerType(type); +} + +function isPointerType(type) { // TODO! + return pointingLevels(type) > 0; +} + +function isType(type) { // TODO! + return isNumberType(type) || isStructType(type) || isPointerType(type); +} + +function isFunctionDef(token) { + var text = token.text; + var pointing = pointingLevels(text); + var nonPointing = text; + for (var i = 0; i < pointing; i++) + nonPointing = removePointing(nonPointing); + if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')') + return false; + if (nonPointing == '(...)') return true; + if (!token.item) return false; + var fail = false; + token.item[0].tokens.forEach(function(subtoken) { + fail = fail || !isType(subtoken.text); + }); + return !fail; +} + +function addIdent(token) { + token.ident = token.text; + return token; +} + +// Splits out items that pass filter. Returns also the original sans the filtered +function splitter(array, filter) { + var splitOut = array.filter(filter); + var original = array.filter(function(x) { return !filter(x) }); + return { original: original, splitOut: splitOut }; +} + +function combineTokens(tokens) { + var ret = { + lineNum: tokens[0].lineNum, + text: '', + tokens: [], + }; + tokens.forEach(function(token) { + ret.text += token.text; + ret.tokens.push(token); + }); + return ret; +} + +function compareTokens(a, b) { + var aId = a.__uid__; + var bId = b.__uid__; + a.__uid__ = 0; + b.__uid__ = 0; + var ret = JSON.stringify(a) == JSON.stringify(b); + a.__uid__ = aId; + b.__uid__ = bId; + return ret; +} + +function splitTokenList(tokens) { + if (tokens.length == 0) return []; + if (tokens.slice(-1)[0].text != ',') tokens.push({text:','}); + var ret = []; + var seg = []; + tokens.forEach(function(token) { + if (token.text == ',') { + ret.push(seg); + seg = []; + } else { + seg.push(token); + } + }); + return ret; +} + +function makeSplitter(parentSlot, parentSlotValue, parentUnrequiredSlot, childSlot, copySlots) { + return { + selectItem: function(item) { return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot] && item[childSlot] !== null }, + processItem: function(parent) { + var child = parent[childSlot]; + parent[childSlot] = null; + child.parentUid = parent.__uid__; + child.parentSlot = childSlot; + child.lineNum = parent.lineNum; // Debugging + if (!copySlots) copySlots = []; + copySlots.forEach(function(slot) { child[slot] = parent[slot] }); + return [parent, child]; + }, + }; +} + +function makeCombiner(parentSlot, parentSlotValue, parentUnrequiredSlot, childRequiredSlot, finalizeFunc) { + return { + select: function(items) { + var parents = items.filter(function(item) { return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot] }); + for (var i = 0; i < parents.length; i++) { + var parent = parents[i]; + var child = items.filter(function(item) { return item[childRequiredSlot] && item.parentUid === parent.__uid__ })[0]; + if (child) return [parent, child]; + } + return []; + }, + process: function(items) { + var parent = items[0]; + var child = items[1]; + parent[child.parentSlot] = child; + delete child.parentUid; + delete child.parentSlot; + finalizeFunc(parent); + return [parent]; + }, + }; +} + +function parseParamTokens(params) { +//print('NEW params ' + JSON.stringify(params)); + if (params.length === 0) return []; + var ret = []; + if (params[params.length-1].text != ',') { + params.push({ text: ',' }); + } + while (params.length > 0) { +//print('params ' + JSON.stringify(params)); + var i = 0; + while (params[i].text != ',') i++; + var segment = params.slice(0, i); +//print(' seg ' + JSON.stringify(segment)); + params = params.slice(i+1); + if (segment[1].text === 'getelementptr' || segment[1].text === 'noalias') { + ret.push(parseGetElementPtr(segment)); + } else if (segment[1].text === 'bitcast') { + ret.push(parseBitcast(segment)); + } else { + if (segment[2] && segment[2].text == 'to') { // part of bitcast params + segment = segment.slice(0, 2); + } + while (segment.length > 2) { + segment[0].text += segment[1].text; + segment.splice(1, 1); // TODO: merge tokens nicely + } + ret.push({ + intertype: 'value', + type: segment[0], + value: segment[1], + ident: segment[1].text, + }); +// } else { +// throw "what is this params token? " + JSON.stringify(segment); + } + } + return ret; +} + +function parseGetElementPtr(segment) { + segment = segment.slice(0); + if (segment[1].text === 'noalias') { + segment.splice(1, 1); + } + var ret = { + intertype: 'getelementptr', + type: segment[0], + params: parseParamTokens(segment[3].item[0].tokens), + }; + ret.ident = toNiceIdent(ret.params[0].ident); + return ret; +} + +// TODO: use this +function parseBitcast(segment) { +//print('zz parseBC pre: ' + dump(segment)); + var ret = { + intertype: 'bitcast', + type: segment[0], + params: parseParamTokens(segment[2].item[0].tokens), + }; + ret.ident = toNiceIdent(ret.params[0].ident); +//print('zz parseBC: ' + dump(ret)); + return ret; +} + +function getLabelIds(labels) { + return labels.map(function(label) { return label.ident }); +} + +// ======================= + +// llvm => intertypes +function intertyper(data) { + // Substrate + + substrate = new Substrate('Intertyper'); + + // Input + + substrate.addItem({ + llvmText: data, + }); + + // Tools + + function findTokenText(item, text) { + for (var i = 0; i < item.tokens.length; i++) { + if (item.tokens[i].text == text) return i; + } + return -1; + } + + // Line splitter. + substrate.addZyme({ + selectItem: function(item) { return !!item.llvmText; }, + processItem: function(item) { + var lines = item.llvmText.split('\n'); + var ret = []; + for (var i = 0; i < lines.length; i++) { + if (/^\ +to.*/g.test(lines[i])) { + // to after invoke + ret.slice(-1)[0].lineText += lines[i]; + } else { + ret.push({ + lineText: lines[i], + lineNum: i + 1, + }); + } + } + return ret.filter(function(item) { return item.lineText; }); + }, + }); + + // Line tokenizer + substrate.addZyme({ + selectItem: function(item) { return item.lineText; }, + processItem: function(item) { +//print("line: " + item.lineText); + var lineText = item.lineText + " "; + var tokens = []; + var tokenStart = -1; + var indent = -1; + var quotes = 0; + var i = 0; + // Note: '{' is not an encloser, as its use in functions is split over many lines + var enclosers = { + '[': 0, + ']': '[', + '(': 0, + ')': '(', + '<': 0, + '>': '<', + }; + function notQuoted() { + return quotes == 0; + } + function notEnclosed() { + for (var i in enclosers) { + if (typeof enclosers[i] === 'number' && enclosers[i] > 0) + return false; + } + return true; + } + var that = this; + function tryStartToken() { + if (tokenStart == -1 && notEnclosed() && notQuoted()) { +//print("try START " + tokenStart + ',' + JSON.stringify(enclosers)); + tokenStart = i; + } + } + function tryFinishToken(includeThis) { + if (tokenStart >= 0 && notEnclosed() && notQuoted()) { +//print("try finish " + tokenStart + ',' + JSON.stringify(enclosers)); + var token = { + text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)), + }; + if (token.text[0] in enclosers) { + token.item = that.processItem({ + lineText: token.text.substr(1, token.text.length-2) + }); + token.type = token.text[0]; + } + if (indent == -1) { + indent = tokenStart; + } + // merge certain tokens + if ( (tokens.length > 0 && tokens.slice(-1)[0].text == '%' && token.text[0] == '"' ) || + (tokens.length > 0 && token.text.replace(/\*/g, '') == '') ) { + tokens.slice(-1)[0].text += token.text; + } else if (tokens.length > 0 && isType(tokens.slice(-1)[0].text) && isFunctionDef(token)) { + tokens.slice(-1)[0].text += ' ' + token.text; + } else if (tokens.length > 0 && token.text[token.text.length-1] == '}') { + var openBrace = tokens.length-1; + while (tokens[openBrace].text != '{') openBrace --; + token = combineTokens(tokens.slice(openBrace+1)); + tokens.splice(openBrace, tokens.length-openBrace+1); + tokens.push(token); + tokens.slice(-1)[0].type = '{'; + } else { + tokens.push(token); + } +// print("new token: " + dump(tokens.slice(-1)[0])); + tokenStart = -1; + } + } + for (; i < lineText.length; i++) { + var letter = lineText[i]; +//print("letter: " + letter); + switch (letter) { + case ' ': + tryFinishToken(); + break; + case '"': + tryFinishToken(); + tryStartToken(); + quotes = 1-quotes; + break; + case ',': + tryFinishToken(); + if (notEnclosed() && notQuoted()) { + tokens.push({ text: ',' }); + } + break; + default: + if (letter in enclosers && notQuoted()) { + if (typeof enclosers[letter] === 'number') { + tryFinishToken(); + tryStartToken(); + enclosers[letter]++; + } else { + enclosers[enclosers[letter]]--; + tryFinishToken(true); + } +//print(' post-enclosers: ' + JSON.stringify(enclosers)); + } else { + tryStartToken(); + } + } + } + return [{ + tokens: tokens, + indent: indent, + lineNum: item.lineNum, + }]; + }, + }); + + // Line parsers to intermediate form + + // Comment + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens[0].text == ';' }, + processItem: function(item) { return [] }, + }); + // target + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens[0].text == 'target' }, + processItem: function(item) { return [] }, + }); + // globals: type or constant + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=' }, + processItem: function(item) { + if (item.tokens[2].text == 'type') { + // type +//print('// zz ' + dump(item)); + var fields = []; + if (item.tokens[3].text != 'opaque') { + var subTokens = item.tokens[3].tokens; + subTokens.push({text:','}); + while (subTokens[0]) { + var stop = 1; + while ([','].indexOf(subTokens[stop].text) == -1) stop ++; + fields.push(combineTokens(subTokens.slice(0, stop)).text); + subTokens.splice(0, stop+1); + } + } + return [{ + __result__: true, + intertype: 'type', + name_: item.tokens[0].text, + fields: fields, + lineNum: item.lineNum, + }] + } else if (item.tokens[2].text == 'global') { + // variable + return [{ + __result__: true, + intertype: 'globalVariable', + ident: item.tokens[0].text, + type: item.tokens[3].text, + value: item.tokens[4], + lineNum: item.lineNum, + }] + } else { + // constant + var ident = item.tokens[0].text; + while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 }) + item.tokens.splice(2, 1); + var ret = { + __result__: true, + intertype: 'globalConstant', + ident: ident, + type: item.tokens[2], + lineNum: item.lineNum, + }; + if (ident == '@llvm.global_ctors') { + ret.ctors = []; + var subTokens = item.tokens[3].item[0].tokens; + splitTokenList(subTokens).forEach(function(segment) { + ret.ctors.push(segment[1].tokens.slice(-1)[0].text); + }); + } else { + if (item.tokens[3].text == 'c') + item.tokens.splice(3, 1); + ret.value = item.tokens[3]; + } + return [ret]; + } + }, + }); + // function header + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' && + item.tokens.slice(-1)[0].text == '{' }, + processItem: function(item) { + if (item.tokens.slice(-3,-2)[0].text == 'align') + item.tokens.splice(-3,2); + if (item.tokens.slice(-2,-1)[0].text == 'nounwind') + item.tokens.splice(-2,1); + while (item.tokens.length > 5) + item.tokens.splice(1, 1); + return [{ + __result__: true, + intertype: 'function', + ident: item.tokens[2].text, + returnType: item.tokens[1], + params: item.tokens[3], + lineNum: item.lineNum, + }]; + }, + }); + // label + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':' }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'label', + ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1), + lineNum: item.lineNum, + }]; + }, + }); + // assignment + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 && + !item.intertype }, + processItem: function(item) { + var opIndex = findTokenText(item, '='); + return [{ + intertype: 'assign', + ident: combineTokens(item.tokens.slice(0, opIndex)).text, + value: null, + lineNum: item.lineNum, + }, { // Additional token, to be parsed, and later re-integrated + indent: -1, + tokens: item.tokens.slice(opIndex+1), + parentLineNum: item.lineNum, + parentSlot: 'value', + }]; + }, + }); + // reintegration - find intermediate representation-parsed items and + // place back in parents + substrate.addZyme({ + select: function(items) { + for (var i = 0; i < items.length; i++) { + if (items[i].parentSlot && items[i].intertype) { + for (var j = 0; j < items.length; j++) { + if (items[j].lineNum == items[i].parentLineNum) { + return [items[j], items[i]]; + } + } + } + } + return []; + }, + process: function(items) { + var parent = items[0]; + var child = items[1]; + parent[child.parentSlot] = child; + parent.__result__ = true; + delete child.parentLineNum; + return [parent]; + } + }); + // 'load' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load' }, + processItem: function(item) { + item.intertype = 'load'; + item.pointerType = item.tokens[1]; + item.pointer = item.tokens[2]; + item.ident = item.pointer.text; +//print("// zz zz pointer: " + JSON.stringify(item)); + item.type = { text: removePointing(item.pointerType.text) }; + return [item]; + }, + }); + // 'bitcast' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast' }, + processItem: function(item) { + item.intertype = 'bitcast'; + item.type = item.tokens[1]; + item.ident = item.tokens[2].text; + item.type2 = item.tokens[4]; + return [item]; + }, + }); + // 'getelementptr' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr' }, + processItem: function(item) { + var last = 0; + while (item.tokens[last].text != ';') last++; + var segment = [ item.tokens[1], { text: null }, null, { item: [ { + tokens: item.tokens.slice(2, last) + } ] } ]; + var data = parseGetElementPtr(segment); + item.intertype = 'getelementptr'; + item.type = data.type; + item.params = data.params; + item.ident = data.ident; + return [item]; + }, + }); + // 'call' + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype }, + processItem: function(item) { + item.intertype = 'call'; + if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) { + item.tokens.splice(1, 1); + } + item.type = item.tokens[1]; + item.functionType = ''; + while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { + item.functionType += item.tokens[2].text; + item.tokens.splice(2, 1); + } + item.ident = item.tokens[2].text; + item.params = parseParamTokens(item.tokens[3].item[0].tokens); + if (item.indent == 2) { + // standalone call - not in assign + item.standalone = true; + item.__result__ = true; + } + return [item]; + }, + }); + // 'invoke' + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype }, + processItem: function(item) { + item.intertype = 'invoke'; + item.type = item.tokens[1]; + item.functionType = ''; + while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { + item.functionType += item.tokens[2].text; + item.tokens.splice(2, 1); + } + item.ident = item.tokens[2].text; + item.params = parseParamTokens(item.tokens[3].item[0].tokens); + item.toLabel = item.tokens[6].text; + item.unwindLabel = item.tokens[9].text; + item.__result__ = true; + return [item]; + }, + }); + // 'alloca' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca' }, + processItem: function(item) { + item.intertype = 'alloca'; + item.allocatedType = item.tokens[1]; + item.type = { text: addPointing(item.tokens[1].text) }; + return [item]; + }, + }); + // mathops + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && + ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptoui', 'fptosi', 'trunc', 'sext', 'select'] + .indexOf(item.tokens[0].text) != -1 && !item.intertype }, + processItem: function(item) { + item.intertype = 'mathop'; + item.op = item.tokens[0].text; + item.variant = null; + if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1); + if (['icmp', 'fcmp'].indexOf(item.op) != -1) { + item.variant = item.tokens[1].text; + item.tokens.splice(1, 1); + } + item.type = item.tokens[1]; + item.ident = item.tokens[2].text; + item.ident2 = item.tokens[4].text; + item.ident3 = item.tokens[5] ? item.tokens[5].text : null; + item.ident4 = item.tokens[8] ? item.tokens[8].text : null; +//print('// zz got maptop ' + item.op + ',' + item.variant + ',' + item.ident + ',' + item.value); + return [item]; + }, + }); + // 'store' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' && + !item.intertype }, + processItem: function(item) { + if (item.tokens[3].text != ',') { + assertEq(item.tokens[2].text, 'getelementptr'); + // complex input - likely getelementptr + var commaIndex = 4; + while (item.tokens[commaIndex].text != ',') commaIndex ++; + return [{ + __result__: true, + intertype: 'store', + valueType: item.tokens[1], + value: parseGetElementPtr(item.tokens.slice(1, commaIndex)), + pointerType: item.tokens[commaIndex+1], + pointer: item.tokens[commaIndex+2], + ident: item.tokens[commaIndex+2].text, + lineNum: item.lineNum, + }]; + } + return [{ + __result__: true, + intertype: 'store', + valueType: item.tokens[1], + value: addIdent(item.tokens[2]), + pointerType: item.tokens[4], + pointer: item.tokens[5], + ident: item.tokens[5].text, + lineNum: item.lineNum, + }]; + }, + }); + // 'br' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' && + !item.intertype }, + processItem: function(item) { + if (item.tokens[1].text == 'label') { + return [{ + __result__: true, + intertype: 'branch', + label: toNiceIdent(item.tokens[2].text), + lineNum: item.lineNum, + }]; + } else { + return [{ + __result__: true, + intertype: 'branch', + ident: item.tokens[2].text, + labelTrue: toNiceIdent(item.tokens[5].text), + labelFalse: toNiceIdent(item.tokens[8].text), + lineNum: item.lineNum, + }]; + } + }, + }); + // 'ret' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'return', + type: item.tokens[1].text, + value: item.tokens[2] ? item.tokens[2].text : null, + lineNum: item.lineNum, + }]; + }, + }); + // function end + substrate.addZyme({ + selectItem: function(item) { return item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'functionEnd', + lineNum: item.lineNum, + }]; + }, + }); + // external function stub + substrate.addZyme({ + selectItem: function(item) { return item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'functionStub', + ident: item.tokens[2].text, + returnType: item.tokens[1], + params: item.tokens[3], + lineNum: item.lineNum, + }]; + }, + }); + // 'unreachable' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'unreachable', + lineNum: item.lineNum, + }]; + }, + }); + + return substrate.solve(); +} + +// Analyze intertype data + +VAR_NATIVE = 'native'; +VAR_NATIVIZED = 'nativized'; +VAR_EMULATED = 'emulated'; + +function cleanFunc(func) { + func.lines = func.lines.filter(function(line) { return line.intertype !== null }); + func.labels.forEach(function(label) { + label.lines = label.lines.filter(function(line) { return line.intertype !== null }); + }); +} + +function analyzer(data) { +//print('zz analaz') + substrate = new Substrate('Analyzer'); + + substrate.addItem({ + items: data, + }); + + // Sorter + substrate.addZyme({ + selectItem: function(item) { return !item.sorted; }, + processItem: function(item) { + item.items.sort(function (a, b) { return a.lineNum - b.lineNum }); + item.sorted = true; + return [item]; + }, + }); + + // Gatherer + substrate.addZyme({ + selectItem: function(item) { return item.sorted && !item.gathered; }, + processItem: function(item) { + // Single-liners + ['globalConstant', 'globalVariable', 'functionStub', 'type'].forEach(function(intertype) { + var temp = splitter(item.items, function(item) { return item.intertype == intertype }); + item[intertype + 's'] = temp.splitOut; + item.items = temp.original; + }); + // Functions & labels + item.functions = [] + for (var i = 0; i < item.items.length; i++) { + var subItem = item.items[i]; + if (subItem.intertype == 'function') { + item.functions.push(subItem); + subItem.endLineNum = null; + subItem.lines = []; + subItem.labels = []; + } else if (subItem.intertype == 'functionEnd') { + item.functions.slice(-1)[0].endLineNum = subItem.lineNum; + } else if (subItem.intertype == 'label') { + item.functions.slice(-1)[0].labels.push(subItem); + subItem.lines = []; + } else if (item.functions.slice(-1)[0].endLineNum === null) { + // Internal line + item.functions.slice(-1)[0].lines.push(subItem); + item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem); + } else { + print("ERROR: what is this? " + JSON.stringify(subItem)); + } + } + delete item.items; + item.gathered = true; + return [item]; + }, + }); + + // IdentiNicer + substrate.addZyme({ + selectItem: function(item) { return item.gathered && !item.identiniced; }, + processItem: function(output) { + walkJSON(output, function(item) { + ['', '2', '3', '4', '5'].forEach(function(ext) { + if (item && item['ident' + ext]) + item['ident' + ext] = toNiceIdent(item['ident' + ext]); + }); + }); + output.identiniced = true; + return [output]; + } + }); + + function addType(type, data) { + if (['<', '(', 'internal', 'inbounds', 'void'].indexOf(type) != -1) return; + var check = /^\[(\d+)\ x\ (.*)\]$/g.exec(type); + // 'blocks': [14 x %struct.X] etc. + if (check) { + var num = parseInt(check[1]); + var subType = check[2]; + data.types.push({ + name_: type, + fields: range(num).map(function() { return subType }), + lineNum: '?', + }); + return; + } + if (['['].indexOf(type) != -1) return; + if (isNumberType(type) || isPointerType(type)) return; + if (!data.types[type]) { +// print("// New type: " + type); + data.types.push({ + name_: type, + fields: [ 'int32' ], // XXX + flatSize: 1, + lineNum: '?', + }); + } + } + + // TypeVestigator + substrate.addZyme({ + selectItem: function(item) { return item.gathered && !item.typevestigated; }, + processItem: function(data) { + walkJSON(data, function(item) { + if (!item) return; + if (item.type) { + addType(!item.type.text ? item.type : item.type.text, data); + } + if (item.type2) { + addType(!item.type2.text ? item.type2 : item.type2.text, data); + } + }); + data.typevestigated = true; + return [data]; + } + }); + + // Type analyzer + substrate.addZyme({ + selectItem: function(item) { return item.typevestigated && !item.typed; }, + processItem: function(item) { +//print('zz analaz types') + // 'fields' is the raw list of LLVM fields. However, we embed + // child structures into parent structures, basically like C. + // So { int, { int, int }, int } would be represented as + // an Array of 4 ints. getelementptr on the parent would take + // values 0, 1, 2, where 2 is the entire middle structure. + // We also need to be careful with getelementptr to child + // structures - we return a pointer to the same slab, just + // a different offset. Likewise, need to be careful for + // getelementptr of 2 (the last int) - it's real index is 4. + // The benefit of this approach is inheritance - + // { { ancestor } , etc. } = descendant + // In this case it is easy to bitcast ancestor to descendant + // pointers - nothing needs to be done. If the ancestor were + // a new slab, it would need some pointer to the outer one + // for casting in that direction. + // TODO: bitcasts of non-inheritance cases of embedding (not at start) + var more = true; + while (more) { + more = false; + function getType(t) { + return item.types.filter(function(type) { return type.name_ == t })[0]; + } + item.types.forEach(function(type) { + var ready = true; + type.fields.forEach(function(field) { +//print('// zz getT: ' + type.name_ + ' : ' + field); + if (isStructType(field)) { + if (!getType(field)) { + addType(field, item); + ready = false; + } else { + if (!getType(field).flatIndexes) { + ready = false; + } + } + } + }); + if (!ready) { + more = true; + return; + } + type.flatSize = 0; + type.needsFlattening = false; + var sizes = []; + type.flatIndexes = type.fields.map(function(field) { + var curr = type.flatSize; + if (isStructType(field)) { + var size = getType(field).flatSize; + type.flatSize += size; + sizes.push(size); + type.needsFlattening = true; + } else { + type.flatSize ++; + } + return curr; + }); + if (type.needsFlattening && dedup(sizes).length == 1) { + type.flatFactor = sizes[0]; + } + }); + } + + item.types.forEach(function(type) { + print('// type: ' + type.name_);// + ' : ' + JSON.stringify(type.fields)); + }); + item.typed = true; + return [item]; + }, + }); + + // Variable analyzer + substrate.addZyme({ + selectItem: function(item) { return item.typevestigated && !item.variablized; }, + processItem: function(item) { + item.functions.forEach(function(func) { + func.variables = {}; + + // LLVM is SSA, so we always have a single assignment/write. We care about + // the reads/other uses. + walkJSON(func.lines, function(item) { +//if (item && item.intertype == 'assign') print('zz assign: ' + JSON.stringify(item)); + if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr'].indexOf(item.value.intertype) != -1) { +//print("zz add var " + item.ident + ',' + item.intertype); + func.variables[item.ident] = { + ident: item.ident, + type: item.value.type.text, + origin: item.value.intertype, + uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]), + }; + } + }); + + for (vname in func.variables) { + var variable = func.variables[vname]; + + // Whether the value itself is used. For an int, always yes. For a pointer, + // we might never use the pointer's value - we might always just store to it / + // read from it. If so, then we can optimize away the pointer. + variable.hasValueTaken = false; + // Whether our address was used. If not, then we do not need to bother with + // implementing this variable in a way that other functions can access it. + variable.hasAddrTaken = false; + + variable.pointingLevels = pointingLevels(variable.type); + + // Analysis! + + if (variable.pointingLevels > 0) { + // Pointers + variable.loads = 0; + variable.stores = 0; + + func.lines.forEach(function(line) { +//print(dump(line)) + if (line.intertype == 'store' && line.ident == vname) { + variable.stores ++; + } else if (line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) { + variable.loads ++; + } + }); + + variable.otherUses = variable.uses - variable.loads - variable.stores; + if (variable.otherUses > 0) + variable.hasValueTaken = true; + } + + // Decision time |