diff options
Diffstat (limited to 'src/parser.js')
-rw-r--r-- | src/parser.js | 2572 |
1 files changed, 0 insertions, 2572 deletions
// Scraped diff header, kept for provenance:
// diff --git a/src/parser.js b/src/parser.js deleted file mode 100644 index c904cde1..00000000 --- a/src/parser.js +++ /dev/null @@ -1,2572 +0,0 @@

// LLVM parser
//============

/*
 * TODO:
 *   * Re-use variables (of the same kind, native/nativized vs. emulated).
 */

// Prep - allow this to run in both SpiderMonkey and V8.
// `typeof` is used rather than `this[...]` so the checks do not depend on the
// top-level `this` being the global object.
// NOTE(review): the fallback `load` runs a direct eval inside a function, so
// `var`/function declarations in the loaded file may stay local to that call
// instead of becoming globals - confirm against the shells this targets.

if (typeof load === 'undefined') {
  load = function(f) { eval(snarf(f)) };
}
if (typeof read === 'undefined') {
  // Return the contents; the earlier fallback called snarf() and dropped the result.
  read = function(f) { return snarf(f) };
}

load('settings.js');
if (LABEL_DEBUG && RELOOP) throw "Cannot debug labels if they have been relooped!";

load('utility.js');
load('enzymatic.js');
load('snippets.js');

// Tools

// Simple #if/else/endif preprocessing for a file. Checks if the
// ident checked is true in our global.
//
// Lines starting with '#' are directives and are never emitted. '#if IDENT'
// shows the following lines only when the global IDENT is truthy, '#else'
// flips the current state and '#endif' always re-enables output, so nested
// conditionals are not supported. Every emitted line gets a trailing '\n'.
// Throws a string for any other '#' directive.
function preprocess(text) {
  // Look identifiers up on the global object explicitly: a plain-call `this`
  // is only the global object in sloppy mode.
  var scope = (typeof globalThis !== 'undefined') ? globalThis : this;
  var lines = text.split('\n');
  var ret = '';
  var show = true;
  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];
    if (line[0] != '#') {
      if (show) {
        ret += line + '\n';
      }
    } else {
      if (line[1] == 'i') { // if
        var ident = line.substr(4); // skip '#if '
        show = !!scope[ident];
      } else if (line[2] == 'l') { // else
        show = !show;
      } else if (line[2] == 'n') { // endif
        show = true;
      } else {
        throw "Unclear preprocessor command: " + line;
      }
    }
  }
  return ret;
}

// Returns the type of a pointer to |type|.
function addPointing(type) { return type + '*' }

// Strips |num| trailing characters (pointer stars) from |type|. An omitted
// |num| strips one level; a |num| of exactly 0 returns |type| unchanged.
function removePointing(type, num) {
  if (num === 0) return type;
  return type.substr(0, type.length-(num ? num : 1))
}

// Counts the trailing '*' characters of |type|; 0 for a missing/empty type.
function pointingLevels(type) {
  if (!type) return 0;
  var ret = 0;
  var len1 = type.length - 1;
  while (type[len1-ret] === '*') {
    ret ++;
  }
  return ret;
}

// Turns an LLVM identifier into one usable in generated code: numeric
// identifiers are returned as-is, 'null' becomes '0', and the characters
// " space . @ % : < > , * are each replaced by '_'.
function toNiceIdent(ident) {
  if (parseFloat(ident) == ident) return ident; // loose compare is intended: number vs. its text
  if (ident == 'null') return '0'; // see parseNumerical
  return ident.replace(/[" \.@%:<>,\*]/g, '_');
}

// True for the scalar number types this parser knows about.
function isNumberType(type) {
  var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double'];
  return types.indexOf(type) != -1;
}

function isStructPointerType(type) {
  // This test is necessary for clang - in llvm-gcc, we
  // could check for %struct. The downside is that %1 can
  // be either a variable or a structure, and we guess it is
  // a struct, which can lead to |call i32 %5()| having
  // |%5()| as a function call (like |i32 (i8*)| etc.). So
  // we must check later on, in call(), where we have more
  // context, to differentiate such cases.
  // A similar thing happens in isStructType()
  return !isNumberType(type) && type[0] == '%';
}

// True for non-pointer aggregate types: '[N x T]' blocks and '%name' types.
function isStructType(type) {
  if (isPointerType(type)) return false;
  if (/^\[\d+ x .*\]/.test(type)) return true; // [15 x ?] blocks. Like structs
  // See comment in isStructPointerType()
  return !isNumberType(type) && type[0] == '%';
}

function isPointerType(type) { // TODO!
  return pointingLevels(type) > 0;
}

function isVoidType(type) {
  return type == 'void';
}

function isType(type) { // TODO!
  return isVoidType(type) || isNumberType(type) || isStructType(type) || isPointerType(type);
}

// Detects a function definition, ([...|type,[type,...]])
// |token.text| must be a parenthesised list, optionally followed by pointer
// stars. '(...)' is accepted directly; otherwise the token needs a parsed
// |item| and every comma-separated segment must be a single type token.
function isFunctionDef(token) {
  var text = token.text;
  var pointing = pointingLevels(text);
  var nonPointing = removePointing(text, pointing);
  if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')')
    return false;
  if (nonPointing == '(...)') return true;
  if (!token.item) return false;
  var fail = false;
  splitTokenList(token.item[0].tokens).forEach(function(segment) {
    var subtoken = segment[0];
    fail = fail || !isType(subtoken.text) || segment.length > 1;
  });
  return !fail;
}

// Copies the token's text into its |ident| slot and returns the same token.
function addIdent(token) {
  token.ident = token.text;
  return token;
}

// Merges |tokens| into one token: texts are concatenated with no separator,
// the originals are kept in |tokens|, and lineNum is taken from the first.
function combineTokens(tokens) {
  var ret = {
    lineNum: tokens[0].lineNum,
    text: '',
    tokens: [],
  };
  tokens.forEach(function(token) {
    ret.text += token.text;
    ret.tokens.push(token);
  });
  return ret;
}

// Compares two tokens by their JSON form while ignoring __uid__. The uids are
// zeroed only for the duration of the comparison and then restored.
function compareTokens(a, b) {
  var aId = a.__uid__;
  var bId = b.__uid__;
  a.__uid__ = 0;
  b.__uid__ = 0;
  var ret = JSON.stringify(a) == JSON.stringify(b);
  a.__uid__ = aId;
  b.__uid__ = bId;
  return ret;
}

// Returns the index of the first token whose text equals |text|, or
// tokens.length when there is none, so that tokens.slice(x, result) then
// simply runs to the end of the list. Earlier versions ignored |text| and
// always searched for ';', failing with a TypeError when it was absent.
function getTokenIndexByText(tokens, text) {
  for (var i = 0; i < tokens.length; i++) {
    if (tokens[i].text == text) return i;
  }
  return tokens.length;
}

// Returns the index of the first token of |item| whose text equals |text|,
// or -1 if there is none.
function findTokenText(item, text) {
  for (var i = 0; i < item.tokens.length; i++) {
    if (item.tokens[i].text == text) return i;
  }
  return -1;
}

// Splits a list of tokens separated by commas.
// For example, a list of arguments in a function call.
// Returns an array of segments, each an array of the tokens between two
// commas. A trailing comma does not produce an extra empty segment, and an
// empty input yields []. The input array is not modified.
function splitTokenList(tokens) {
  if (tokens.length == 0) return [];
  var ret = [];
  var seg = [];
  tokens.forEach(function(token) {
    if (token.text == ',') {
      ret.push(seg);
      seg = [];
    } else {
      seg.push(token);
    }
  });
  // Close the last segment when the list does not end with a comma.
  if (tokens[tokens.length-1].text != ',') ret.push(seg);
  return ret;
}

// Splits an item, with the intent of later reintegration
// The value in parent[childSlot] (or a fresh {} if it is falsy) becomes the
// child; the parent's slot is set to null until the child is put back. The
// child records parentUid, parentSlot and parentLineNum, shares the parent's
// lineNum, and receives a copy of every parent slot named in |copySlots|.
// Returns { parent, child }.
function splitItem(parent, childSlot, copySlots) {
  if (!copySlots) copySlots = [];
  if (!parent[childSlot]) parent[childSlot] = {};
  var child = parent[childSlot];
  parent[childSlot] = null;
  child.parentUid = parent.__uid__;
  child.parentSlot = childSlot;
  child.parentLineNum = child.lineNum = parent.lineNum;
  copySlots.forEach(function(slot) { child[slot] = parent[slot] });
  return {
    parent: parent,
    child: child,
  };
}

// Builds a zyme-style object whose process(items) pairs children with parents.
function makeReintegrator(afterFunc) {
  // reintegration - find intermediate representation-parsed items and
  // place back in parents TODO: Optimize this code to optimal O(..)
  return {
    // For every item that has a parentSlot, finds another item whose lineNum
    // equals the child's parentLineNum, stores the child in that slot and
    // calls afterFunc(parent, child) with |this| set to the zyme. Unpaired
    // items are passed to this.forwardItems(..., this.name_) for a later
    // round. Always returns an empty array.
    process: function(items) {
      var ret = [];
      for (var i = 0; i < items.length; i++) {
        var child = items[i];
        if (!child || !child.parentSlot) continue;
        for (var j = 0; j < items.length; j++) {
          // j != i: splitItem gives a child its parent's lineNum, so without
          // this check a child seen before its parent would pair with itself.
          if (j != i && items[j] && items[j].lineNum == child.parentLineNum) {
            var parent = items[j];
            // process the pair
            parent[child.parentSlot] = child;
            delete child.parentLineNum;
            afterFunc.call(this, parent, child);

            items[i] = null;
            items[j] = null;
            break;
          }
        }
      }
      this.forwardItems(items.filter(function(item) { return !!item }), this.name_); // next time hopefully
      return ret;
    }
  };
}

// Parses a comma-separated parameter token list into intertype entries:
//   '...'                        -> { intertype: 'varargs' }
//   a lone type token            -> a 'value' with ident '_' + position
//   type getelementptr (...)     -> parseGetElementPtr()
//   type bitcast (...)           -> parseBitcast()
//   anything else                -> a 'value' of type segment[0], value segment[1]
// The |params| array itself is not modified.
// NOTE: when a segment has more than two tokens, the text of its first token
// is extended in place while the extra tokens are merged into it.
function parseParamTokens(params) {
  if (params.length === 0) return [];
  var ret = [];
  // Work on a copy so the terminating comma is not pushed onto the caller's array.
  params = params.slice(0);
  if (params[params.length-1].text != ',') {
    params.push({ text: ',' });
  }
  var absIndex = 0;
  while (params.length > 0) {
    var i = 0;
    while (params[i].text != ',') i++;
    var segment = params.slice(0, i);
    params = params.slice(i+1);
    segment = cleanSegment(segment);
    if (segment.length == 1) {
      if (segment[0].text == '...') {
        ret.push({
          intertype: 'varargs',
        });
      } else {
        // Clang sometimes has a parameter with just a type,
        // no name... the name is implied to be %{the index}
        ret.push({
          intertype: 'value',
          type: segment[0],
          value: null,
          ident: '_' + absIndex,
        });
      }
    } else if (segment[1].text === 'getelementptr') {
      ret.push(parseGetElementPtr(segment));
    } else if (segment[1].text === 'bitcast') {
      ret.push(parseBitcast(segment));
    } else {
      if (segment[2] && segment[2].text == 'to') { // part of bitcast params
        segment = segment.slice(0, 2);
      }
      while (segment.length > 2) {
        segment[0].text += segment[1].text;
        segment.splice(1, 1); // TODO: merge tokens nicely
      }
      ret.push({
        intertype: 'value',
        type: segment[0],
        value: segment[1],
        ident: segment[1].text,
      });
    }
    absIndex ++;
  }
  return ret;
}

// Removes parameter attributes (noalias, sret, ...) that directly follow the
// first token of |segment|. Modifies and returns |segment|. The length check
// keeps this safe for empty segments and for segments that consist of a type
// followed only by attributes.
function cleanSegment(segment) {
  var attributes = ['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'];
  while (segment.length > 1 && attributes.indexOf(segment[1].text) != -1) {
    segment.splice(1, 1);
  }
  return segment;
}

// Expects one of the several LLVM getelementptr formats:
// a qualifier, a type, a null, then an () item with tokens
// Returns { intertype: 'getelementptr', type, params, ident }, where params
// come from the parenthesised item in segment[3] and ident is the nice ident
// of the first param. Works on a copy of |segment|.
function parseGetElementPtr(segment) {
  segment = segment.slice(0);
  segment = cleanSegment(segment);
  assertTrue(['inreg', 'byval'].indexOf(segment[1].text) == -1);
  var ret = {
    intertype: 'getelementptr',
    type: segment[0],
    params: parseParamTokens(segment[3].item[0].tokens),
  };
  ret.ident = toNiceIdent(ret.params[0].ident);
  return ret;
}

// TODO: use this
// Returns { intertype: 'bitcast', type, params, ident }, with params taken
// from the parenthesised item in segment[2].
function parseBitcast(segment) {
  var ret = {
    intertype: 'bitcast',
    type: segment[0],
    params: parseParamTokens(segment[2].item[0].tokens),
  };
  ret.ident = toNiceIdent(ret.params[0].ident);
  return ret;
}

// Removes, in place, the run of tokens starting at |index| whose text is
// listed in |filterOut|. Stops at the first other token or at the end of
// the token list.
function cleanOutTokens(filterOut, tokens, index) {
  while (index < tokens.length && filterOut.indexOf(tokens[index].text) != -1) {
    tokens.splice(index, 1);
  }
}

// Converts a string of hex digits (either case) to a number. '' gives 0.
// Characters are not validated.
function _HexToInt(stringy) {
  var ret = 0;
  var mul = 1;
  var base;
  for (var i = (stringy.length - 1); i >= 0; i = i - 1) {
    var code = stringy.charCodeAt(i);
    if (code >= "a".charCodeAt(0)) {
      base = "a".charCodeAt(0) - 10;
    } else if (code >= "A".charCodeAt(0)) {
      base = "A".charCodeAt(0) - 10;
    } else {
      base = "0".charCodeAt(0);
    }
    ret = ret + (mul*(code - base));
    mul = mul * 16;
  }
  return ret;
}

// Decodes LLVM's hexadecimal double notation: '0x' followed by the 16 hex
// digits of the IEEE 754 binary64 bit pattern. Handles the sign bit, zero,
// denormals, Infinity and NaN.
function IEEEUnHex(stringy) {
  var a = _HexToInt(stringy.substr(2, 8)); // high 32 bits: sign, exponent, top of mantissa
  var b = _HexToInt(stringy.substr(10));   // low 32 bits of the mantissa
  var negative = (a >>> 31) !== 0;
  var exponent = (a >>> (52 - 32)) & 0x7ff;
  var mantissa = (a & 0xfffff) * 4294967296 + b; // 52 bits, exactly representable
  var fraction = mantissa / 4503599627370496;    // mantissa / 2^52
  var ret;
  if (exponent === 0x7ff) {
    ret = mantissa === 0 ? Infinity : NaN;
  } else if (exponent === 0) {
    // zero and denormals: no implicit leading 1
    ret = fraction * Math.pow(2, -1022);
  } else {
    ret = (1 + fraction) * Math.pow(2, exponent - 1023);
  }
  return negative ? -ret : ret;
}

-function parseNumerical(value, type) { - if ((!type || type == 'double' || type == 'float') && value.substr(0,2) == '0x') { - // Hexadecimal double value, as the llvm docs say, - // "The one non-intuitive notation for constants is the hexadecimal form of floating point constants." - return IEEEUnHex(value); - } - if (value == 'null') { - // NULL *is* 0, in C/C++. No JS null! (null == 0 is false, etc.) - return '0'; - } - return value; -} - -// \0Dsometext is really '\r', then sometext -// This function returns an array of int values -function parseLLVMString(str) { - var ret = []; - var i = 0; - while (i < str.length) { - var chr = str[i]; - if (chr != '\\') { - ret.push(chr.charCodeAt(0)); - i++; - } else { - ret.push(_HexToInt(str[i+1]+str[i+2])); - i += 3; - } - } - return ret; -} - -function getLabelIds(labels) { - return labels.map(function(label) { return label.ident }); -} - -// ======================= - -// llvm => intertypes -function intertyper(data) { - // Substrate - - substrate = new Substrate('Intertyper'); - - // Line splitter. 
- substrate.addZyme('LineSplitter', { - processItem: function(item) { - var lines = item.llvmText.split('\n'); - var ret = []; - var inContinual = false; - for (var i = 0; i < lines.length; i++) { - var line = lines[i]; - if (inContinual || new RegExp(/^\ +to.*/g).test(line)) { - // to after invoke - ret.slice(-1)[0].lineText += line; - if (new RegExp(/^\ +\]/g).test(line)) { // end of llvm switch - inContinual = false; - } - } else { - ret.push({ - lineText: line, - lineNum: i + 1, - }); - if (new RegExp(/^\ +switch\ .*/g).test(line)) { - // beginning of llvm switch - inContinual = true; - } - } - } - this.forwardItems(ret.filter(function(item) { return item.lineText; }), 'Tokenizer'); - }, - }); - - // Line tokenizer - substrate.addZyme('Tokenizer', { - processItem: function(item, inner) { - var lineText = item.lineText + " "; - var tokens = []; - var tokenStart = -1; - var indent = -1; - var quotes = 0; - var lastToken = null; - var i = 0; - // Note: '{' is not an encloser, as its use in functions is split over many lines - var enclosers = { - '[': 0, - ']': '[', - '(': 0, - ')': '(', - '<': 0, - '>': '<', - }; - function notEnclosed() { - if (enclosers['['] > 0 || enclosers['('] > 0 || enclosers['<'] > 0) - return false; - return true; - } - var that = this; - function tryStartToken() { - if (tokenStart == -1 && notEnclosed() && quotes == 0) { - //print("try START " + tokenStart + ',' + JSON.stringify(enclosers)); - tokenStart = i; - } - } - function tryFinishToken(includeThis) { - if (tokenStart >= 0 && notEnclosed() && quotes == 0) { - //print("try finish " + tokenStart + ',' + JSON.stringify(enclosers)); - var token = { - text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 
1 : 0)), - }; - if (token.text[0] in enclosers) { - token.item = that.processItem({ - lineText: token.text.substr(1, token.text.length-2) - }, true); - token.type = token.text[0]; - } - if (indent == -1) { - indent = tokenStart; - } - // merge certain tokens - if ( (lastToken && lastToken.text == '%' && token.text[0] == '"' ) || - (lastToken && token.text.replace(/\*/g, '') == '') ) { - lastToken.text += token.text; - } else if (lastToken && isType(lastToken.text) && isFunctionDef(token)) { - lastToken.text += ' ' + token.text; - } else if (lastToken && token.text[token.text.length-1] == '}') { - var openBrace = tokens.length-1; - while (tokens[openBrace].text != '{') openBrace --; - token = combineTokens(tokens.slice(openBrace+1)); - tokens.splice(openBrace, tokens.length-openBrace+1); - tokens.push(token); - token.type = '{'; - lastToken = token; - } else { - tokens.push(token); - lastToken = token; - } - // print("new token: " + dump(lastToken)); - tokenStart = -1; - } - } - for (; i < lineText.length; i++) { - var letter = lineText[i]; - //print("letter: " + letter); - switch (letter) { - case ' ': - tryFinishToken(); - break; - case '"': - tryFinishToken(); - tryStartToken(); - quotes = 1-quotes; - break; - case ',': - tryFinishToken(); - if (notEnclosed() && quotes == 0) { - tokens.push({ text: ',' }); - } - break; - default: - if (letter in enclosers && quotes == 0) { - if (typeof enclosers[letter] === 'number') { - tryFinishToken(); - tryStartToken(); - enclosers[letter]++; - } else { - enclosers[enclosers[letter]]--; - tryFinishToken(true); - } - //print(' post-enclosers: ' + JSON.stringify(enclosers)); - } else { - tryStartToken(); - } - } - } - var item = { - tokens: tokens, - indent: indent, - lineNum: item.lineNum, - }; - if (inner) { - return [item]; - } else { - this.forwardItem(item, 'Triager'); - } - }, - }); - - substrate.addZyme('Triager', { - processItem: function(item) { - function triage() { - if (!item.intertype) { - if (item.tokens[0].text 
in searchable(';', 'target')) - return '/dev/null'; - if (item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=') - return 'Global'; - if (item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' && - item.tokens.slice(-1)[0].text == '{') - return 'FuncHeader'; - if (item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':') - return 'Label'; - if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 && - !item.intertype) - return 'Assign'; - if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load') - return 'Load'; - if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast') - return 'Bitcast'; - if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr') - return 'GEP'; - if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype) - return 'Call'; - if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype) - return 'Invoke'; - if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca') - return 'Alloca'; - if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'phi') - return 'Phi'; - if (item.indent === -1 && item.tokens && item.tokens.length >= 3 && - ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr'].indexOf(item.tokens[0].text) != -1 && !item.intertype) - return 'Mathops'; - if (item.indent === 2 && item.tokens && item.tokens.length >= 5 && 
item.tokens[0].text == 'store' && - !item.intertype) - return 'Store'; - if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' && - !item.intertype) - return 'Branch'; - if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' && - !item.intertype) - return 'Return'; - if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'switch' && - !item.intertype) - return 'Switch'; - if (item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype) - return 'FuncEnd'; - if (item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' && - !item.intertype) - return 'External'; - if (item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' && - !item.intertype) - return 'Unreachable'; - } else { - // Already intertyped - if (item.parentSlot) - return 'Reintegrator'; - } - throw 'Invalid token, cannot triage: ' + dump(item); - } - this.forwardItem(item, triage(item)); - }, - }); - - // Line parsers to intermediate form - - // globals: type or variable - substrate.addZyme('Global', { - processItem: function(item) { - if (item.tokens[2].text == 'type') { - //dprint('type/const linenum: ' + item.lineNum + ':' + dump(item)); - var fields = []; - if (item.tokens[3].text != 'opaque') { - if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec - item.tokens[3] = item.tokens[3].item[0]; - } - var subTokens = item.tokens[3].tokens; - subTokens.push({text:','}); - while (subTokens[0]) { - var stop = 1; - while ([','].indexOf(subTokens[stop].text) == -1) stop ++; - fields.push(combineTokens(subTokens.slice(0, stop)).text); - subTokens.splice(0, stop+1); - } - } - return [{ - __result__: true, // XXX can remove these - intertype: 'type', - name_: item.tokens[0].text, - fields: fields, - lineNum: item.lineNum, - }] - } else { - // variable - var ident = 
item.tokens[0].text; - while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 }) - item.tokens.splice(2, 1); - var ret = { - __result__: true, - intertype: 'globalVariable', - ident: ident, - type: item.tokens[2], - lineNum: item.lineNum, - }; - if (ident == '@llvm.global_ctors') { - ret.ctors = []; - var subTokens = item.tokens[3].item[0].tokens; - splitTokenList(subTokens).forEach(function(segment) { - ret.ctors.push(segment[1].tokens.slice(-1)[0].text); - }); - } else { - if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec - item.tokens[3] = item.tokens[3].item[0].tokens; - } - - if (item.tokens[3].text == 'c') - item.tokens.splice(3, 1); - ret.value = item.tokens[3]; - } - return [ret]; - } - }, - }); - // function header - substrate.addZyme('FuncHeader', { - processItem: function(item) { - item.tokens = item.tokens.filter(function(token) { - return ['internal', 'signext', 'zeroext', 'nounwind', 'define', 'linkonce_odr', 'inlinehint', '{'].indexOf(token.text) == -1; - }); - return [{ - __result__: true, - intertype: 'function', - ident: item.tokens[1].text, - returnType: item.tokens[0], - params: item.tokens[2], - lineNum: item.lineNum, - }]; - }, - }); - // label - substrate.addZyme('Label', { - processItem: function(item) { - return [{ - __result__: true, - intertype: 'label', - ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1), - lineNum: item.lineNum, - }]; - }, - }); - - // assignment - substrate.addZyme('Assign', { - processItem: function(item) { - var opIndex = findTokenText(item, '='); - var pair = splitItem({ - intertype: 'assign', - ident: combineTokens(item.tokens.slice(0, opIndex)).text, - lineNum: item.lineNum, - }, 'value'); - this.forwardItem(pair.parent, 'Reintegrator'); - this.forwardItem(mergeInto(pair.child, { // Additional token, to be triaged and later re-integrated - indent: -1, - tokens: item.tokens.slice(opIndex+1), - }), 'Triager'); - 
}, - }); - // reintegration - find intermediate representation-parsed items and - // place back in parents TODO: Optimize this code to optimal O(..) - substrate.addZyme('Reintegrator', makeReintegrator(function(parent, child) { - // Special re-integration behaviors - if (child.intertype == 'fastgetelementptrload') { - parent.intertype = 'fastgetelementptrload'; - } - this.forwardItem(parent, '/dev/stdout'); - })); - - // 'load' - substrate.addZyme('Load', { - processItem: function(item) { - item.pointerType = item.tokens[1]; - item.type = { text: removePointing(item.pointerType.text) }; - if (item.tokens[2].text == 'getelementptr') { - var last = getTokenIndexByText(item.tokens, ';'); - var gepTokens = item.tokens.slice(1, last); // without 'load' - var segment = [ gepTokens[2], gepTokens[0], null ].concat(gepTokens.slice(3)); - var data = parseGetElementPtr(segment); - item.intertype = 'fastgetelementptrload'; - item.type = data.type; - item.params = data.params; - item.pointer = { text: data.ident }; - item.value = data.value; - } else { - item.intertype = 'load'; - if (item.tokens[2].text == 'bitcast') { - item.pointer = item.tokens[3].item[0].tokens[1]; - item.originalType = item.tokens[3].item[0].tokens[0]; - } else { - item.pointer = item.tokens[2]; - } - } - item.ident = item.pointer.text; - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'bitcast' - substrate.addZyme('Bitcast', { - processItem: function(item) { - item.intertype = 'bitcast'; - item.type = item.tokens[1]; - item.ident = item.tokens[2].text; - item.type2 = item.tokens[4]; - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'getelementptr' - substrate.addZyme('GEP', { - processItem: function(item) { - var last = getTokenIndexByText(item.tokens, ';'); - var segment = [ item.tokens[1], { text: null }, null, { item: [ { - tokens: item.tokens.slice(2, last) - } ] } ]; - var data = parseGetElementPtr(segment); - item.intertype = 'getelementptr'; - item.type = data.type; - item.params = 
data.params; - item.ident = data.ident; - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'call' - substrate.addZyme('Call', { - processItem: function(item) { - item.intertype = 'call'; - if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) { - item.tokens.splice(1, 1); - } - item.type = item.tokens[1]; - item.functionType = ''; - while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { - item.functionType += item.tokens[2].text; - item.tokens.splice(2, 1); - } - item.ident = item.tokens[2].text; - if (item.ident.substr(-2) == '()') { - // See comment in isStructType() - item.ident = item.ident.substr(0, item.ident.length-2); - // Also, we remove some spaces which might occur. - while (item.ident[item.ident.length-1] == ' ') { - item.ident = item.ident.substr(0, item.ident.length-1); - } - item.params = []; - } else { - item.params = parseParamTokens(item.tokens[3].item[0].tokens); - } - if (item.indent == 2) { - // standalone call - not in assign - item.standalone = true; - item.__result__ = true; - return [item]; - } - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'invoke' - substrate.addZyme('Invoke', { - processItem: function(item) { - item.intertype = 'invoke'; - item.type = item.tokens[1]; - item.functionType = ''; - while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { - item.functionType += item.tokens[2].text; - item.tokens.splice(2, 1); - } - cleanOutTokens(['alignstack', 'alwaysinline', 'inlinehint', 'naked', 'noimplicitfloat', 'noinline', 'alwaysinline attribute.', 'noredzone', 'noreturn', 'nounwind', 'optsize', 'readnone', 'readonly', 'ssp', 'sspreq'], item.tokens, 4); - item.ident = item.tokens[2].text; - item.params = parseParamTokens(item.tokens[3].item[0].tokens); - item.toLabel = toNiceIdent(item.tokens[6].text); - item.unwindLabel = toNiceIdent(item.tokens[9].text); - if (item.indent == 2) { - // standalone call - not in assign - item.standalone = true; - item.__result__ = true; - return [item]; - } - 
this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'alloca' - substrate.addZyme('Alloca', { - processItem: function(item) { - item.intertype = 'alloca'; - item.allocatedType = item.tokens[1]; - item.type = { text: addPointing(item.tokens[1].text) }; // type of pointer we will get - item.type2 = { text: item.tokens[1].text }; // value we will create, and get a pointer to - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'phi' - substrate.addZyme('Phi', { - processItem: function(item) { - item.intertype = 'phi'; - item.type = { text: item.tokens[1].text } - item.label1 = item.tokens[2].item[0].tokens[2].text; - item.value1 = item.tokens[2].item[0].tokens[0].text; - item.label2 = item.tokens[4].item[0].tokens[2].text; - item.value2 = item.tokens[4].item[0].tokens[0].text; - this.forwardItem(item, 'Reintegrator'); - }, - }); - // mathops - substrate.addZyme('Mathops', { - processItem: function(item) { - item.intertype = 'mathop'; - item.op = item.tokens[0].text; - item.variant = null; - if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1); - if (['icmp', 'fcmp'].indexOf(item.op) != -1) { - item.variant = item.tokens[1].text; - item.tokens.splice(1, 1); - } - item.type = item.tokens[1]; - item.ident = item.tokens[2].text; - item.ident2 = item.tokens[4].text; - item.ident3 = item.tokens[5] ? item.tokens[5].text : null; - item.ident4 = item.tokens[8] ? 
item.tokens[8].text : null; - dprint('mathop', item.op + ',' + item.variant + ',' + item.ident + ',' + item.value); - this.forwardItem(item, 'Reintegrator'); - }, - }); - // 'store' - substrate.addZyme('Store', { - processItem: function(item) { - if (item.tokens[3].text != ',') { - assertEq(item.tokens[2].text, 'getelementptr'); - // complex input - likely getelementptr - var commaIndex = 4; - while (item.tokens[commaIndex].text != ',') commaIndex ++; - return [{ - __result__: true, - intertype: 'store', - valueType: item.tokens[1], - value: parseGetElementPtr(item.tokens.slice(1, commaIndex)), - pointerType: item.tokens[commaIndex+1], - pointer: item.tokens[commaIndex+2], - ident: item.tokens[commaIndex+2].text, - lineNum: item.lineNum, - }]; - } - return [{ - __result__: true, - intertype: 'store', - valueType: item.tokens[1], - value: addIdent(item.tokens[2]), - pointerType: item.tokens[4], - pointer: item.tokens[5], - ident: item.tokens[5].text, - lineNum: item.lineNum, - }]; - }, - }); - // 'br' - substrate.addZyme('Branch', { - processItem: function(item) { - if (item.tokens[1].text == 'label') { - return [{ - __result__: true, - intertype: 'branch', - label: toNiceIdent(item.tokens[2].text), - lineNum: item.lineNum, - }]; - } else { - return [{ - __result__: true, - intertype: 'branch', - ident: item.tokens[2].text, - labelTrue: toNiceIdent(item.tokens[5].text), - labelFalse: toNiceIdent(item.tokens[8].text), - lineNum: item.lineNum, - }]; - } - }, - }); - // 'ret' - substrate.addZyme('Return', { - processItem: function(item) { - return [{ - __result__: true, - intertype: 'return', - type: item.tokens[1].text, - value: item.tokens[2] ? 
item.tokens[2].text : null, - lineNum: item.lineNum, - }]; - }, - }); - // 'switch' - substrate.addZyme('Switch', { - processItem: function(item) { - function parseSwitchLabels(item) { - var ret = []; - var tokens = item.item[0].tokens; - while (tokens.length > 0) { - ret.push({ - value: tokens[1].text, - label: toNiceIdent(tokens[4].text), - }); - tokens = tokens.slice(5); - } - return ret; - } - return [{ - __result__: true, - intertype: 'switch', - type: item.tokens[1].text, - ident: item.tokens[2].text, - defaultLabel: item.tokens[5].text, - switchLabels: parseSwitchLabels(item.tokens[6]), - lineNum: item.lineNum, - }]; - }, - }); - // function end - substrate.addZyme('FuncEnd', { - processItem: function(item) { - return [{ - __result__: true, - intertype: 'functionEnd', - lineNum: item.lineNum, - }]; - }, - }); - // external function stub - substrate.addZyme('External', { - processItem: function(item) { - return [{ - __result__: true, - intertype: 'functionStub', - ident: item.tokens[2].text, - returnType: item.tokens[1], - params: item.tokens[3], - lineNum: item.lineNum, - }]; - }, - }); - // 'unreachable' - substrate.addZyme('Unreachable', { - processItem: function(item) { - return [{ - __result__: true, - intertype: 'unreachable', - lineNum: item.lineNum, - }]; - }, - }); - - // Input - - substrate.addItem({ - llvmText: data, - }, 'LineSplitter'); - - return substrate.solve(); -} - -// Analyze intertype data - -VAR_NATIVE = 'native'; -VAR_NATIVIZED = 'nativized'; -VAR_EMULATED = 'emulated'; - -function cleanFunc(func) { - func.lines = func.lines.filter(function(line) { return line.intertype !== null }); - func.labels.forEach(function(label) { - label.lines = label.lines.filter(function(line) { return line.intertype !== null }); - }); -} - -function analyzer(data) { -//print('zz analaz') - substrate = new Substrate('Analyzer'); - - // Sorter - substrate.addZyme('Sorter', { - processItem: function(item) { - item.items.sort(function (a, b) { return a.lineNum - 
b.lineNum }); - this.forwardItem(item, 'Gatherer'); - }, - }); - - // Gatherer - substrate.addZyme('Gatherer', { - processItem: function(item) { - // Single-liners - ['globalVariable', 'functionStub', 'type'].forEach(function(intertype) { - var temp = splitter(item.items, function(item) { return item.intertype == intertype }); - item[intertype + 's'] = temp.splitOut; - item.items = temp.leftIn; - }); - // Functions & labels - item.functions = [] - for (var i = 0; i < item.items.length; i++) { - var subItem = item.items[i]; - if (subItem.intertype == 'function') { - item.functions.push(subItem); - subItem.endLineNum = null; - subItem.lines = []; - subItem.labels = []; - } else if (subItem.intertype == 'functionEnd') { - item.functions.slice(-1)[0].endLineNum = subItem.lineNum; - } else if (subItem.intertype == 'label') { - item.functions.slice(-1)[0].labels.push(subItem); - subItem.lines = []; - } else if (item.functions.slice(-1)[0].endLineNum === null) { - // Internal line - item.functions.slice(-1)[0].lines.push(subItem); - item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem); - } else { - print("ERROR: what is this? " + JSON.stringify(subItem)); - } - } - delete item.items; - this.forwardItem(item, 'Identinicer'); - }, - }); - - // IdentiNicer - substrate.addZyme('Identinicer', { - processItem: function(output) { - walkJSON(output, function(item) { - |