aboutsummaryrefslogtreecommitdiff
path: root/src/parser.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.js')
-rw-r--r--src/parser.js2572
1 files changed, 0 insertions, 2572 deletions
diff --git a/src/parser.js b/src/parser.js
deleted file mode 100644
index c904cde1..00000000
--- a/src/parser.js
+++ /dev/null
@@ -1,2572 +0,0 @@
-// LLVM parser
-//============
-
-/*
- * TODO:
- * * Re-use variables (of the same kind, native/nativized vs. emulated).
- */
-
-// Prep - allow this to run in both SpiderMonkey and V8
-
-if (!this['load']) {
- load = function(f) { eval(snarf(f)) }
-}
-if (!this['read']) {
- read = function(f) { snarf(f) }
-}
-
-load('settings.js');
-if (LABEL_DEBUG && RELOOP) throw "Cannot debug labels if they have been relooped!";
-
-load('utility.js');
-load('enzymatic.js');
-load('snippets.js');
-
-// Tools
-
-// Simple #if/else/endif preprocessing for a file. Checks if the
-// ident checked is true in our global.
-function preprocess(text) {
- var lines = text.split('\n');
- var ret = '';
- var show = true;
- for (var i = 0; i < lines.length; i++) {
- var line = lines[i];
- if (line[0] != '#') {
- if (show) {
- ret += line + '\n';
- }
- } else {
- if (line[1] == 'i') { // if
- var ident = line.substr(4);
- show = !!this[ident];
- } else if (line[2] == 'l') { // else
- show = !show;
- } else if (line[2] == 'n') { // endif
- show = true;
- } else {
- throw "Unclear preprocessor command: " + line;
- }
- }
- }
- return ret;
-}
-
-function addPointing(type) { return type + '*' }
-function removePointing(type, num) {
- if (num === 0) return type;
- return type.substr(0, type.length-(num ? num : 1))
-}
-
-function pointingLevels(type) {
- if (!type) return 0;
- var ret = 0;
- var len1 = type.length - 1;
- while (type[len1-ret] === '*') {
- ret ++;
- }
- return ret;
-}
-
-function toNiceIdent(ident) {
- if (parseFloat(ident) == ident) return ident;
- if (ident == 'null') return '0'; // see parseNumerical
- return ident.replace(/[" \.@%:<>,\*]/g, '_');
-}
-
-function isNumberType(type) {
- var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double'];
- return types.indexOf(type) != -1;
-}
-
-function isStructPointerType(type) {
- // This test is necessary for clang - in llvm-gcc, we
- // could check for %struct. The downside is that %1 can
- // be either a variable or a structure, and we guess it is
- // a struct, which can lead to |call i32 %5()| having
- // |%5()| as a function call (like |i32 (i8*)| etc.). So
- // we must check later on, in call(), where we have more
- // context, to differentiate such cases.
- // A similar thing happns in isStructType()
- return !isNumberType(type) && type[0] == '%';
-}
-
-function isStructType(type) {
- if (isPointerType(type)) return false;
- if (new RegExp(/^\[\d+\ x\ (.*)\]/g).test(type)) return true; // [15 x ?] blocks. Like structs
- // See comment in isStructPointerType()
- return !isNumberType(type) && type[0] == '%';
-}
-
-function isPointerType(type) { // TODO!
- return pointingLevels(type) > 0;
-}
-
-function isVoidType(type) {
- return type == 'void';
-}
-
-function isType(type) { // TODO!
- return isVoidType(type) || isNumberType(type) || isStructType(type) || isPointerType(type);
-}
-
-// Detects a function definition, ([...|type,[type,...]])
-function isFunctionDef(token) {
- var text = token.text;
- var pointing = pointingLevels(text);
- var nonPointing = removePointing(text, pointing);
- if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')')
- return false;
- if (nonPointing == '(...)') return true;
- if (!token.item) return false;
- var fail = false;
- splitTokenList(token.item[0].tokens).forEach(function(segment) {
- var subtoken = segment[0];
- fail = fail || !isType(subtoken.text) || segment.length > 1;
- });
- return !fail;
-}
-
-function addIdent(token) {
- token.ident = token.text;
- return token;
-}
-
-function combineTokens(tokens) {
- var ret = {
- lineNum: tokens[0].lineNum,
- text: '',
- tokens: [],
- };
- tokens.forEach(function(token) {
- ret.text += token.text;
- ret.tokens.push(token);
- });
- return ret;
-}
-
-function compareTokens(a, b) {
- var aId = a.__uid__;
- var bId = b.__uid__;
- a.__uid__ = 0;
- b.__uid__ = 0;
- var ret = JSON.stringify(a) == JSON.stringify(b);
- a.__uid__ = aId;
- b.__uid__ = bId;
- return ret;
-}
-
-function getTokenIndexByText(tokens, text) {
- var i = 0;
- while (tokens[i].text != ';') i++;
- return i;
-}
-
-function findTokenText(item, text) {
- for (var i = 0; i < item.tokens.length; i++) {
- if (item.tokens[i].text == text) return i;
- }
- return -1;
-}
-
-// Splits a list of tokens separated by commas. For example, a list of arguments in a function call
-function splitTokenList(tokens) {
- if (tokens.length == 0) return [];
- if (tokens.slice(-1)[0].text != ',') tokens.push({text:','});
- var ret = [];
- var seg = [];
- tokens.forEach(function(token) {
- if (token.text == ',') {
- ret.push(seg);
- seg = [];
- } else {
- seg.push(token);
- }
- });
- return ret;
-}
-
-// Splits an item, with the intent of later reintegration
-function splitItem(parent, childSlot, copySlots) {
- if (!copySlots) copySlots = [];
- if (!parent[childSlot]) parent[childSlot] = {};
- var child = parent[childSlot];
- parent[childSlot] = null;
- child.parentUid = parent.__uid__;
- child.parentSlot = childSlot;
- child.parentLineNum = child.lineNum = parent.lineNum;
- copySlots.forEach(function(slot) { child[slot] = parent[slot] });
- return {
- parent: parent,
- child: child,
- };
-}
-
-function makeReintegrator(afterFunc) {
- // reintegration - find intermediate representation-parsed items and
- // place back in parents TODO: Optimize this code to optimal O(..)
- return {
- process: function(items) {
- var ret = [];
- for (var i = 0; i < items.length; i++) {
- var found = false;
- if (items[i] && items[i].parentSlot) {
- var child = items[i];
- for (var j = 0; j < items.length; j++) {
- if (items[j] && items[j].lineNum == items[i].parentLineNum) {
- var parent = items[j];
- // process the pair
- parent[child.parentSlot] = child;
- delete child.parentLineNum;
- afterFunc.call(this, parent, child);
-
- items[i] = null;
- items[j] = null;
- found = true;
- break;
- }
- }
- }
- }
- this.forwardItems(items.filter(function(item) { return !!item }), this.name_); // next time hopefully
- return ret;
- }
- };
-}
-
-function parseParamTokens(params) {
- if (params.length === 0) return [];
- var ret = [];
- if (params[params.length-1].text != ',') {
- params.push({ text: ',' });
- }
- var absIndex = 0;
- while (params.length > 0) {
- var i = 0;
- while (params[i].text != ',') i++;
- var segment = params.slice(0, i);
- params = params.slice(i+1);
- segment = cleanSegment(segment);
- if (segment.length == 1) {
- if (segment[0].text == '...') {
- ret.push({
- intertype: 'varargs',
- });
- } else {
- // Clang sometimes has a parameter with just a type,
- // no name... the name is implied to be %{the index}
- ret.push({
- intertype: 'value',
- type: segment[0],
- value: null,
- ident: '_' + absIndex,
- });
- }
- } else if (segment[1].text === 'getelementptr') {
- ret.push(parseGetElementPtr(segment));
- } else if (segment[1].text === 'bitcast') {
- ret.push(parseBitcast(segment));
- } else {
- if (segment[2] && segment[2].text == 'to') { // part of bitcast params
- segment = segment.slice(0, 2);
- }
- while (segment.length > 2) {
- segment[0].text += segment[1].text;
- segment.splice(1, 1); // TODO: merge tokens nicely
- }
- ret.push({
- intertype: 'value',
- type: segment[0],
- value: segment[1],
- ident: segment[1].text,
- });
- // } else {
- // throw "what is this params token? " + JSON.stringify(segment);
- }
- absIndex ++;
- }
- return ret;
-}
-
-function cleanSegment(segment) {
- if (segment.length == 1) return segment;
- while (['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'].indexOf(segment[1].text) != -1) {
- segment.splice(1, 1);
- }
- return segment;
-}
-
-// Expects one of the several LVM getelementptr formats:
-// a qualifier, a type, a null, then an () item with tokens
-function parseGetElementPtr(segment) {
-//print("Parse GTP: " + dump(segment));
- segment = segment.slice(0);
- segment = cleanSegment(segment);
- assertTrue(['inreg', 'byval'].indexOf(segment[1].text) == -1);
- //dprint('// zz: ' + dump(segment) + '\n\n\n');
- var ret = {
- intertype: 'getelementptr',
- type: segment[0],
- params: parseParamTokens(segment[3].item[0].tokens),
- };
- ret.ident = toNiceIdent(ret.params[0].ident);
- return ret;
-}
-
-// TODO: use this
-function parseBitcast(segment) {
- //print('zz parseBC pre: ' + dump(segment));
- var ret = {
- intertype: 'bitcast',
- type: segment[0],
- params: parseParamTokens(segment[2].item[0].tokens),
- };
- ret.ident = toNiceIdent(ret.params[0].ident);
-//print('zz parseBC: ' + dump(ret));
- return ret;
-}
-
-function cleanOutTokens(filterOut, tokens, index) {
- while (filterOut.indexOf(tokens[index].text) != -1) {
- tokens.splice(index, 1);
- }
-}
-
-function _HexToInt(stringy) {
- var ret = 0;
- var mul = 1;
- var base;
- for (var i = (stringy.length - 1); i >= 0; i = i - 1) {
- if (stringy.charCodeAt(i) >= "A".charCodeAt(0)) {
- base = "A".charCodeAt(0) - 10;
- } else {
- base = "0".charCodeAt(0);
- }
- ret = ret + (mul*(stringy.charCodeAt(i) - base));
- mul = mul * 16;
- }
- return ret;
-}
-
-function IEEEUnHex(stringy) {
- var a = _HexToInt(stringy.substr(2, 8));
- var b = _HexToInt(stringy.substr(10));
- var e = (a >> ((52 - 32) & 0x7ff)) - 1023;
- return ((((a & 0xfffff | 0x100000) * 1.0) / Math.pow(2,52-32)) * Math.pow(2, e)) + (((b * 1.0) / Math.pow(2, 52)) * Math.pow(2, e));
-}
-
-function parseNumerical(value, type) {
- if ((!type || type == 'double' || type == 'float') && value.substr(0,2) == '0x') {
- // Hexadecimal double value, as the llvm docs say,
- // "The one non-intuitive notation for constants is the hexadecimal form of floating point constants."
- return IEEEUnHex(value);
- }
- if (value == 'null') {
- // NULL *is* 0, in C/C++. No JS null! (null == 0 is false, etc.)
- return '0';
- }
- return value;
-}
-
-// \0Dsometext is really '\r', then sometext
-// This function returns an array of int values
-function parseLLVMString(str) {
- var ret = [];
- var i = 0;
- while (i < str.length) {
- var chr = str[i];
- if (chr != '\\') {
- ret.push(chr.charCodeAt(0));
- i++;
- } else {
- ret.push(_HexToInt(str[i+1]+str[i+2]));
- i += 3;
- }
- }
- return ret;
-}
-
-function getLabelIds(labels) {
- return labels.map(function(label) { return label.ident });
-}
-
-// =======================
-
-// llvm => intertypes
-function intertyper(data) {
- // Substrate
-
- substrate = new Substrate('Intertyper');
-
- // Line splitter.
- substrate.addZyme('LineSplitter', {
- processItem: function(item) {
- var lines = item.llvmText.split('\n');
- var ret = [];
- var inContinual = false;
- for (var i = 0; i < lines.length; i++) {
- var line = lines[i];
- if (inContinual || new RegExp(/^\ +to.*/g).test(line)) {
- // to after invoke
- ret.slice(-1)[0].lineText += line;
- if (new RegExp(/^\ +\]/g).test(line)) { // end of llvm switch
- inContinual = false;
- }
- } else {
- ret.push({
- lineText: line,
- lineNum: i + 1,
- });
- if (new RegExp(/^\ +switch\ .*/g).test(line)) {
- // beginning of llvm switch
- inContinual = true;
- }
- }
- }
- this.forwardItems(ret.filter(function(item) { return item.lineText; }), 'Tokenizer');
- },
- });
-
- // Line tokenizer
- substrate.addZyme('Tokenizer', {
- processItem: function(item, inner) {
- var lineText = item.lineText + " ";
- var tokens = [];
- var tokenStart = -1;
- var indent = -1;
- var quotes = 0;
- var lastToken = null;
- var i = 0;
- // Note: '{' is not an encloser, as its use in functions is split over many lines
- var enclosers = {
- '[': 0,
- ']': '[',
- '(': 0,
- ')': '(',
- '<': 0,
- '>': '<',
- };
- function notEnclosed() {
- if (enclosers['['] > 0 || enclosers['('] > 0 || enclosers['<'] > 0)
- return false;
- return true;
- }
- var that = this;
- function tryStartToken() {
- if (tokenStart == -1 && notEnclosed() && quotes == 0) {
- //print("try START " + tokenStart + ',' + JSON.stringify(enclosers));
- tokenStart = i;
- }
- }
- function tryFinishToken(includeThis) {
- if (tokenStart >= 0 && notEnclosed() && quotes == 0) {
- //print("try finish " + tokenStart + ',' + JSON.stringify(enclosers));
- var token = {
- text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)),
- };
- if (token.text[0] in enclosers) {
- token.item = that.processItem({
- lineText: token.text.substr(1, token.text.length-2)
- }, true);
- token.type = token.text[0];
- }
- if (indent == -1) {
- indent = tokenStart;
- }
- // merge certain tokens
- if ( (lastToken && lastToken.text == '%' && token.text[0] == '"' ) ||
- (lastToken && token.text.replace(/\*/g, '') == '') ) {
- lastToken.text += token.text;
- } else if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
- lastToken.text += ' ' + token.text;
- } else if (lastToken && token.text[token.text.length-1] == '}') {
- var openBrace = tokens.length-1;
- while (tokens[openBrace].text != '{') openBrace --;
- token = combineTokens(tokens.slice(openBrace+1));
- tokens.splice(openBrace, tokens.length-openBrace+1);
- tokens.push(token);
- token.type = '{';
- lastToken = token;
- } else {
- tokens.push(token);
- lastToken = token;
- }
- // print("new token: " + dump(lastToken));
- tokenStart = -1;
- }
- }
- for (; i < lineText.length; i++) {
- var letter = lineText[i];
- //print("letter: " + letter);
- switch (letter) {
- case ' ':
- tryFinishToken();
- break;
- case '"':
- tryFinishToken();
- tryStartToken();
- quotes = 1-quotes;
- break;
- case ',':
- tryFinishToken();
- if (notEnclosed() && quotes == 0) {
- tokens.push({ text: ',' });
- }
- break;
- default:
- if (letter in enclosers && quotes == 0) {
- if (typeof enclosers[letter] === 'number') {
- tryFinishToken();
- tryStartToken();
- enclosers[letter]++;
- } else {
- enclosers[enclosers[letter]]--;
- tryFinishToken(true);
- }
- //print(' post-enclosers: ' + JSON.stringify(enclosers));
- } else {
- tryStartToken();
- }
- }
- }
- var item = {
- tokens: tokens,
- indent: indent,
- lineNum: item.lineNum,
- };
- if (inner) {
- return [item];
- } else {
- this.forwardItem(item, 'Triager');
- }
- },
- });
-
- substrate.addZyme('Triager', {
- processItem: function(item) {
- function triage() {
- if (!item.intertype) {
- if (item.tokens[0].text in searchable(';', 'target'))
- return '/dev/null';
- if (item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=')
- return 'Global';
- if (item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' &&
- item.tokens.slice(-1)[0].text == '{')
- return 'FuncHeader';
- if (item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':')
- return 'Label';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 &&
- !item.intertype)
- return 'Assign';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load')
- return 'Load';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast')
- return 'Bitcast';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr')
- return 'GEP';
- if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype)
- return 'Call';
- if (item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype)
- return 'Invoke';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca')
- return 'Alloca';
- if (!item.intertype && item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'phi')
- return 'Phi';
- if (item.indent === -1 && item.tokens && item.tokens.length >= 3 &&
- ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr'].indexOf(item.tokens[0].text) != -1 && !item.intertype)
- return 'Mathops';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' &&
- !item.intertype)
- return 'Store';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' &&
- !item.intertype)
- return 'Branch';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' &&
- !item.intertype)
- return 'Return';
- if (item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'switch' &&
- !item.intertype)
- return 'Switch';
- if (item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype)
- return 'FuncEnd';
- if (item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' &&
- !item.intertype)
- return 'External';
- if (item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' &&
- !item.intertype)
- return 'Unreachable';
- } else {
- // Already intertyped
- if (item.parentSlot)
- return 'Reintegrator';
- }
- throw 'Invalid token, cannot triage: ' + dump(item);
- }
- this.forwardItem(item, triage(item));
- },
- });
-
- // Line parsers to intermediate form
-
- // globals: type or variable
- substrate.addZyme('Global', {
- processItem: function(item) {
- if (item.tokens[2].text == 'type') {
- //dprint('type/const linenum: ' + item.lineNum + ':' + dump(item));
- var fields = [];
- if (item.tokens[3].text != 'opaque') {
- if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
- item.tokens[3] = item.tokens[3].item[0];
- }
- var subTokens = item.tokens[3].tokens;
- subTokens.push({text:','});
- while (subTokens[0]) {
- var stop = 1;
- while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
- fields.push(combineTokens(subTokens.slice(0, stop)).text);
- subTokens.splice(0, stop+1);
- }
- }
- return [{
- __result__: true, // XXX can remove these
- intertype: 'type',
- name_: item.tokens[0].text,
- fields: fields,
- lineNum: item.lineNum,
- }]
- } else {
- // variable
- var ident = item.tokens[0].text;
- while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 })
- item.tokens.splice(2, 1);
- var ret = {
- __result__: true,
- intertype: 'globalVariable',
- ident: ident,
- type: item.tokens[2],
- lineNum: item.lineNum,
- };
- if (ident == '@llvm.global_ctors') {
- ret.ctors = [];
- var subTokens = item.tokens[3].item[0].tokens;
- splitTokenList(subTokens).forEach(function(segment) {
- ret.ctors.push(segment[1].tokens.slice(-1)[0].text);
- });
- } else {
- if (item.tokens[3].type == '<') { // type <{ i8 }> XXX - check spec
- item.tokens[3] = item.tokens[3].item[0].tokens;
- }
-
- if (item.tokens[3].text == 'c')
- item.tokens.splice(3, 1);
- ret.value = item.tokens[3];
- }
- return [ret];
- }
- },
- });
- // function header
- substrate.addZyme('FuncHeader', {
- processItem: function(item) {
- item.tokens = item.tokens.filter(function(token) {
- return ['internal', 'signext', 'zeroext', 'nounwind', 'define', 'linkonce_odr', 'inlinehint', '{'].indexOf(token.text) == -1;
- });
- return [{
- __result__: true,
- intertype: 'function',
- ident: item.tokens[1].text,
- returnType: item.tokens[0],
- params: item.tokens[2],
- lineNum: item.lineNum,
- }];
- },
- });
- // label
- substrate.addZyme('Label', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'label',
- ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1),
- lineNum: item.lineNum,
- }];
- },
- });
-
- // assignment
- substrate.addZyme('Assign', {
- processItem: function(item) {
- var opIndex = findTokenText(item, '=');
- var pair = splitItem({
- intertype: 'assign',
- ident: combineTokens(item.tokens.slice(0, opIndex)).text,
- lineNum: item.lineNum,
- }, 'value');
- this.forwardItem(pair.parent, 'Reintegrator');
- this.forwardItem(mergeInto(pair.child, { // Additional token, to be triaged and later re-integrated
- indent: -1,
- tokens: item.tokens.slice(opIndex+1),
- }), 'Triager');
- },
- });
- // reintegration - find intermediate representation-parsed items and
- // place back in parents TODO: Optimize this code to optimal O(..)
- substrate.addZyme('Reintegrator', makeReintegrator(function(parent, child) {
- // Special re-integration behaviors
- if (child.intertype == 'fastgetelementptrload') {
- parent.intertype = 'fastgetelementptrload';
- }
- this.forwardItem(parent, '/dev/stdout');
- }));
-
- // 'load'
- substrate.addZyme('Load', {
- processItem: function(item) {
- item.pointerType = item.tokens[1];
- item.type = { text: removePointing(item.pointerType.text) };
- if (item.tokens[2].text == 'getelementptr') {
- var last = getTokenIndexByText(item.tokens, ';');
- var gepTokens = item.tokens.slice(1, last); // without 'load'
- var segment = [ gepTokens[2], gepTokens[0], null ].concat(gepTokens.slice(3));
- var data = parseGetElementPtr(segment);
- item.intertype = 'fastgetelementptrload';
- item.type = data.type;
- item.params = data.params;
- item.pointer = { text: data.ident };
- item.value = data.value;
- } else {
- item.intertype = 'load';
- if (item.tokens[2].text == 'bitcast') {
- item.pointer = item.tokens[3].item[0].tokens[1];
- item.originalType = item.tokens[3].item[0].tokens[0];
- } else {
- item.pointer = item.tokens[2];
- }
- }
- item.ident = item.pointer.text;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'bitcast'
- substrate.addZyme('Bitcast', {
- processItem: function(item) {
- item.intertype = 'bitcast';
- item.type = item.tokens[1];
- item.ident = item.tokens[2].text;
- item.type2 = item.tokens[4];
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'getelementptr'
- substrate.addZyme('GEP', {
- processItem: function(item) {
- var last = getTokenIndexByText(item.tokens, ';');
- var segment = [ item.tokens[1], { text: null }, null, { item: [ {
- tokens: item.tokens.slice(2, last)
- } ] } ];
- var data = parseGetElementPtr(segment);
- item.intertype = 'getelementptr';
- item.type = data.type;
- item.params = data.params;
- item.ident = data.ident;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'call'
- substrate.addZyme('Call', {
- processItem: function(item) {
- item.intertype = 'call';
- if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) {
- item.tokens.splice(1, 1);
- }
- item.type = item.tokens[1];
- item.functionType = '';
- while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
- item.functionType += item.tokens[2].text;
- item.tokens.splice(2, 1);
- }
- item.ident = item.tokens[2].text;
- if (item.ident.substr(-2) == '()') {
- // See comment in isStructType()
- item.ident = item.ident.substr(0, item.ident.length-2);
- // Also, we remove some spaces which might occur.
- while (item.ident[item.ident.length-1] == ' ') {
- item.ident = item.ident.substr(0, item.ident.length-1);
- }
- item.params = [];
- } else {
- item.params = parseParamTokens(item.tokens[3].item[0].tokens);
- }
- if (item.indent == 2) {
- // standalone call - not in assign
- item.standalone = true;
- item.__result__ = true;
- return [item];
- }
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'invoke'
- substrate.addZyme('Invoke', {
- processItem: function(item) {
- item.intertype = 'invoke';
- item.type = item.tokens[1];
- item.functionType = '';
- while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) {
- item.functionType += item.tokens[2].text;
- item.tokens.splice(2, 1);
- }
- cleanOutTokens(['alignstack', 'alwaysinline', 'inlinehint', 'naked', 'noimplicitfloat', 'noinline', 'alwaysinline attribute.', 'noredzone', 'noreturn', 'nounwind', 'optsize', 'readnone', 'readonly', 'ssp', 'sspreq'], item.tokens, 4);
- item.ident = item.tokens[2].text;
- item.params = parseParamTokens(item.tokens[3].item[0].tokens);
- item.toLabel = toNiceIdent(item.tokens[6].text);
- item.unwindLabel = toNiceIdent(item.tokens[9].text);
- if (item.indent == 2) {
- // standalone call - not in assign
- item.standalone = true;
- item.__result__ = true;
- return [item];
- }
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'alloca'
- substrate.addZyme('Alloca', {
- processItem: function(item) {
- item.intertype = 'alloca';
- item.allocatedType = item.tokens[1];
- item.type = { text: addPointing(item.tokens[1].text) }; // type of pointer we will get
- item.type2 = { text: item.tokens[1].text }; // value we will create, and get a pointer to
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'phi'
- substrate.addZyme('Phi', {
- processItem: function(item) {
- item.intertype = 'phi';
- item.type = { text: item.tokens[1].text }
- item.label1 = item.tokens[2].item[0].tokens[2].text;
- item.value1 = item.tokens[2].item[0].tokens[0].text;
- item.label2 = item.tokens[4].item[0].tokens[2].text;
- item.value2 = item.tokens[4].item[0].tokens[0].text;
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // mathops
- substrate.addZyme('Mathops', {
- processItem: function(item) {
- item.intertype = 'mathop';
- item.op = item.tokens[0].text;
- item.variant = null;
- if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1);
- if (['icmp', 'fcmp'].indexOf(item.op) != -1) {
- item.variant = item.tokens[1].text;
- item.tokens.splice(1, 1);
- }
- item.type = item.tokens[1];
- item.ident = item.tokens[2].text;
- item.ident2 = item.tokens[4].text;
- item.ident3 = item.tokens[5] ? item.tokens[5].text : null;
- item.ident4 = item.tokens[8] ? item.tokens[8].text : null;
- dprint('mathop', item.op + ',' + item.variant + ',' + item.ident + ',' + item.value);
- this.forwardItem(item, 'Reintegrator');
- },
- });
- // 'store'
- substrate.addZyme('Store', {
- processItem: function(item) {
- if (item.tokens[3].text != ',') {
- assertEq(item.tokens[2].text, 'getelementptr');
- // complex input - likely getelementptr
- var commaIndex = 4;
- while (item.tokens[commaIndex].text != ',') commaIndex ++;
- return [{
- __result__: true,
- intertype: 'store',
- valueType: item.tokens[1],
- value: parseGetElementPtr(item.tokens.slice(1, commaIndex)),
- pointerType: item.tokens[commaIndex+1],
- pointer: item.tokens[commaIndex+2],
- ident: item.tokens[commaIndex+2].text,
- lineNum: item.lineNum,
- }];
- }
- return [{
- __result__: true,
- intertype: 'store',
- valueType: item.tokens[1],
- value: addIdent(item.tokens[2]),
- pointerType: item.tokens[4],
- pointer: item.tokens[5],
- ident: item.tokens[5].text,
- lineNum: item.lineNum,
- }];
- },
- });
- // 'br'
- substrate.addZyme('Branch', {
- processItem: function(item) {
- if (item.tokens[1].text == 'label') {
- return [{
- __result__: true,
- intertype: 'branch',
- label: toNiceIdent(item.tokens[2].text),
- lineNum: item.lineNum,
- }];
- } else {
- return [{
- __result__: true,
- intertype: 'branch',
- ident: item.tokens[2].text,
- labelTrue: toNiceIdent(item.tokens[5].text),
- labelFalse: toNiceIdent(item.tokens[8].text),
- lineNum: item.lineNum,
- }];
- }
- },
- });
- // 'ret'
- substrate.addZyme('Return', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'return',
- type: item.tokens[1].text,
- value: item.tokens[2] ? item.tokens[2].text : null,
- lineNum: item.lineNum,
- }];
- },
- });
- // 'switch'
- substrate.addZyme('Switch', {
- processItem: function(item) {
- function parseSwitchLabels(item) {
- var ret = [];
- var tokens = item.item[0].tokens;
- while (tokens.length > 0) {
- ret.push({
- value: tokens[1].text,
- label: toNiceIdent(tokens[4].text),
- });
- tokens = tokens.slice(5);
- }
- return ret;
- }
- return [{
- __result__: true,
- intertype: 'switch',
- type: item.tokens[1].text,
- ident: item.tokens[2].text,
- defaultLabel: item.tokens[5].text,
- switchLabels: parseSwitchLabels(item.tokens[6]),
- lineNum: item.lineNum,
- }];
- },
- });
- // function end
- substrate.addZyme('FuncEnd', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'functionEnd',
- lineNum: item.lineNum,
- }];
- },
- });
- // external function stub
- substrate.addZyme('External', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'functionStub',
- ident: item.tokens[2].text,
- returnType: item.tokens[1],
- params: item.tokens[3],
- lineNum: item.lineNum,
- }];
- },
- });
- // 'unreachable'
- substrate.addZyme('Unreachable', {
- processItem: function(item) {
- return [{
- __result__: true,
- intertype: 'unreachable',
- lineNum: item.lineNum,
- }];
- },
- });
-
- // Input
-
- substrate.addItem({
- llvmText: data,
- }, 'LineSplitter');
-
- return substrate.solve();
-}
-
-// Analyze intertype data
-
-VAR_NATIVE = 'native';
-VAR_NATIVIZED = 'nativized';
-VAR_EMULATED = 'emulated';
-
-function cleanFunc(func) {
- func.lines = func.lines.filter(function(line) { return line.intertype !== null });
- func.labels.forEach(function(label) {
- label.lines = label.lines.filter(function(line) { return line.intertype !== null });
- });
-}
-
-function analyzer(data) {
-//print('zz analaz')
- substrate = new Substrate('Analyzer');
-
- // Sorter
- substrate.addZyme('Sorter', {
- processItem: function(item) {
- item.items.sort(function (a, b) { return a.lineNum - b.lineNum });
- this.forwardItem(item, 'Gatherer');
- },
- });
-
- // Gatherer
- substrate.addZyme('Gatherer', {
- processItem: function(item) {
- // Single-liners
- ['globalVariable', 'functionStub', 'type'].forEach(function(intertype) {
- var temp = splitter(item.items, function(item) { return item.intertype == intertype });
- item[intertype + 's'] = temp.splitOut;
- item.items = temp.leftIn;
- });
- // Functions & labels
- item.functions = []
- for (var i = 0; i < item.items.length; i++) {
- var subItem = item.items[i];
- if (subItem.intertype == 'function') {
- item.functions.push(subItem);
- subItem.endLineNum = null;
- subItem.lines = [];
- subItem.labels = [];
- } else if (subItem.intertype == 'functionEnd') {
- item.functions.slice(-1)[0].endLineNum = subItem.lineNum;
- } else if (subItem.intertype == 'label') {
- item.functions.slice(-1)[0].labels.push(subItem);
- subItem.lines = [];
- } else if (item.functions.slice(-1)[0].endLineNum === null) {
- // Internal line
- item.functions.slice(-1)[0].lines.push(subItem);
- item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem);
- } else {
- pri