about summary refs log tree commit diff
path: root/src/intertyper.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/intertyper.js')
-rw-r--r-- src/intertyper.js 1733
1 files changed, 826 insertions, 907 deletions
diff --git a/src/intertyper.js b/src/intertyper.js
index 082fd993..e43cc298 100644
--- a/src/intertyper.js
+++ b/src/intertyper.js
@@ -3,10 +3,149 @@
// LLVM assembly => internal intermediate representation, which is ready
// to be processed by the later stages.
-var tokenizer; // TODO: Clean this up/out
- // XXX In particular, this closes over the substrate, which can keep stuff in memory, which is bad
+// Line tokenizer
+//
+// Splits item.lineText into tokens and returns { tokens, indent, lineNum }.
+// Text enclosed in [], () or <> becomes one token whose contents are also
+// tokenized recursively into token.item; a quoted string stays in one token.
+// 'inner' is not read here; it is kept so existing callers need no change.
+function tokenizer(item, inner) {
+  //assert(item.lineNum != 40000);
+  //if (item.lineNum) print(item.lineNum);
+  var tokens = [];
+  var quotes = 0; // 1 while inside a double-quoted string, else 0
+  var lastToken = null;
+  // Note: '{' is not an encloser, as its use in functions is split over many lines
+  // Openers map to their current nesting count, closers map to their opener
+  var enclosers = {
+    '[': 0,
+    ']': '[',
+    '(': 0,
+    ')': '(',
+    '<': 0,
+    '>': '<'
+  };
+  var totalEnclosing = 0;
+  // Adds 'text' as a new token, or merges it into earlier tokens where needed
+  function makeToken(text) {
+    if (text.length == 0) return;
+    // merge certain tokens: a quoted name after a bare '%', or a run of '*'
+    if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) {
+      lastToken.text += text;
+      return;
+    }
+
+    var token = {
+      text: text
+    };
+    if (text[0] in enclosers) {
+      token.item = tokenizer({
+        lineText: text.substr(1, text.length-2)
+      }, true);
+      token.type = text[0];
+    }
+    // merge certain tokens
+    if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
+      lastToken.text += ' ' + text;
+    } else if (lastToken && text[0] == '}') { // }, }*, etc.
+      // Fold everything after the nearest preceding token ending in '{' into one token
+      var openBrace = tokens.length-1;
+      while (openBrace >= 0 && tokens[openBrace].text.substr(-1) != '{') openBrace--;
+      // Fail with a readable message instead of a TypeError on tokens[-1]
+      if (openBrace < 0) throw 'Unmatched closing brace, cannot tokenize: ' + item.lineText;
+      token = combineTokens(tokens.slice(openBrace+1));
+      tokens.splice(openBrace, tokens.length-openBrace); // drop the '{' token and all that follow
+      tokens.push(token);
+      token.type = '{';
+      token.text = '{ ' + token.text + ' }';
+      var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text);
+      while (pointingLevelsToAdd > 0) {
+        token.text += '*';
+        pointingLevelsToAdd--;
+      }
+      lastToken = token;
+    } else {
+      tokens.push(token);
+      lastToken = token;
+    }
+  }
+  // Split using meaningful characters
+  var lineText = item.lineText + ' '; // trailing space so the last segment also ends at a delimiter
+  var re = /[\[\]\(\)<>, "]/g;
+  var segments = lineText.split(re);
+  segments.pop();
+  var len = segments.length;
+  var i = -1; // index in lineText of the delimiter that ended the current segment
+  var curr = '';
+  var segment, letter;
+  for (var s = 0; s < len; s++) {
+    segment = segments[s];
+    i += segment.length + 1;
+    letter = lineText[i];
+    curr += segment;
+    switch (letter) {
+      case ' ':
+        if (totalEnclosing == 0 && quotes == 0) {
+          makeToken(curr);
+          curr = '';
+        } else {
+          curr += ' ';
+        }
+        break;
+      case '"':
+        if (totalEnclosing == 0) {
+          if (quotes == 0) {
+            if (curr == '@' || curr == '%') {
+              curr += '"';
+            } else {
+              makeToken(curr);
+              curr = '"';
+            }
+          } else {
+            makeToken(curr + '"');
+            curr = '';
+          }
+        } else {
+          curr += '"';
+        }
+        quotes = 1-quotes;
+        break;
+      case ',':
+        if (totalEnclosing == 0 && quotes == 0) {
+          makeToken(curr);
+          curr = '';
+          tokens.push({ text: ',' });
+        } else {
+          curr += ',';
+        }
+        break;
+      default:
+        assert(letter in enclosers);
+        if (quotes) {
+          curr += letter;
+          break;
+        }
+        if (letter in ENCLOSER_STARTERS) {
+          if (totalEnclosing == 0) {
+            makeToken(curr);
+            curr = '';
+          }
+          curr += letter;
+          enclosers[letter]++;
+          totalEnclosing++;
+        } else {
+          enclosers[enclosers[letter]]--;
+          totalEnclosing--;
+          if (totalEnclosing == 0) {
+            makeToken(curr + letter);
+            curr = '';
+          } else {
+            curr += letter;
+          }
+        }
+    }
+  }
+  var newItem = {
+    tokens: tokens,
+    indent: lineText.search(/[^ ]/),
+    lineNum: item.lineNum
+  };
+  return newItem;
+}
+
function tokenize(text) {
- return tokenizer.processItem({ lineText: text }, true);
+ return tokenizer({ lineText: text }, true); // tokenizer is a plain function in the new version, so it is called directly
}
// Handy sets
@@ -22,672 +161,487 @@ var NSW_NUW = set('nsw', 'nuw');
// Intertyper
-function intertyper(data, sidePass, baseLineNums) {
+function intertyper(lines, sidePass, baseLineNums) {
var mainPass = !sidePass;
baseLineNums = baseLineNums || [[0,0]]; // each pair [#0,#1] means "starting from line #0, the base line num is #1"
dprint('framework', 'Big picture: Starting intertyper, main pass=' + mainPass);
- // Substrate
-
- var substrate = new Substrate('Intertyper');
+ var finalResults = [];
- // Line splitter. We break off some bunches of lines into unparsedBundles, which are
+ // Line splitter. We break off some bunches of lines into unparsed bundles, which are
// parsed in separate passes later. This helps to keep memory usage low - we can start
// from raw lines and end up with final JS for each function individually that way, instead
// of intertyping them all, then analyzing them all, etc.
- substrate.addActor('LineSplitter', {
- processItem: function _lineSplitter(item) {
- var lines = item.llvmLines;
- var ret = [];
- var inContinual = false;
- var inFunction = false;
- var currFunctionLines;
- var currFunctionLineNum;
- var unparsedBundles = [];
- var unparsedTypes, unparsedGlobals;
- if (mainPass) {
- unparsedTypes = {
- intertype: 'unparsedTypes',
- lines: []
- };
- unparsedBundles.push(unparsedTypes);
- unparsedGlobals = {
- intertype: 'unparsedGlobals',
- lines: []
- };
- unparsedBundles.push(unparsedGlobals);
- }
- var baseLineNumPosition = 0;
- for (var i = 0; i < lines.length; i++) {
- var line = lines[i];
- if (singlePhase) lines[i] = null; // lines may be very very large. Allow GCing to occur in the loop by releasing refs here
+ function lineSplitter() { // Walks 'lines' once: in the main pass, type/global lines and whole function bodies are pushed unparsed to finalResults; every other line is returned as a { lineText, lineNum } item
+ var ret = [];
+ var inContinual = false; // true while inside a multi-line llvm switch; such lines are appended to the previous item
+ var inFunction = false;
+ var currFunctionLines;
+ var currFunctionLineNum;
+ var unparsedTypes, unparsedGlobals;
+ if (mainPass) {
+ unparsedTypes = {
+ intertype: 'unparsedTypes',
+ lines: []
+ };
+ finalResults.push(unparsedTypes);
+ unparsedGlobals = {
+ intertype: 'unparsedGlobals',
+ lines: []
+ };
+ finalResults.push(unparsedGlobals);
+ }
+ var baseLineNumPosition = 0; // index into baseLineNums of the pair that applies to the current line
+ for (var i = 0; i < lines.length; i++) {
+ var line = lines[i];
+ if (singlePhase) lines[i] = null; // lines may be very very large. Allow GCing to occur in the loop by releasing refs here
- while (baseLineNumPosition < baseLineNums.length-1 && i >= baseLineNums[baseLineNumPosition+1][0]) {
- baseLineNumPosition++;
- }
+ while (baseLineNumPosition < baseLineNums.length-1 && i >= baseLineNums[baseLineNumPosition+1][0]) {
+ baseLineNumPosition++;
+ }
- if (mainPass && (line[0] == '%' || line[0] == '@')) {
- // If this isn't a type, it's a global variable, make a note of the information now, we will need it later
- var parts = line.split(' = ');
- assert(parts.length >= 2);
- var left = parts[0], right = parts.slice(1).join(' = ');
- var testType = /^type .*/.exec(right);
- if (!testType) {
- var globalIdent = toNiceIdent(left);
- var testAlias = /^(hidden )?alias .*/.exec(right);
- Variables.globals[globalIdent] = {
- name: globalIdent,
- alias: !!testAlias,
- impl: VAR_EMULATED
- };
- unparsedGlobals.lines.push(line);
- } else {
- unparsedTypes.lines.push(line);
- }
- continue;
- }
- if (mainPass && /^define .*/.test(line)) {
- inFunction = true;
- currFunctionLines = [];
- currFunctionLineNum = i + 1;
+ if (mainPass && (line[0] == '%' || line[0] == '@')) {
+ // If this isn't a type, it's a global variable, make a note of the information now, we will need it later
+ var parts = line.split(' = ');
+ assert(parts.length >= 2);
+ var left = parts[0], right = parts.slice(1).join(' = ');
+ var testType = /^type .*/.exec(right);
+ if (!testType) {
+ var globalIdent = toNiceIdent(left);
+ var testAlias = /^(hidden )?alias .*/.exec(right);
+ Variables.globals[globalIdent] = {
+ name: globalIdent,
+ alias: !!testAlias,
+ impl: VAR_EMULATED
+ };
+ unparsedGlobals.lines.push(line);
+ } else {
+ unparsedTypes.lines.push(line);
}
- if (!inFunction || !mainPass) {
- if (inContinual || /^\ +(to|catch |filter |cleanup).*/.test(line)) {
- // to after invoke or landingpad second line
- ret.slice(-1)[0].lineText += line;
- if (/^\ +\]/.test(line)) { // end of llvm switch
- inContinual = false;
- }
- } else {
- ret.push({
- lineText: line,
- lineNum: i + 1 + baseLineNums[baseLineNumPosition][1] - baseLineNums[baseLineNumPosition][0]
- });
- if (/^\ +switch\ .*/.test(line)) {
- // beginning of llvm switch
- inContinual = true;
- }
+ continue;
+ }
+ if (mainPass && /^define .*/.test(line)) {
+ inFunction = true;
+ currFunctionLines = [];
+ currFunctionLineNum = i + 1; // 1-based position of the 'define' line within 'lines'
+ }
+ if (!inFunction || !mainPass) {
+ if (inContinual || /^\ +(to|catch |filter |cleanup).*/.test(line)) {
+ // to after invoke or landingpad second line
+ ret.slice(-1)[0].lineText += line;
+ if (/^\ +\]/.test(line)) { // end of llvm switch
+ inContinual = false;
}
} else {
- currFunctionLines.push(line);
+ ret.push({
+ lineText: line,
+ lineNum: i + 1 + baseLineNums[baseLineNumPosition][1] - baseLineNums[baseLineNumPosition][0]
+ });
+ if (/^\ +switch\ .*/.test(line)) {
+ // beginning of llvm switch
+ inContinual = true;
+ }
}
- if (mainPass && /^}.*/.test(line)) {
- inFunction = false;
- if (mainPass) {
- var func = funcHeader.processItem(tokenizer.processItem({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true))[0];
+ } else {
+ currFunctionLines.push(line);
+ }
+ if (mainPass && /^}.*/.test(line)) {
+ inFunction = false;
+ if (mainPass) {
+ var func = funcHeaderHandler(tokenizer({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true)); // only the first (define) line is parsed here; the rest stay as raw lines
- if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) {
- warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data');
- SKIP_STACK_IN_SMALL = 0;
- }
+ if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) {
+ warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data');
+ SKIP_STACK_IN_SMALL = 0;
+ }
- var ident = toNiceIdent(func.ident);
- if (!(ident in DEAD_FUNCTIONS)) {
- unparsedBundles.push({
- intertype: 'unparsedFunction',
- // We need this early, to know basic function info - ident, params, varargs
- ident: ident,
- params: func.params,
- returnType: func.returnType,
- hasVarArgs: func.hasVarArgs,
- lineNum: currFunctionLineNum,
- lines: currFunctionLines
- });
- }
- currFunctionLines = [];
+ var ident = toNiceIdent(func.ident);
+ if (!(ident in DEAD_FUNCTIONS)) {
+ finalResults.push({
+ intertype: 'unparsedFunction',
+ // We need this early, to know basic function info - ident, params, varargs
+ ident: ident,
+ params: func.params,
+ returnType: func.returnType,
+ hasVarArgs: func.hasVarArgs,
+ lineNum: currFunctionLineNum,
+ lines: currFunctionLines
+ });
}
+ currFunctionLines = []; // drop our reference; a bundle pushed above keeps the old array
}
}
- // We need lines beginning with ';' inside functions, because older LLVM versions generated labels that way. But when not
- // parsing functions, we can ignore all such lines and save some time that way.
- this.forwardItems(ret.filter(function(item) { return item.lineText && (item.lineText[0] != ';' || !mainPass); }), 'Tokenizer');
- return unparsedBundles;
}
- });
-
- // Line tokenizer
- tokenizer = substrate.addActor('Tokenizer', {
- processItem: function _tokenizer(item, inner) {
- //assert(item.lineNum != 40000);
- //if (item.lineNum) print(item.lineNum);
- var tokens = [];
- var quotes = 0;
- var lastToken = null;
- var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied
- // Note: '{' is not an encloser, as its use in functions is split over many lines
- var enclosers = {
- '[': 0,
- ']': '[',
- '(': 0,
- ')': '(',
- '<': 0,
- '>': '<'
- };
- var totalEnclosing = 0;
- var that = this;
- function makeToken(text) {
- if (text.length == 0) return;
- // merge certain tokens
- if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) {
- lastToken.text += text;
- return;
- }
+ // We need lines beginning with ';' inside functions, because older LLVM versions generated labels that way. But when not
+ // parsing functions, we can ignore all such lines and save some time that way.
+ return ret.filter(function(item) { return item.lineText && (item.lineText[0] != ';' || !mainPass); });
+ }
- var token = {
- text: text
- };
- if (text[0] in enclosers) {
- token.item = that.processItem({
- lineText: text.substr(1, text.length-2)
- }, true);
- token.type = text[0];
- }
- // merge certain tokens
- if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
- lastToken.text += ' ' + text;
- } else if (lastToken && text[0] == '}') { // }, }*, etc.
- var openBrace = tokens.length-1;
- while (tokens[openBrace].text.substr(-1) != '{') openBrace --;
- token = combineTokens(tokens.slice(openBrace+1));
- tokens.splice(openBrace, tokens.length-openBrace+1);
- tokens.push(token);
- token.type = '{';
- token.text = '{ ' + token.text + ' }';
- var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text);
- while (pointingLevelsToAdd > 0) {
- token.text += '*';
- pointingLevelsToAdd--;
- }
- lastToken = token;
- } else {
- tokens.push(token);
- lastToken = token;
- }
+ // Picks the handler for a tokenized line from its indentation and leading
+ // tokens, and returns that handler's result. Returns null for lines that are
+ // deliberately ignored (fence, module asm, attributes, target, ';').
+ // Throws a string if no rule matches.
+ function triager(item) {
+   assert(!item.intertype);
+   // Declared locally: without this the assignment below creates a global
+   var eq;
+   // For an indent-2 line containing '=', record the assignment target and
+   // keep only the tokens after the '='
+   if (item.indent == 2 && (eq = findTokenText(item, '=')) >= 0) {
+     item.assignTo = toNiceIdent(combineTokens(item.tokens.slice(0, eq)).text);
+     item.tokens = item.tokens.slice(eq+1);
+   }
+   var token0Text = item.tokens[0].text;
+   var token1Text = item.tokens[1] ? item.tokens[1].text : null;
+   var tokensLength = item.tokens.length;
+   if (item.indent === 2) {
+     if (tokensLength >= 5 &&
+         (token0Text == 'store' || token1Text == 'store'))
+       return storeHandler(item);
+     if (tokensLength >= 3 && token0Text == 'br')
+       return branchHandler(item);
+     if (tokensLength >= 2 && token0Text == 'ret')
+       return returnHandler(item);
+     if (tokensLength >= 2 && token0Text == 'switch')
+       return switchHandler(item);
+     if (token0Text == 'unreachable')
+       return unreachableHandler(item);
+     if (tokensLength >= 3 && token0Text == 'indirectbr')
+       return indirectBrHandler(item);
+     if (tokensLength >= 2 && token0Text == 'resume')
+       return resumeHandler(item);
+     if (tokensLength >= 3 &&
+         (token0Text == 'load' || token1Text == 'load'))
+       return loadHandler(item);
+     if (tokensLength >= 3 &&
+         token0Text in MATHOPS)
+       return mathopsHandler(item);
+     if (tokensLength >= 3 && token0Text == 'bitcast')
+       return bitcastHandler(item);
+     if (tokensLength >= 3 && token0Text == 'getelementptr')
+       return GEPHandler(item);
+     if (tokensLength >= 2 && token0Text == 'alloca')
+       return allocaHandler(item);
+     if (tokensLength >= 3 && token0Text == 'extractvalue')
+       return extractValueHandler(item);
+     if (tokensLength >= 3 && token0Text == 'insertvalue')
+       return insertValueHandler(item);
+     if (tokensLength >= 3 && token0Text == 'phi')
+       return phiHandler(item);
+     if (tokensLength >= 3 && token0Text == 'va_arg')
+       return va_argHandler(item);
+     if (tokensLength >= 3 && token0Text == 'landingpad')
+       return landingpadHandler(item);
+     if (token0Text == 'fence')
+       return null;
+   } else if (item.indent === 0) {
+     if ((tokensLength >= 1 && token0Text.substr(-1) == ':') ||
+         (tokensLength >= 3 && token1Text == '<label>') ||
+         (tokensLength >= 2 && token1Text == ':'))
+       return labelHandler(item);
+     if (tokensLength >= 4 && token0Text == 'declare')
+       return externalHandler(item);
+     if (tokensLength >= 3 && token1Text == '=')
+       return globalHandler(item);
+     if (tokensLength >= 4 && token0Text == 'define' &&
+         item.tokens.slice(-1)[0].text == '{')
+       return funcHeaderHandler(item);
+     if (tokensLength >= 1 && token0Text == '}')
+       return funcEndHandler(item);
+     if (token0Text == 'module' && token1Text == 'asm') {
+       warn('Ignoring module asm: ' + item.tokens[2].text);
+       return null;
}
- // Split using meaningful characters
- var lineText = item.lineText + ' ';
- var re = /[\[\]\(\)<>, "]/g;
- var segments = lineText.split(re);
- segments.pop();
- var len = segments.length;
- var i = -1;
- var curr = '';
- var segment, letter;
- for (var s = 0; s < len; s++) {
- segment = segments[s];
- i += segment.length + 1;
- letter = lineText[i];
- curr += segment;
- switch (letter) {
- case ' ':
- if (totalEnclosing == 0 && quotes == 0) {
- makeToken(curr);
- curr = '';
- } else {
- curr += ' ';
- }
- break;
- case '"':
- if (totalEnclosing == 0) {
- if (quotes == 0) {
- if (curr == '@' || curr == '%') {
- curr += '"';
- } else {
- makeToken(curr);
- curr = '"';
- }
- } else {
- makeToken(curr + '"');
- curr = '';
- }
- } else {
- curr += '"';
- }
- quotes = 1-quotes;
- break;
- case ',':
- if (totalEnclosing == 0 && quotes == 0) {
- makeToken(curr);
- curr = '';
- tokens.push({ text: ',' });
- } else {
- curr += ',';
- }
- break;
- default:
- assert(letter in enclosers);
- if (quotes) {
- curr += letter;
- break;
- }
- if (letter in ENCLOSER_STARTERS) {
- if (totalEnclosing == 0) {
- makeToken(curr);
- curr = '';
- }
- curr += letter;
- enclosers[letter]++;
- totalEnclosing++;
- } else {
- enclosers[enclosers[letter]]--;
- totalEnclosing--;
- if (totalEnclosing == 0) {
- makeToken(curr + letter);
- curr = '';
- } else {
- curr += letter;
- }
- }
+     if (token0Text == 'attributes')
+       return null;
+   }
+   if (tokensLength >= 3 && (token0Text == 'call' || token1Text == 'call'))
+     return callHandler(item);
+   if (token0Text == 'target') {
+     if (token1Text == 'triple') {
+       var triple = item.tokens[3].text;
+       triple = triple.substr(1, triple.length-2); // strip the surrounding quotes
+       var expected = TARGET_LE32 ? 'le32-unknown-nacl' : 'i386-pc-linux-gnu';
+       if (triple !== expected) {
+         warn('using an unexpected LLVM triple: ' + [triple, ' !== ', expected] + ' (are you using emcc for everything and not clang?)');
}
}
- var newItem = {
- tokens: tokens,
- indent: lineText.search(/[^ ]/),
- lineNum: item.lineNum
- };
- if (inner) {
- return newItem;
- } else {
- this.forwardItem(newItem, 'Triager');
- }
return null;
}
- });
+   if (token0Text == ';')
+     return null;
+   if (tokensLength >= 3 && token0Text == 'invoke')
+     return invokeHandler(item);
+   // Grouped so the length guard covers both atomic opcodes, not only atomicrmw
+   if (tokensLength >= 3 && (token0Text == 'atomicrmw' || token0Text == 'cmpxchg'))
+     return atomicHandler(item);
+   throw 'Invalid token, cannot triage: ' + dump(item);
+ }
- substrate.addActor('Triager', {
- processItem: function _triager(item) {
- function triage() {
- assert(!item.intertype);
- var token0Text = item.tokens[0].text;
- var token1Text = item.tokens[1] ? item.tokens[1].text : null;
- var tokensLength = item.tokens.length;
- if (item.indent === 2) {
- if (tokensLength >= 5 &&
- (token0Text == 'store' || token1Text == 'store'))
- return 'Store';
- if (tokensLength >= 3 && token0Text == 'br')
- return 'Branch';
- if (tokensLength >= 2 && token0Text == 'ret')
- return 'Return';
- if (tokensLength >= 2 && token0Text == 'switch')
- return 'Switch';
- if (token0Text == 'unreachable')
- return 'Unreachable';
- if (tokensLength >= 3 && token0Text == 'indirectbr')
- return 'IndirectBr';
- if (tokensLength >= 2 && token0Text == 'resume')
- return 'Resume';
- if (tokensLength >= 3 &&
- (token0Text == 'load' || token1Text == 'load'))
- return 'Load';
- if (tokensLength >= 3 &&
- token0Text in MATHOPS)
- return 'Mathops';
- if (tokensLength >= 3 && token0Text == 'bitcast')
- return 'Bitcast';
- if (tokensLength >= 3 && token0Text == 'getelementptr')
- return 'GEP';
- if (tokensLength >= 2 && token0Text == 'alloca')
- return 'Alloca';
- if (tokensLength >= 3 && token0Text == 'extractvalue')
- return 'ExtractValue';
- if (tokensLength >= 3 && token0Text == 'insertvalue')
- return 'InsertValue';
- if (tokensLength >= 3 && token0Text == 'phi')
- return 'Phi';
- if (tokensLength >= 3 && token0Text == 'va_arg')
- return 'va_arg';
- if (tokensLength >= 3 && token0Text == 'landingpad')
- return 'Landingpad';
- if (token0Text == 'fence')
- return '/dev/null';
- } else if (item.indent === 0) {
- if ((tokensLength >= 1 && token0Text.substr(-1) == ':') ||
- (tokensLength >= 3 && token1Text == '<label>') ||
- (tokensLength >= 2 && token1Text == ':'))
- return 'Label';
- if (tokensLength >= 4 && token0Text == 'declare')
- return 'External';
- if (tokensLength >= 3 && token1Text == '=')
- return 'Global';
- if (tokensLength >= 4 && token0Text == 'define' &&
- item.tokens.slice(-1)[0].text == '{')
- return 'FuncHeader';
- if (tokensLength >= 1 && token0Text == '}')
- return 'FuncEnd';
- if (token0Text == 'module' && token1Text == 'asm') {
- warn('Ignoring module asm: ' + item.tokens[2].text);
- return '/dev/null';
+ // Line parsers to intermediate form
+
+ // globals: type or variable
+ function globalHandler(item) {
+ function scanConst(value, type) {
+ // Gets an array of constant items, separated by ',' tokens
+ function handleSegments(tokens) {
+ // Handle a single segment (after comma separation)
+ function handleSegment(segment) {
+ if (segment[1].text == 'null') {
+ return { intertype: 'value', ident: '0', type: 'i32' };
+ } else if (segment[1].text == 'zeroinitializer') {
+ Types.needAnalysis[segment[0].text] = 0;
+ return { intertype: 'emptystruct', type: segment[0].text };
+ } else if (segment[1].text in PARSABLE_LLVM_FUNCTIONS) {
+ return parseLLVMFunctionCall(segment);
+ } else if (segment[1].type && segment[1].type == '{') {
+ Types.needAnalysis[segment[0].text] = 0;
+ return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].tokens) };
+ } else if (segment[1].type && segment[1].type == '<') {
+ Types.needAnalysis[segment[0].text] = 0;
+ return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].item.tokens[0].tokens) };
+ } else if (segment[1].type && segment[1].type == '[') {
+ Types.needAnalysis[segment[0].text] = 0;
+ return { intertype: 'list', type: segment[0].text, contents: handleSegments(segment[1].item.tokens) };
+ } else if (segment.length == 2) {
+ Types.needAnalysis[segment[0].text] = 0;
+ return { intertype: 'value', type: segment[0].text, ident: toNiceIdent(segment[1].text) };
+ } else if (segment[1].text === 'c') {
+ // string
+ var text = segment[2].text;
+ text = text.substr(1, text.length-2);
+ return { intertype: 'string', text: text, type: 'i8*' };
+ } else if (segment[1].text === 'blockaddress') {
+ return parseBlockAddress(segment);
+ } else {
+ throw 'Invalid segment: ' + dump(segment);
}
- if (token0Text == 'attributes')
- return '/dev/null';
+ };
+ return splitTokenList(tokens).map(handleSegment);
+ }
+
+ Types.needAnalysis[type] = 0;
+ if (Runtime.isNumberType(type) || pointingLevels(type) >= 1) {
+ return { value: toNiceIdent(value.text), type: type };
+ } else if (value.text in ZEROINIT_UNDEF) { // undef doesn't really need initting, but why not
+ return { intertype: 'emptystruct', type: type };
+ } else if (value.text && value.text[0] == '"') {
+ return { intertype: 'string', text: value.text.substr(1, value.text.length-2) };
+ } else {
+ if (value.type == '<') { // <{ i8 }> etc.
+ value = value.item.tokens;
}
- if (tokensLength >= 3 && (token0Text == 'call' || token1Text == 'call'))
- return 'Call';
- if (token0Text == 'target') {
- if (token1Text == 'triple') {
- var triple = item.tokens[3].text;
- triple = triple.substr(1, triple.length-2);
- var expected = TARGET_LE32 ? 'le32-unknown-nacl' : 'i386-pc-linux-gnu';
- if (triple !== expected) {
- warn('using an unexpected LLVM triple: ' + [triple, ' !== ', expected] + ' (are you using emcc for everything and not clang?)');
- }
- }
- return '/dev/null';
+ var contents;
+ if (value.item) {
+ // list of items
+ contents = value.item.tokens;
+ } else if (value.type == '{') {
+ // struct
+ contents = value.tokens;
+ } else if (value[0]) {
+ contents = value[0];
+ } else {
+ throw '// interfailzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value);
}
- if (token0Text == ';')
- return '/dev/null';
- if (tokensLength >= 3 && token0Text == 'invoke')
- return 'Invoke';
- if (tokensLength >= 3 && token0Text == 'atomicrmw' || token0Text == 'cmpxchg')
- return 'Atomic';
- throw 'Invalid token, cannot triage: ' + dump(item);
+ return { intertype: 'segments', contents: handleSegments(contents) };
}
- var eq;
- if (item.indent == 2 && (eq = findTokenText(item, '=')) >= 0) {
- item.assignTo = toNiceIdent(combineTokens(item.tokens.slice(0, eq)).text);
- item.tokens = item.tokens.slice(eq+1);
- }
- this.forwardItem(item, triage());
}
- });
- // Line parsers to intermediate form
-
- // globals: type or variable
- substrate.addActor('Global', {
- processItem: function _global(item) {
- function scanConst(value, type) {
- // Gets an array of constant items, separated by ',' tokens
- function handleSegments(tokens) {
- // Handle a single segment (after comma separation)
- function handleSegment(segment) {
- if (segment[1].text == 'null') {
- return { intertype: 'value', ident: '0', type: 'i32' };
- } else if (segment[1].text == 'zeroinitializer') {
- Types.needAnalysis[segment[0].text] = 0;
- return { intertype: 'emptystruct', type: segment[0].text };
- } else if (segment[1].text in PARSABLE_LLVM_FUNCTIONS) {
- return parseLLVMFunctionCall(segment);
- } else if (segment[1].type && segment[1].type == '{') {
- Types.needAnalysis[segment[0].text] = 0;
- return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].tokens) };
- } else if (segment[1].type && segment[1].type == '<') {
- Types.needAnalysis[segment[0].text] = 0;
- return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].item.tokens[0].tokens) };
- } else if (segment[1].type && segment[1].type == '[') {
- Types.needAnalysis[segment[0].text] = 0;
- return { intertype: 'list', type: segment[0].text, contents: handleSegments(segment[1].item.tokens) };
- } else if (segment.length == 2) {
- Types.needAnalysis[segment[0].text] = 0;
- return { intertype: 'value', type: segment[0].text, ident: toNiceIdent(segment[1].text) };
- } else if (segment[1].text === 'c') {
- // string
- var text = segment[2].text;
- text = text.substr(1, text.length-2);
- return { intertype: 'string', text: text, type: 'i8*' };
- } else if (segment[1].text === 'blockaddress') {
- return parseBlockAddress(segment);
- } else {
- throw 'Invalid segment: ' + dump(segment);
- }
- };
- return splitTokenList(tokens).map(handleSegment);
+ cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 2);
+ if (item.tokens[2].text == 'alias') {
+ cleanOutTokens(LLVM.LINKAGES, item.tokens, 3);
+ cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 3);
+ var last = getTokenIndexByText(item.tokens, ';');
+ var ret = {
+ intertype: 'alias',
+ ident: toNiceIdent(item.tokens[0].text),
+ value: parseLLVMSegment(item.tokens.slice(3, last)),
+ lineNum: item.lineNum
+ };
+ ret.type = ret.value.type;
+ Types.needAnalysis[ret.type] = 0;
+ if (!NAMED_GLOBALS) {
+ Variables.globals[ret.ident].type = ret.type;
+ }
+ return ret;
+ }
+ if (item.tokens[2].text == 'type') {
+ var fields = [];
+ var packed = false;
+ if (Runtime.isNumberType(item.tokens[3].text)) {
+ // Clang sometimes has |= i32| instead of |= { i32 }|
+ fields = [item.tokens[3].text];
+ } else if (item.tokens[3].text != 'opaque') {
+ if (item.tokens[3].type == '<') {
+ packed = true;
+ item.tokens[3] = item.tokens[3].item.tokens[0];
}
-
- Types.needAnalysis[type] = 0;
- if (Runtime.isNumberType(type) || pointingLevels(type) >= 1) {
- return { value: toNiceIdent(value.text), type: type };
- } else if (value.text in ZEROINIT_UNDEF) { // undef doesn't really need initting, but why not
- return { intertype: 'emptystruct', type: type };
- } else if (value.text && value.text[0] == '"') {
- return { intertype: 'string', text: value.text.substr(1, value.text.length-2) };
- } else {
- if (value.type == '<') { // <{ i8 }> etc.
- value = value.item.tokens;
+ var subTokens = item.tokens[3].tokens;
+ if (subTokens) {
+ subTokens.push({text:','});
+ while (subTokens[0]) {
+ var stop = 1;
+ while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
+ fields.push(combineTokens(subTokens.slice(0, stop)).text);
+ subTokens.splice(0, stop+1);
}
- var contents;
- if (value.item) {
- // list of items
- contents = value.item.tokens;
- } else if (value.type == '{') {
- // struct
- contents = value.tokens;
- } else if (value[0]) {
- contents = value[0];
- } else {
- throw '// interfailzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value);
- }
- return { intertype: 'segments', contents: handleSegments(contents) };
}
}
-
- cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 2);
- if (item.tokens[2].text == 'alias') {
- cleanOutTokens(LLVM.LINKAGES, item.tokens, 3);
- cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 3);
- var last = getTokenIndexByText(item.tokens, ';');
- var ret = {
- intertype: 'alias',
- ident: toNiceIdent(item.tokens[0].text),
- value: parseLLVMSegment(item.tokens.slice(3, last)),
- lineNum: item.lineNum
- };
- ret.type = ret.value.type;
- Types.needAnalysis[ret.type] = 0;
- if (!NAMED_GLOBALS) {
- Variables.globals[ret.ident].type = ret.type;
- }
- return [ret];
+ return {
+ intertype: 'type',
+ name_: item.tokens[0].text,
+ fields: fields,
+ packed: packed,
+ lineNum: item.lineNum
+ };
+ } else {
+ // variable
+ var ident = item.tokens[0].text;
+ var private_ = findTokenText(item, 'private') >= 0 || findTokenText(item, 'internal') >= 0;
+ var named = findTokenText(item, 'unnamed_addr') < 0;
+ cleanOutTokens(LLVM.GLOBAL_MODIFIERS, item.tokens, [2, 3]);
+ var external = false;
+ if (item.tokens[2].text === 'external') {
+ external = true;
+ item.tokens.splice(2, 1);
}
- if (item.tokens[2].text == 'type') {
- var fields = [];
- var packed = false;
- if (Runtime.isNumberType(item.tokens[3].text)) {
- // Clang sometimes has |= i32| instead of |= { i32 }|
- fields = [item.tokens[3].text];
- } else if (item.tokens[3].text != 'opaque') {
- if (item.tokens[3].type == '<') {
- packed = true;
- item.tokens[3] = item.tokens[3].item.tokens[0];
- }
- var subTokens = item.tokens[3].tokens;
- if (subTokens) {
- subTokens.push({text:','});
- while (subTokens[0]) {
- var stop = 1;
- while ([','].indexOf(subTokens[stop].text) == -1) stop ++;
- fields.push(combineTokens(subTokens.slice(0, stop)).text);
- subTokens.splice(0, stop+1);
+ Types.needAnalysis[item.tokens[2].text] = 0;
+ var ret = {
+ intertype: 'globalVariable',
+ ident: toNiceIdent(ident),
+ type: item.tokens[2].text,
+ external: external,
+ private_: private_,
+ named: named,
+ lineNum: item.lineNum
+ };
+ if (!NAMED_GLOBALS) {
+ Variables.globals[ret.ident].type = ret.type;
+ Variables.globals[ret.ident].external = external;
+ }
+ Types.needAnalysis[ret.type] = 0;
+ if (ident == '@llvm.global_ctors') {
+ ret.ctors = [];
+ if (item.tokens[3].item) {
+ var subTokens = item.tokens[3].item.tokens;
+ splitTokenList(subTokens).forEach(function(segment) {
+ var ctor = toNiceIdent(segment[1].tokens.slice(-1)[0].text);
+ ret.ctors.push(ctor);
+ if (ASM_JS) { // must export the global constructors from asm.js module, so mark as implemented and exported
+ Functions.implementedFunctions[ctor] = 'v';
+ EXPORTED_FUNCTIONS[ctor] = 1