//"use strict";

// LLVM assembly => internal intermediate representation, which is ready
// to be processed by the later stages.

// Holds the value returned by substrate.addActor('Tokenizer', ...) inside
// intertyper(); it is undefined until intertyper() has run.
var tokenizer; // TODO: Clean this up/out
               // XXX In particular, this closes over the substrate, which can keep stuff in memory, which is bad

// Tokenizes a single line of text outside of the normal actor pipeline.
// Calls the Tokenizer's processItem with inner=true, so the tokenized item
// ({ tokens, indent, lineNum }) is returned to the caller instead of being
// forwarded to the next actor. No lineNum is supplied here, so the returned
// item's lineNum is undefined.
function tokenize(text) {
  return tokenizer.processItem({ lineText: text }, true);
}

// Handy sets

// Built with the set() helper (defined elsewhere); membership is tested
// below with the `in` operator.
var ENCLOSER_STARTERS = set('[', '(', '<');
// Maps each opening encloser character to its closing counterpart.
var ENCLOSER_ENDERS = {
  '[': ']',
  '(': ')',
  '<': '>'
};
var ZEROINIT_UNDEF = set('zeroinitializer', 'undef');
var NSW_NUW = set('nsw', 'nuw');

// Intertyper

// Sets up the 'Intertyper' substrate and its actors.
//   data         - not referenced in the part of this function documented here.
//   sidePass     - when truthy this is not the main pass (mainPass = !sidePass);
//                  type/global/function bundling below only happens on the main pass.
//   baseLineNums - optional list of [start, base] pairs used to translate line
//                  indices into reported line numbers; defaults to [[0,0]].
function intertyper(data, sidePass, baseLineNums) {
  var mainPass = !sidePass;
  baseLineNums = baseLineNums || [[0,0]]; // each pair [#0,#1] means "starting from line #0, the base line num is #1"

  dprint('framework', 'Big picture: Starting intertyper, main pass=' + mainPass);

  // Substrate

  var substrate = new Substrate('Intertyper');

  // Line splitter. We break off some bunches of lines into unparsedBundles, which are
  // parsed in separate passes later. This helps to keep memory usage low - we can start
  // from raw lines and end up with final JS for each function individually that way, instead
  // of intertyping them all, then analyzing them all, etc.
  //
  // Input:   item.llvmLines, an array of lines (each entry is nulled out as it is consumed).
  // Output:  returns unparsedBundles; on the main pass this starts with the
  //          'unparsedTypes' and 'unparsedGlobals' bundles, followed by one
  //          'unparsedFunction' bundle per function. On a side pass it stays empty.
  // Forward: all remaining lines are forwarded to the 'Tokenizer' actor as
  //          { lineText, lineNum } items.
  substrate.addActor('LineSplitter', {
    processItem: function _lineSplitter(item) {
      var lines = item.llvmLines;
      var ret = [];                 // items that will be forwarded to the Tokenizer
      var inContinual = false;      // true while inside a multi-line llvm switch
      var inFunction = false;       // true between a 'define' line and its closing '}' (main pass only)
      var currFunctionLines;
      var currFunctionLineNum;
      var unparsedBundles = [];
      var unparsedTypes, unparsedGlobals;
      if (mainPass) {
        unparsedTypes = {
          intertype: 'unparsedTypes',
          lines: []
        };
        unparsedBundles.push(unparsedTypes);
        unparsedGlobals = {
          intertype: 'unparsedGlobals',
          lines: []
        };
        unparsedBundles.push(unparsedGlobals);
      }
      var baseLineNumPosition = 0;
      for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        lines[i] = null; // lines may be very very large. Allow GCing to occur in the loop by releasing refs here

        // Advance to the last baseLineNums pair whose starting line is <= i.
        while (baseLineNumPosition < baseLineNums.length-1 && i >= baseLineNums[baseLineNumPosition+1][0]) {
          baseLineNumPosition++;
        }

        if (mainPass && (line[0] == '%' || line[0] == '@')) {
          // If this isn't a type, it's a global variable, make a note of the information now, we will need it later
          var testType = /[@%\w\d\.\" $]+ = type .*/.exec(line);
          if (!testType) {
            // NOTE(review): `global` is null if the line does not match this pattern,
            // in which case global[1] throws - presumably every '%'/'@' line has the
            // form "<name> = ..."; confirm.
            var global = /([@%\w\d\.\" $]+) = .*/.exec(line);
            var globalIdent = toNiceIdent(global[1]);
            var testAlias = /[@%\w\d\.\" $]+ = alias .*/.exec(line);
            var testString = /^[^"]+c\"[^"]+"/.exec(line);
            Variables.globals[globalIdent] = {
              name: globalIdent,
              alias: !!testAlias,
              impl: VAR_EMULATED,
              isString : !!testString
            };
            unparsedGlobals.lines.push(line);
          } else {
            unparsedTypes.lines.push(line);
          }
          // Type and global lines are bundled only, never forwarded to the Tokenizer.
          continue;
        }
        if (mainPass && /^define .*/.test(line)) {
          // Start collecting a new function; its line number is 1-based.
          inFunction = true;
          currFunctionLines = [];
          currFunctionLineNum = i + 1;
        }
        if (!inFunction || !mainPass) {
          if (inContinual || /^\ +(to|catch |filter |cleanup).*/.test(line)) {
            // to after invoke or landingpad second line
            // Continuation lines are appended to the text of the previously pushed item.
            ret.slice(-1)[0].lineText += line;
            if (/^\ +\]/.test(line)) { // end of llvm switch
              inContinual = false;
            }
          } else {
            ret.push({
              lineText: line,
              // 1-based index, shifted by the offset of the active baseLineNums pair
              lineNum: i + 1 + baseLineNums[baseLineNumPosition][1] - baseLineNums[baseLineNumPosition][0]
            });
            if (/^\ +switch\ .*/.test(line)) { // beginning of llvm switch
              inContinual = true;
            }
          }
        } else {
          currFunctionLines.push(line);
        }
        if (mainPass && /^}.*/.test(line)) {
          // End of a function body: emit it as an unparsed bundle.
          inFunction = false;
          if (mainPass) {
            // Tokenize and parse just the header line (the first collected line) to get basic function info.
            var func = funcHeader.processItem(tokenizer.processItem({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true))[0];

            if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) {
              warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data');
              SKIP_STACK_IN_SMALL = 0;
            }

            unparsedBundles.push({
              intertype: 'unparsedFunction',
              // We need this early, to know basic function info - ident, params, varargs
              ident: toNiceIdent(func.ident),
              params: func.params,
              hasVarArgs: func.hasVarArgs,
              lineNum: currFunctionLineNum,
              lines: currFunctionLines
            });
            currFunctionLines = [];
          }
        }
      }
      // We need lines beginning with ';' inside functions, because older LLVM versions generated labels that way. But when not
      // parsing functions, we can ignore all such lines and save some time that way.
      // The filter also drops items whose lineText is empty.
      this.forwardItems(ret.filter(function(item) { return item.lineText && (item.lineText[0] != ';' || !mainPass); }), 'Tokenizer');
      return unparsedBundles;
    }
  });

  // Line tokenizer
  //
  // Splits item.lineText into tokens and builds
  //   { tokens: [...], indent: <index of first non-space char>, lineNum: item.lineNum }.
  // When `inner` is truthy that object is returned directly (used for recursive
  // tokenization and by tokenize()); otherwise it is forwarded to the 'Triager'
  // actor and null is returned.
  tokenizer = substrate.addActor('Tokenizer', {
    processItem: function _tokenizer(item, inner) {
      //assert(item.lineNum != 40000);
      //if (item.lineNum) print(item.lineNum);
      var tokens = [];
      var quotes = 0;       // toggles between 0 and 1 on every '"' seen
      var lastToken = null; // most recently created token, used for merging
      // NOTE(review): CHUNKSIZE is not referenced anywhere else in this function.
      var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied
      // Note: '{' is not an encloser, as its use in functions is split over many lines
      // Opening characters map to a nesting counter; closing characters map to
      // their matching opening character.
      var enclosers = {
        '[': 0,
        ']': '[',
        '(': 0,
        ')': '(',
        '<': 0,
        '>': '<'
      };
      var totalEnclosing = 0; // current nesting depth across all encloser kinds
      var that = this;
      // Turns `text` into a token and appends it to `tokens`, or merges it into
      // existing tokens. Empty text is ignored.
      function makeToken(text) {
        if (text.length == 0) return;
        // merge certain tokens
        // A quoted name following a lone '%', or a run consisting only of '*',
        // is glued onto the previous token.
        if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.exec(text) ) ) {
          lastToken.text += text;
          return;
        }

        var token = {
          text: text
        };
        if (text[0] in enclosers) {
          // Recursively tokenize the text with its first and last characters stripped.
          token.item = that.processItem({
            lineText: text.substr(1, text.length-2)
          }, true);
          token.type = text[0];
        }
        // merge certain tokens
        if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
          lastToken.text += ' ' + text;
        } else if (lastToken && /^}\**$/.exec(text)) { // }, }*, etc.
          // Walk back to the token whose text ends with '{' and collapse everything
          // after it into a single '{ ... }' token.
          var openBrace = tokens.length-1;
          while (tokens[openBrace].text.substr(-1) != '{') openBrace --;
          token = combineTokens(tokens.slice(openBrace+1));
          // The delete count is one more than the number of remaining elements;
          // splice simply removes everything from openBrace to the end.
          tokens.splice(openBrace, tokens.length-openBrace+1);
          tokens.push(token);
          token.type = '{';
          token.text = '{ ' + token.text + ' }';
          // Append '*' until the combined token has as many pointing levels as `text`.
          var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text);
          while (pointingLevelsToAdd > 0) {
            token.text += '*';
            pointingLevelsToAdd--;
          }
          lastToken = token;
        } else {
          tokens.push(token);
          lastToken = token;
        }
      }
      // Split using meaningful characters
      // A trailing space is appended so that the final segment is terminated by a separator.
      var lineText = item.lineText + ' ';
      var re = /[\[\]\(\)<>, "]/g;
      var segments = lineText.split(re);
      // Drop the piece after the final separator (the appended space).
      segments.pop();
      var len = segments.length;
      var i = -1;      // index in lineText of the separator that follows the current segment
      var curr = '';   // text accumulated for the token currently being built
      var segment, letter;
      for (var s = 0; s < len; s++) {
        segment = segments[s];
        i += segment.length + 1;
        letter = lineText[i];
        curr += segment;
        switch (letter) {
          case ' ':
            // A space ends the token only outside quotes and enclosers.
            if (totalEnclosing == 0 && quotes == 0) {
              makeToken(curr);
              curr = '';
            } else {
              curr += ' ';
            }
            break;
          case '"':
            if (totalEnclosing == 0) {
              if (quotes == 0) {
                // Opening quote: keep it attached to a bare '@' or '%' prefix,
                // otherwise flush what we have and start a new quoted token.
                if (curr == '@' || curr == '%') {
                  curr += '"';
                } else {
                  makeToken(curr);
                  curr = '"';
                }
              } else {
                // Closing quote: the quoted text, including both quotes, becomes a token.
                makeToken(curr + '"');
                curr = '';
              }
            } else {
              curr += '"';
            }
            quotes = 1-quotes;
            break;
          case ',':
            if (totalEnclosing == 0 && quotes == 0) {
              makeToken(curr);
              curr = '';
              // The comma is pushed directly, without going through makeToken,
              // so lastToken is not updated.
              tokens.push({ text: ',' });
            } else {
              curr += ',';
            }
            break;
          default:
            // The only other characters matched by `re` are the enclosers.
            assert(letter in enclosers);
            if (quotes) {
              curr += letter;
              break;
            }
            if (letter in ENCLOSER_STARTERS) {
              if (totalEnclosing == 0) {
                makeToken(curr);
                curr = '';
              }
              curr += letter;
              enclosers[letter]++;
              totalEnclosing++;
            } else {
              // Closing encloser: decrement the counter of its matching opener.
              enclosers[enclosers[letter]]--;
              totalEnclosing--;
              if (totalEnclosing == 0) {
                // Outermost encloser closed: the whole enclosed text becomes one token.
                makeToken(curr + letter);
                curr = '';
              } else {
                curr += letter;
              }
            }
        }
      }
      var newItem = {
        tokens: tokens,
        indent: lineText.search(/[^ ]/),
        lineNum: item.lineNum
      };
      if (inner) {
        return newItem;
      } else {
        this.forwardItem(newItem, 'Triager');
      }
      return null;
    }
  });

  substrate.addActor('Triager', {
    processItem: function _triager(item) {
function triage() { assert(!item.intertype); var token0Text = item.tokens[0].text; var token1Text = item.tokens[1] ? item.tokens[1].text : null; var tokensLength = item.tokens.length; if (item.indent === 2) { if (tokensLength >= 5 && (token0Text == 'store' || token1Text == 'store')) return 'Store'; if (tokensLength >= 3 && token0Text == 'br') return 'Branch'; if (tokensLength >= 2 && token0Text == 'ret') return 'Return'; if (tokensLength >= 2 && token0Text == 'switch') return 'Switch'; if (token0Text == 'unreachable') return 'Unreachable'; if (tokensLength >= 3 && token0Text == 'indirectbr') return 'IndirectBr'; if (tokensLength >= 2 && token0Text == 'resume') return 'Resume'; if (tokensLength >= 3 && (token0Text == 'load' || token1Text == 'load')) return 'Load'; if (tokensLength >= 3 && token0Text in MATHOPS) return 'Mathops'; if (tokensLength >= 3 && token0Text == 'bitcast') return 'Bitcast'; if (tokensLength >= 3 && token0Text == 'getelementptr') return 'GEP'; if (tokensLength >= 2 && token0Text == 'alloca') return 'Alloca'; if (tokensLength >= 3 && token0Text == 'extractvalue') return 'ExtractValue'; if (tokensLength >= 3 && token0Text == 'insertvalue') return 'InsertValue'; if (tokensLength >= 3 && token0Text == 'phi') return 'Phi'; if (tokensLength >= 3 && token0Text == 'landingpad') return 'Landingpad'; if (token0Text == 'fence') return '/dev/null'; } else if (item.indent === 0) { if ((tokensLength >= 1 && token0Text.substr(-1) == ':') || (tokensLength >= 3 && token1Text == '