diff options
Diffstat (limited to 'src/intertyper.js')
-rw-r--r-- | src/intertyper.js | 1733 |
1 files changed, 826 insertions, 907 deletions
diff --git a/src/intertyper.js b/src/intertyper.js index 082fd993..e43cc298 100644 --- a/src/intertyper.js +++ b/src/intertyper.js @@ -3,10 +3,149 @@ // LLVM assembly => internal intermediate representation, which is ready // to be processed by the later stages. -var tokenizer; // TODO: Clean this up/out - // XXX In particular, this closes over the substrate, which can keep stuff in memory, which is bad +// Line tokenizer +function tokenizer(item, inner) { + //assert(item.lineNum != 40000); + //if (item.lineNum) print(item.lineNum); + var tokens = []; + var quotes = 0; + var lastToken = null; + var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied + // Note: '{' is not an encloser, as its use in functions is split over many lines + var enclosers = { + '[': 0, + ']': '[', + '(': 0, + ')': '(', + '<': 0, + '>': '<' + }; + var totalEnclosing = 0; + function makeToken(text) { + if (text.length == 0) return; + // merge certain tokens + if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) { + lastToken.text += text; + return; + } + + var token = { + text: text + }; + if (text[0] in enclosers) { + token.item = tokenizer({ + lineText: text.substr(1, text.length-2) + }, true); + token.type = text[0]; + } + // merge certain tokens + if (lastToken && isType(lastToken.text) && isFunctionDef(token)) { + lastToken.text += ' ' + text; + } else if (lastToken && text[0] == '}') { // }, }*, etc. + var openBrace = tokens.length-1; + while (tokens[openBrace].text.substr(-1) != '{') openBrace --; + token = combineTokens(tokens.slice(openBrace+1)); + tokens.splice(openBrace, tokens.length-openBrace+1); + tokens.push(token); + token.type = '{'; + token.text = '{ ' + token.text + ' }'; + var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text); + while (pointingLevelsToAdd > 0) { + token.text += '*'; + pointingLevelsToAdd--; + } + lastToken = token; + } else { + tokens.push(token); + lastToken = token; + } + } + // Split using meaningful characters + var lineText = item.lineText + ' '; + var re = /[\[\]\(\)<>, "]/g; + var segments = lineText.split(re); + segments.pop(); + var len = segments.length; + var i = -1; + var curr = ''; + var segment, letter; + for (var s = 0; s < len; s++) { + segment = segments[s]; + i += segment.length + 1; + letter = lineText[i]; + curr += segment; + switch (letter) { + case ' ': + if (totalEnclosing == 0 && quotes == 0) { + makeToken(curr); + curr = ''; + } else { + curr += ' '; + } + break; + case '"': + if (totalEnclosing == 0) { + if (quotes == 0) { + if (curr == '@' || curr == '%') { + curr += '"'; + } else { + makeToken(curr); + curr = '"'; + } + } else { + makeToken(curr + '"'); + curr = ''; + } + } else { + curr += '"'; + } + quotes = 1-quotes; + break; + case ',': + if (totalEnclosing == 0 && quotes == 0) { + makeToken(curr); + curr = ''; + tokens.push({ text: ',' }); + } else { + curr += ','; + } + break; + default: + assert(letter in enclosers); + if (quotes) { + curr += letter; + break; + } + if (letter in ENCLOSER_STARTERS) { + if (totalEnclosing == 0) { + makeToken(curr); + curr = ''; + } + curr += letter; + enclosers[letter]++; + totalEnclosing++; + } else { + enclosers[enclosers[letter]]--; + totalEnclosing--; + if (totalEnclosing == 0) { + makeToken(curr + letter); + curr = ''; + } else { + curr += letter; + } + } + } + } + var newItem = { + tokens: tokens, + indent: lineText.search(/[^ ]/), + lineNum: item.lineNum + }; + return newItem; +} + function tokenize(text) { - return tokenizer.processItem({ lineText: text }, true); + return tokenizer({ lineText: text }, true); } // Handy sets @@ -22,672 +161,487 @@ var NSW_NUW = set('nsw', 'nuw'); // Intertyper -function intertyper(data, sidePass, baseLineNums) { +function intertyper(lines, sidePass, baseLineNums) { var mainPass = !sidePass; baseLineNums = baseLineNums || [[0,0]]; // each pair [#0,#1] means "starting from line #0, the base line num is #1" dprint('framework', 'Big picture: Starting intertyper, main pass=' + mainPass); - // Substrate - - var substrate = new Substrate('Intertyper'); + var finalResults = []; - // Line splitter. We break off some bunches of lines into unparsedBundles, which are + // Line splitter. We break off some bunches of lines into unparsed bundles, which are // parsed in separate passes later. This helps to keep memory usage low - we can start // from raw lines and end up with final JS for each function individually that way, instead // of intertyping them all, then analyzing them all, etc. - substrate.addActor('LineSplitter', { - processItem: function _lineSplitter(item) { - var lines = item.llvmLines; - var ret = []; - var inContinual = false; - var inFunction = false; - var currFunctionLines; - var currFunctionLineNum; - var unparsedBundles = []; - var unparsedTypes, unparsedGlobals; - if (mainPass) { - unparsedTypes = { - intertype: 'unparsedTypes', - lines: [] - }; - unparsedBundles.push(unparsedTypes); - unparsedGlobals = { - intertype: 'unparsedGlobals', - lines: [] - }; - unparsedBundles.push(unparsedGlobals); - } - var baseLineNumPosition = 0; - for (var i = 0; i < lines.length; i++) { - var line = lines[i]; - if (singlePhase) lines[i] = null; // lines may be very very large. Allow GCing to occur in the loop by releasing refs here + function lineSplitter() { + var ret = []; + var inContinual = false; + var inFunction = false; + var currFunctionLines; + var currFunctionLineNum; + var unparsedTypes, unparsedGlobals; + if (mainPass) { + unparsedTypes = { + intertype: 'unparsedTypes', + lines: [] + }; + finalResults.push(unparsedTypes); + unparsedGlobals = { + intertype: 'unparsedGlobals', + lines: [] + }; + finalResults.push(unparsedGlobals); + } + var baseLineNumPosition = 0; + for (var i = 0; i < lines.length; i++) { + var line = lines[i]; + if (singlePhase) lines[i] = null; // lines may be very very large. Allow GCing to occur in the loop by releasing refs here - while (baseLineNumPosition < baseLineNums.length-1 && i >= baseLineNums[baseLineNumPosition+1][0]) { - baseLineNumPosition++; - } + while (baseLineNumPosition < baseLineNums.length-1 && i >= baseLineNums[baseLineNumPosition+1][0]) { + baseLineNumPosition++; + } - if (mainPass && (line[0] == '%' || line[0] == '@')) { - // If this isn't a type, it's a global variable, make a note of the information now, we will need it later - var parts = line.split(' = '); - assert(parts.length >= 2); - var left = parts[0], right = parts.slice(1).join(' = '); - var testType = /^type .*/.exec(right); - if (!testType) { - var globalIdent = toNiceIdent(left); - var testAlias = /^(hidden )?alias .*/.exec(right); - Variables.globals[globalIdent] = { - name: globalIdent, - alias: !!testAlias, - impl: VAR_EMULATED - }; - unparsedGlobals.lines.push(line); - } else { - unparsedTypes.lines.push(line); - } - continue; - } - if (mainPass && /^define .*/.test(line)) { - inFunction = true; - currFunctionLines = []; - currFunctionLineNum = i + 1; + if (mainPass && (line[0] == '%' || line[0] == '@')) { + // If this isn't a type, it's a global variable, make a note of the information now, we will need it later + var parts = line.split(' = '); + assert(parts.length >= 2); + var left = parts[0], right = parts.slice(1).join(' = '); + var testType = /^type .*/.exec(right); + if (!testType) { + var globalIdent = toNiceIdent(left); + var testAlias = /^(hidden )?alias .*/.exec(right); + Variables.globals[globalIdent] = { + name: globalIdent, + alias: !!testAlias, + impl: VAR_EMULATED + }; + unparsedGlobals.lines.push(line); + } else { + unparsedTypes.lines.push(line); } - if (!inFunction || !mainPass) { - if (inContinual || /^\ +(to|catch |filter |cleanup).*/.test(line)) { - // to after invoke or landingpad second line - ret.slice(-1)[0].lineText += line; - if (/^\ +\]/.test(line)) { // end of llvm switch - inContinual = false; - } - } else { - ret.push({ - lineText: line, - lineNum: i + 1 + baseLineNums[baseLineNumPosition][1] - baseLineNums[baseLineNumPosition][0] - }); - if (/^\ +switch\ .*/.test(line)) { - // beginning of llvm switch - inContinual = true; - } + continue; + } + if (mainPass && /^define .*/.test(line)) { + inFunction = true; + currFunctionLines = []; + currFunctionLineNum = i + 1; + } + if (!inFunction || !mainPass) { + if (inContinual || /^\ +(to|catch |filter |cleanup).*/.test(line)) { + // to after invoke or landingpad second line + ret.slice(-1)[0].lineText += line; + if (/^\ +\]/.test(line)) { // end of llvm switch + inContinual = false; } } else { - currFunctionLines.push(line); + ret.push({ + lineText: line, + lineNum: i + 1 + baseLineNums[baseLineNumPosition][1] - baseLineNums[baseLineNumPosition][0] + }); + if (/^\ +switch\ .*/.test(line)) { + // beginning of llvm switch + inContinual = true; + } } - if (mainPass && /^}.*/.test(line)) { - inFunction = false; - if (mainPass) { - var func = funcHeader.processItem(tokenizer.processItem({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true))[0]; + } else { + currFunctionLines.push(line); + } + if (mainPass && /^}.*/.test(line)) { + inFunction = false; + if (mainPass) { + var func = funcHeaderHandler(tokenizer({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true)); - if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) { - warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data'); - SKIP_STACK_IN_SMALL = 0; - } + if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) { + warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data'); + SKIP_STACK_IN_SMALL = 0; + } - var ident = toNiceIdent(func.ident); - if (!(ident in DEAD_FUNCTIONS)) { - unparsedBundles.push({ - intertype: 'unparsedFunction', - // We need this early, to know basic function info - ident, params, varargs - ident: ident, - params: func.params, - returnType: func.returnType, - hasVarArgs: func.hasVarArgs, - lineNum: currFunctionLineNum, - lines: currFunctionLines - }); - } - currFunctionLines = []; + var ident = toNiceIdent(func.ident); + if (!(ident in DEAD_FUNCTIONS)) { + finalResults.push({ + intertype: 'unparsedFunction', + // We need this early, to know basic function info - ident, params, varargs + ident: ident, + params: func.params, + returnType: func.returnType, + hasVarArgs: func.hasVarArgs, + lineNum: currFunctionLineNum, + lines: currFunctionLines + }); } + currFunctionLines = []; } } - // We need lines beginning with ';' inside functions, because older LLVM versions generated labels that way. But when not - // parsing functions, we can ignore all such lines and save some time that way. - this.forwardItems(ret.filter(function(item) { return item.lineText && (item.lineText[0] != ';' || !mainPass); }), 'Tokenizer'); - return unparsedBundles; } - }); - - // Line tokenizer - tokenizer = substrate.addActor('Tokenizer', { - processItem: function _tokenizer(item, inner) { - //assert(item.lineNum != 40000); - //if (item.lineNum) print(item.lineNum); - var tokens = []; - var quotes = 0; - var lastToken = null; - var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied - // Note: '{' is not an encloser, as its use in functions is split over many lines - var enclosers = { - '[': 0, - ']': '[', - '(': 0, - ')': '(', - '<': 0, - '>': '<' - }; - var totalEnclosing = 0; - var that = this; - function makeToken(text) { - if (text.length == 0) return; - // merge certain tokens - if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) { - lastToken.text += text; - return; - } + // We need lines beginning with ';' inside functions, because older LLVM versions generated labels that way. But when not + // parsing functions, we can ignore all such lines and save some time that way. + return ret.filter(function(item) { return item.lineText && (item.lineText[0] != ';' || !mainPass); }); + } - var token = { - text: text - }; - if (text[0] in enclosers) { - token.item = that.processItem({ - lineText: text.substr(1, text.length-2) - }, true); - token.type = text[0]; - } - // merge certain tokens - if (lastToken && isType(lastToken.text) && isFunctionDef(token)) { - lastToken.text += ' ' + text; - } else if (lastToken && text[0] == '}') { // }, }*, etc. - var openBrace = tokens.length-1; - while (tokens[openBrace].text.substr(-1) != '{') openBrace --; - token = combineTokens(tokens.slice(openBrace+1)); - tokens.splice(openBrace, tokens.length-openBrace+1); - tokens.push(token); - token.type = '{'; - token.text = '{ ' + token.text + ' }'; - var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text); - while (pointingLevelsToAdd > 0) { - token.text += '*'; - pointingLevelsToAdd--; - } - lastToken = token; - } else { - tokens.push(token); - lastToken = token; - } + function triager(item) { + assert(!item.intertype); + if (item.indent == 2 && (eq = findTokenText(item, '=')) >= 0) { + item.assignTo = toNiceIdent(combineTokens(item.tokens.slice(0, eq)).text); + item.tokens = item.tokens.slice(eq+1); + } + var token0Text = item.tokens[0].text; + var token1Text = item.tokens[1] ? item.tokens[1].text : null; + var tokensLength = item.tokens.length; + if (item.indent === 2) { + if (tokensLength >= 5 && + (token0Text == 'store' || token1Text == 'store')) + return storeHandler(item); + if (tokensLength >= 3 && token0Text == 'br') + return branchHandler(item); + if (tokensLength >= 2 && token0Text == 'ret') + return returnHandler(item); + if (tokensLength >= 2 && token0Text == 'switch') + return switchHandler(item); + if (token0Text == 'unreachable') + return unreachableHandler(item); + if (tokensLength >= 3 && token0Text == 'indirectbr') + return indirectBrHandler(item); + if (tokensLength >= 2 && token0Text == 'resume') + return resumeHandler(item); + if (tokensLength >= 3 && + (token0Text == 'load' || token1Text == 'load')) + return loadHandler(item); + if (tokensLength >= 3 && + token0Text in MATHOPS) + return mathopsHandler(item); + if (tokensLength >= 3 && token0Text == 'bitcast') + return bitcastHandler(item); + if (tokensLength >= 3 && token0Text == 'getelementptr') + return GEPHandler(item); + if (tokensLength >= 2 && token0Text == 'alloca') + return allocaHandler(item); + if (tokensLength >= 3 && token0Text == 'extractvalue') + return extractValueHandler(item); + if (tokensLength >= 3 && token0Text == 'insertvalue') + return insertValueHandler(item); + if (tokensLength >= 3 && token0Text == 'phi') + return phiHandler(item); + if (tokensLength >= 3 && token0Text == 'va_arg') + return va_argHandler(item); + if (tokensLength >= 3 && token0Text == 'landingpad') + return landingpadHandler(item); + if (token0Text == 'fence') + return null; + } else if (item.indent === 0) { + if ((tokensLength >= 1 && token0Text.substr(-1) == ':') || + (tokensLength >= 3 && token1Text == '<label>') || + (tokensLength >= 2 && token1Text == ':')) + return labelHandler(item); + if (tokensLength >= 4 && token0Text == 'declare') + return externalHandler(item); + if (tokensLength >= 3 && token1Text == '=') + return globalHandler(item); + if (tokensLength >= 4 && token0Text == 'define' && + item.tokens.slice(-1)[0].text == '{') + return funcHeaderHandler(item); + if (tokensLength >= 1 && token0Text == '}') + return funcEndHandler(item); + if (token0Text == 'module' && token1Text == 'asm') { + warn('Ignoring module asm: ' + item.tokens[2].text); + return null; } - // Split using meaningful characters - var lineText = item.lineText + ' '; - var re = /[\[\]\(\)<>, "]/g; - var segments = lineText.split(re); - segments.pop(); - var len = segments.length; - var i = -1; - var curr = ''; - var segment, letter; - for (var s = 0; s < len; s++) { - segment = segments[s]; - i += segment.length + 1; - letter = lineText[i]; - curr += segment; - switch (letter) { - case ' ': - if (totalEnclosing == 0 && quotes == 0) { - makeToken(curr); - curr = ''; - } else { - curr += ' '; - } - break; - case '"': - if (totalEnclosing == 0) { - if (quotes == 0) { - if (curr == '@' || curr == '%') { - curr += '"'; - } else { - makeToken(curr); - curr = '"'; - } - } else { - makeToken(curr + '"'); - curr = ''; - } - } else { - curr += '"'; - } - quotes = 1-quotes; - break; - case ',': - if (totalEnclosing == 0 && quotes == 0) { - makeToken(curr); - curr = ''; - tokens.push({ text: ',' }); - } else { - curr += ','; - } - break; - default: - assert(letter in enclosers); - if (quotes) { - curr += letter; - break; - } - if (letter in ENCLOSER_STARTERS) { - if (totalEnclosing == 0) { - makeToken(curr); - curr = ''; - } - curr += letter; - enclosers[letter]++; - totalEnclosing++; - } else { - enclosers[enclosers[letter]]--; - totalEnclosing--; - if (totalEnclosing == 0) { - makeToken(curr + letter); - curr = ''; - } else { - curr += letter; - } - } + if (token0Text == 'attributes') + return null; + } + if (tokensLength >= 3 && (token0Text == 'call' || token1Text == 'call')) + return callHandler(item); + if (token0Text == 'target') { + if (token1Text == 'triple') { + var triple = item.tokens[3].text; + triple = triple.substr(1, triple.length-2); + var expected = TARGET_LE32 ? 'le32-unknown-nacl' : 'i386-pc-linux-gnu'; + if (triple !== expected) { + warn('using an unexpected LLVM triple: ' + [triple, ' !== ', expected] + ' (are you using emcc for everything and not clang?)'); } } - var newItem = { - tokens: tokens, - indent: lineText.search(/[^ ]/), - lineNum: item.lineNum - }; - if (inner) { - return newItem; - } else { - this.forwardItem(newItem, 'Triager'); - } return null; } - }); + if (token0Text == ';') + return null; + if (tokensLength >= 3 && token0Text == 'invoke') + return invokeHandler(item); + if (tokensLength >= 3 && token0Text == 'atomicrmw' || token0Text == 'cmpxchg') + return atomicHandler(item); + throw 'Invalid token, cannot triage: ' + dump(item); + } - substrate.addActor('Triager', { - processItem: function _triager(item) { - function triage() { - assert(!item.intertype); - var token0Text = item.tokens[0].text; - var token1Text = item.tokens[1] ? item.tokens[1].text : null; - var tokensLength = item.tokens.length; - if (item.indent === 2) { - if (tokensLength >= 5 && - (token0Text == 'store' || token1Text == 'store')) - return 'Store'; - if (tokensLength >= 3 && token0Text == 'br') - return 'Branch'; - if (tokensLength >= 2 && token0Text == 'ret') - return 'Return'; - if (tokensLength >= 2 && token0Text == 'switch') - return 'Switch'; - if (token0Text == 'unreachable') - return 'Unreachable'; - if (tokensLength >= 3 && token0Text == 'indirectbr') - return 'IndirectBr'; - if (tokensLength >= 2 && token0Text == 'resume') - return 'Resume'; - if (tokensLength >= 3 && - (token0Text == 'load' || token1Text == 'load')) - return 'Load'; - if (tokensLength >= 3 && - token0Text in MATHOPS) - return 'Mathops'; - if (tokensLength >= 3 && token0Text == 'bitcast') - return 'Bitcast'; - if (tokensLength >= 3 && token0Text == 'getelementptr') - return 'GEP'; - if (tokensLength >= 2 && token0Text == 'alloca') - return 'Alloca'; - if (tokensLength >= 3 && token0Text == 'extractvalue') - return 'ExtractValue'; - if (tokensLength >= 3 && token0Text == 'insertvalue') - return 'InsertValue'; - if (tokensLength >= 3 && token0Text == 'phi') - return 'Phi'; - if (tokensLength >= 3 && token0Text == 'va_arg') - return 'va_arg'; - if (tokensLength >= 3 && token0Text == 'landingpad') - return 'Landingpad'; - if (token0Text == 'fence') - return '/dev/null'; - } else if (item.indent === 0) { - if ((tokensLength >= 1 && token0Text.substr(-1) == ':') || - (tokensLength >= 3 && token1Text == '<label>') || - (tokensLength >= 2 && token1Text == ':')) - return 'Label'; - if (tokensLength >= 4 && token0Text == 'declare') - return 'External'; - if (tokensLength >= 3 && token1Text == '=') - return 'Global'; - if (tokensLength >= 4 && token0Text == 'define' && - item.tokens.slice(-1)[0].text == '{') - return 'FuncHeader'; - if (tokensLength >= 1 && token0Text == '}') - return 'FuncEnd'; - if (token0Text == 'module' && token1Text == 'asm') { - warn('Ignoring module asm: ' + item.tokens[2].text); - return '/dev/null'; + // Line parsers to intermediate form + + // globals: type or variable + function globalHandler(item) { + function scanConst(value, type) { + // Gets an array of constant items, separated by ',' tokens + function handleSegments(tokens) { + // Handle a single segment (after comma separation) + function handleSegment(segment) { + if (segment[1].text == 'null') { + return { intertype: 'value', ident: '0', type: 'i32' }; + } else if (segment[1].text == 'zeroinitializer') { + Types.needAnalysis[segment[0].text] = 0; + return { intertype: 'emptystruct', type: segment[0].text }; + } else if (segment[1].text in PARSABLE_LLVM_FUNCTIONS) { + return parseLLVMFunctionCall(segment); + } else if (segment[1].type && segment[1].type == '{') { + Types.needAnalysis[segment[0].text] = 0; + return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].tokens) }; + } else if (segment[1].type && segment[1].type == '<') { + Types.needAnalysis[segment[0].text] = 0; + return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].item.tokens[0].tokens) }; + } else if (segment[1].type && segment[1].type == '[') { + Types.needAnalysis[segment[0].text] = 0; + return { intertype: 'list', type: segment[0].text, contents: handleSegments(segment[1].item.tokens) }; + } else if (segment.length == 2) { + Types.needAnalysis[segment[0].text] = 0; + return { intertype: 'value', type: segment[0].text, ident: toNiceIdent(segment[1].text) }; + } else if (segment[1].text === 'c') { + // string + var text = segment[2].text; + text = text.substr(1, text.length-2); + return { intertype: 'string', text: text, type: 'i8*' }; + } else if (segment[1].text === 'blockaddress') { + return parseBlockAddress(segment); + } else { + throw 'Invalid segment: ' + dump(segment); } - if (token0Text == 'attributes') - return '/dev/null'; + }; + return splitTokenList(tokens).map(handleSegment); + } + + Types.needAnalysis[type] = 0; + if (Runtime.isNumberType(type) || pointingLevels(type) >= 1) { + return { value: toNiceIdent(value.text), type: type }; + } else if (value.text in ZEROINIT_UNDEF) { // undef doesn't really need initting, but why not + return { intertype: 'emptystruct', type: type }; + } else if (value.text && value.text[0] == '"') { + return { intertype: 'string', text: value.text.substr(1, value.text.length-2) }; + } else { + if (value.type == '<') { // <{ i8 }> etc. + value = value.item.tokens; } - if (tokensLength >= 3 && (token0Text == 'call' || token1Text == 'call')) - return 'Call'; - if (token0Text == 'target') { - if (token1Text == 'triple') { - var triple = item.tokens[3].text; - triple = triple.substr(1, triple.length-2); - var expected = TARGET_LE32 ? 'le32-unknown-nacl' : 'i386-pc-linux-gnu'; - if (triple !== expected) { - warn('using an unexpected LLVM triple: ' + [triple, ' !== ', expected] + ' (are you using emcc for everything and not clang?)'); - } - } - return '/dev/null'; + var contents; + if (value.item) { + // list of items + contents = value.item.tokens; + } else if (value.type == '{') { + // struct + contents = value.tokens; + } else if (value[0]) { + contents = value[0]; + } else { + throw '// interfailzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value); } - if (token0Text == ';') - return '/dev/null'; - if (tokensLength >= 3 && token0Text == 'invoke') - return 'Invoke'; - if (tokensLength >= 3 && token0Text == 'atomicrmw' || token0Text == 'cmpxchg') - return 'Atomic'; - throw 'Invalid token, cannot triage: ' + dump(item); + return { intertype: 'segments', contents: handleSegments(contents) }; } - var eq; - if (item.indent == 2 && (eq = findTokenText(item, '=')) >= 0) { - item.assignTo = toNiceIdent(combineTokens(item.tokens.slice(0, eq)).text); - item.tokens = item.tokens.slice(eq+1); - } - this.forwardItem(item, triage()); } - }); - // Line parsers to intermediate form - - // globals: type or variable - substrate.addActor('Global', { - processItem: function _global(item) { - function scanConst(value, type) { - // Gets an array of constant items, separated by ',' tokens - function handleSegments(tokens) { - // Handle a single segment (after comma separation) - function handleSegment(segment) { - if (segment[1].text == 'null') { - return { intertype: 'value', ident: '0', type: 'i32' }; - } else if (segment[1].text == 'zeroinitializer') { - Types.needAnalysis[segment[0].text] = 0; - return { intertype: 'emptystruct', type: segment[0].text }; - } else if (segment[1].text in PARSABLE_LLVM_FUNCTIONS) { - return parseLLVMFunctionCall(segment); - } else if (segment[1].type && segment[1].type == '{') { - Types.needAnalysis[segment[0].text] = 0; - return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].tokens) }; - } else if (segment[1].type && segment[1].type == '<') { - Types.needAnalysis[segment[0].text] = 0; - return { intertype: 'struct', type: segment[0].text, contents: handleSegments(segment[1].item.tokens[0].tokens) }; - } else if (segment[1].type && segment[1].type == '[') { - Types.needAnalysis[segment[0].text] = 0; - return { intertype: 'list', type: segment[0].text, contents: handleSegments(segment[1].item.tokens) }; - } else if (segment.length == 2) { - Types.needAnalysis[segment[0].text] = 0; - return { intertype: 'value', type: segment[0].text, ident: toNiceIdent(segment[1].text) }; - } else if (segment[1].text === 'c') { - // string - var text = segment[2].text; - text = text.substr(1, text.length-2); - return { intertype: 'string', text: text, type: 'i8*' }; - } else if (segment[1].text === 'blockaddress') { - return parseBlockAddress(segment); - } else { - throw 'Invalid segment: ' + dump(segment); - } - }; - return splitTokenList(tokens).map(handleSegment); + cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 2); + if (item.tokens[2].text == 'alias') { + cleanOutTokens(LLVM.LINKAGES, item.tokens, 3); + cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 3); + var last = getTokenIndexByText(item.tokens, ';'); + var ret = { + intertype: 'alias', + ident: toNiceIdent(item.tokens[0].text), + value: parseLLVMSegment(item.tokens.slice(3, last)), + lineNum: item.lineNum + }; + ret.type = ret.value.type; + Types.needAnalysis[ret.type] = 0; + if (!NAMED_GLOBALS) { + Variables.globals[ret.ident].type = ret.type; + } + return ret; + } + if (item.tokens[2].text == 'type') { + var fields = []; + var packed = false; + if (Runtime.isNumberType(item.tokens[3].text)) { + // Clang sometimes has |= i32| instead of |= { i32 }| + fields = [item.tokens[3].text]; + } else if (item.tokens[3].text != 'opaque') { + if (item.tokens[3].type == '<') { + packed = true; + item.tokens[3] = item.tokens[3].item.tokens[0]; } - - Types.needAnalysis[type] = 0; - if (Runtime.isNumberType(type) || pointingLevels(type) >= 1) { - return { value: toNiceIdent(value.text), type: type }; - } else if (value.text in ZEROINIT_UNDEF) { // undef doesn't really need initting, but why not - return { intertype: 'emptystruct', type: type }; - } else if (value.text && value.text[0] == '"') { - return { intertype: 'string', text: value.text.substr(1, value.text.length-2) }; - } else { - if (value.type == '<') { // <{ i8 }> etc. - value = value.item.tokens; + var subTokens = item.tokens[3].tokens; + if (subTokens) { + subTokens.push({text:','}); + while (subTokens[0]) { + var stop = 1; + while ([','].indexOf(subTokens[stop].text) == -1) stop ++; + fields.push(combineTokens(subTokens.slice(0, stop)).text); + subTokens.splice(0, stop+1); } - var contents; - if (value.item) { - // list of items - contents = value.item.tokens; - } else if (value.type == '{') { - // struct - contents = value.tokens; - } else if (value[0]) { - contents = value[0]; - } else { - throw '// interfailzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value); - } - return { intertype: 'segments', contents: handleSegments(contents) }; } } - - cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 2); - if (item.tokens[2].text == 'alias') { - cleanOutTokens(LLVM.LINKAGES, item.tokens, 3); - cleanOutTokens(LLVM.VISIBILITIES, item.tokens, 3); - var last = getTokenIndexByText(item.tokens, ';'); - var ret = { - intertype: 'alias', - ident: toNiceIdent(item.tokens[0].text), - value: parseLLVMSegment(item.tokens.slice(3, last)), - lineNum: item.lineNum - }; - ret.type = ret.value.type; - Types.needAnalysis[ret.type] = 0; - if (!NAMED_GLOBALS) { - Variables.globals[ret.ident].type = ret.type; - } - return [ret]; + return { + intertype: 'type', + name_: item.tokens[0].text, + fields: fields, + packed: packed, + lineNum: item.lineNum + }; + } else { + // variable + var ident = item.tokens[0].text; + var private_ = findTokenText(item, 'private') >= 0 || findTokenText(item, 'internal') >= 0; + var named = findTokenText(item, 'unnamed_addr') < 0; + cleanOutTokens(LLVM.GLOBAL_MODIFIERS, item.tokens, [2, 3]); + var external = false; + if (item.tokens[2].text === 'external') { + external = true; + item.tokens.splice(2, 1); } - if (item.tokens[2].text == 'type') { - var fields = []; - var packed = false; - if (Runtime.isNumberType(item.tokens[3].text)) { - // Clang sometimes has |= i32| instead of |= { i32 }| - fields = [item.tokens[3].text]; - } else if (item.tokens[3].text != 'opaque') { - if (item.tokens[3].type == '<') { - packed = true; - item.tokens[3] = item.tokens[3].item.tokens[0]; - } - var subTokens = item.tokens[3].tokens; - if (subTokens) { - subTokens.push({text:','}); - while (subTokens[0]) { - var stop = 1; - while ([','].indexOf(subTokens[stop].text) == -1) stop ++; - fields.push(combineTokens(subTokens.slice(0, stop)).text); - subTokens.splice(0, stop+1); + Types.needAnalysis[item.tokens[2].text] = 0; + var ret = { + intertype: 'globalVariable', + ident: toNiceIdent(ident), + type: item.tokens[2].text, + external: external, + private_: private_, + named: named, + lineNum: item.lineNum + }; + if (!NAMED_GLOBALS) { + Variables.globals[ret.ident].type = ret.type; + Variables.globals[ret.ident].external = external; + } + Types.needAnalysis[ret.type] = 0; + if (ident == '@llvm.global_ctors') { + ret.ctors = []; + if (item.tokens[3].item) { + var subTokens = item.tokens[3].item.tokens; + splitTokenList(subTokens).forEach(function(segment) { + var ctor = toNiceIdent(segment[1].tokens.slice(-1)[0].text); + ret.ctors.push(ctor); + if (ASM_JS) { // must export the global constructors from asm.js module, so mark as implemented and exported + Functions.implementedFunctions[ctor] = 'v'; + EXPORTED_FUNCTIONS[ctor] = 1 |