// LLVM assembly => internal intermediate representation, which is ready
// to be processed by the later stages.
// Handle to the 'Tokenizer' actor. It is assigned inside intertyper() when that
// actor is registered, so tokenize() is only usable after intertyper() has run.
// TODO: Clean this up/out
var tokenizer;

// Runs a single piece of LLVM text through the Tokenizer actor directly.
// The text is wrapped as { lineText: text } and the actor's `inner` flag is set
// to true; whatever processItem returns is handed back unchanged.
function tokenize(text) {
  var item = { lineText: text };
  var inner = true;
  return tokenizer.processItem(item, inner);
}
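//! @param parseFunctions When falsy, function bodies are not parsed in this pass; they
//!                       are set aside as raw lines instead. We parse functions only on
//!                       later passes, since we do not want to parse all of them at once
//!                       and have all their lines and data in memory at the same time.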
function intertyper(data, parseFunctions, baseLineNum) {
//parseFunctions = true; // Uncomment to do all parsing in a single big RAM-heavy pass. Faster, if you have the RAM
// Any falsy base (including an omitted argument) means line numbers start from 0.
if (!baseLineNum) {
  baseLineNum = 0;
}

// Detect the LLVM dialect once; the result is cached in LLVM_STYLE.
// new = clang on 2.8, old = llvm-gcc anywhere or clang on 2.7
// NOTE(review): `data` is indexed per line just below, so if it is an array these
// indexOf calls only match elements that are exactly '<label>' / 'entry:' - confirm
// that this is the intended check.
if (LLVM_STYLE === null) {
  if (data.indexOf('<label>') != -1) {
    LLVM_STYLE = 'new';
  } else if (data.indexOf('entry:') == -1) {
    LLVM_STYLE = 'new';
  } else {
    LLVM_STYLE = 'old';
  }
}

// If the source contains debug info as LLVM metadata, process that out (and save the
// debugging info for later). We scan backwards from the last line and stop at the first
// metadata line found; Debugging.processMetadata then hands back the data to continue with.
var i = data.length - 1;
while (i >= 0) {
  if (/^!\d+ = metadata .*/.test(data[i])) {
    data = Debugging.processMetadata(data);
    break;
  }
  i--;
}

// Substrate: the actors registered below are all attached to this instance.
substrate = new Substrate('Intertyper');
// Line splitter.
// Line splitter: the actor that turns raw LLVM lines into logical lines.
//
// processItem(item) reads item.llvmLines (an array of raw lines) and
//  * forwards every non-empty logical line to the 'Tokenizer' actor as
//    { lineText, lineNum }, where lineNum is 1-based and offset by baseLineNum;
//  * when parseFunctions is falsy, does not forward function bodies at all. Each
//    `define ...` through `}` range is collected as raw text and only its header line
//    is tokenized and parsed (through funcHeader) to learn ident, params and varargs.
// It returns the array of 'unparsedFunction' records built that way (empty when
// parseFunctions is truthy).
substrate.addActor('LineSplitter', {
  processItem: function(item) {
    // Compiled once per call rather than once per input line. None of them is global,
    // so test() keeps no lastIndex state between lines.
    var defineRe = /^define /;
    var closeBraceRe = /^\}/;
    // 'to ...' is the second line of an invoke; 'catch', 'filter' and 'cleanup' are
    // the follow-up lines of a landingpad.
    var continuationRe = /^ +(?:to|catch |filter |cleanup)/;
    var switchStartRe = /^ +switch /;
    var switchEndRe = /^ +\]/;

    var lines = item.llvmLines;
    var ret = [];
    var unparsedFunctions = [];
    var inContinual = false; // true while inside the case list of a multi-line switch
    var inFunction = false;  // true while collecting a function that is being set aside
    var currFunctionLines;
    var currFunctionLineNum;

    for (var i = 0; i < lines.length; i++) {
      var line = lines[i];

      if (!parseFunctions && defineRe.test(line)) {
        inFunction = true;
        currFunctionLines = [];
        // Note: unlike the forwarded lines, this number is not offset by baseLineNum.
        currFunctionLineNum = i + 1;
      }

      if (inFunction) {
        // Deferred function: keep the raw text only, including the define and '}' lines.
        currFunctionLines.push(line);
      } else if (ret.length > 0 && (inContinual || continuationRe.test(line))) {
        // Continuation of the previous logical line: glue it on. The ret.length guard
        // makes a continuation with nothing before it fall through to the ordinary-line
        // branch instead of failing on an undefined previous item.
        ret[ret.length - 1].lineText += line;
        if (switchEndRe.test(line)) { // end of llvm switch
          inContinual = false;
        }
      } else {
        ret.push({
          lineText: line,
          lineNum: i + 1 + baseLineNum
        });
        if (switchStartRe.test(line)) { // beginning of llvm switch
          inContinual = true;
        }
      }

      // Only close a function that was actually opened; a stray '}' is left to the
      // Tokenizer as an ordinary line rather than dereferencing missing function state.
      if (inFunction && closeBraceRe.test(line)) {
        inFunction = false;
        var headerItem = { lineText: currFunctionLines[0], lineNum: currFunctionLineNum };
        var func = funcHeader.processItem(tokenizer.processItem(headerItem, true))[0];
        unparsedFunctions.push({
          intertype: 'unparsedFunction',
          // We need this early, to know basic function info - ident, params, varargs
          ident: toNiceIdent(func.ident),
          params: func.params,
          hasVarArgs: func.hasVarArgs,
          lineNum: currFunctionLineNum,
          lines: currFunctionLines
        });
        currFunctionLines = [];
      }
    }

    // Drop logical lines with empty text before handing them on.
    this.forwardItems(ret.filter(function(item) { return item.lineText; }), 'Tokenizer');
    return unparsedFunctions;
  }
});
// Characters that open a bracketed group. Built with the project's set() helper -
// presumably a lookup keyed by these characters; confirm against its definition.
var ENCLOSER_STARTERS = set('[', '(', '<');
// Maps each opening character above to the character that closes it.
var ENCLOSER_ENDERS = {
'[': ']',
'(': ')',
'<': '>'
};
// Line tokenizer
tokenizer = substrate.addActor('Tokenizer', {
processItem: function(item, inner) {
//assert(item.lineNum != 40000);
//if (item.lineNum) print(item.lineNum);
var tokens = [];
var quotes = 0;
var lastToken = null;
var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied
// Note: '{' is not an encloser, as its use in functions is split over many lines
var enclosers = {
'[': 0,
']': '[',
'(': 0,
')': '(',
'<': 0,
'>': '<'
};
var totalEnclosing = 0;
var that = this;
function makeToken(text) {
if (text.length == 0) return;
// merge certain tokens
if ( (lastToken &&