diff options
author | Alon Zakai <alonzakai@gmail.com> | 2013-09-21 16:30:15 -0500 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2013-09-23 14:33:24 -0700 |
commit | d9325637cccd019c2ac0e2f277b57308e185da02 (patch) | |
tree | 19304707d0ad4535a966438869c38cb44ac6e98a | |
parent | 07135d153c5ff990a14e7458f100ad98fe9ef03a (diff) |
unwrap tokenizer
-rw-r--r-- | src/intertyper.js | 257 |
1 files changed, 127 insertions, 130 deletions
diff --git a/src/intertyper.js b/src/intertyper.js index 9c196803..13377411 100644 --- a/src/intertyper.js +++ b/src/intertyper.js @@ -4,151 +4,148 @@ // to be processed by the later stages. // Line tokenizer -var tokenizer = { - processItem: function _tokenizer(item, inner) { - //assert(item.lineNum != 40000); - //if (item.lineNum) print(item.lineNum); - var tokens = []; - var quotes = 0; - var lastToken = null; - var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied - // Note: '{' is not an encloser, as its use in functions is split over many lines - var enclosers = { - '[': 0, - ']': '[', - '(': 0, - ')': '(', - '<': 0, - '>': '<' - }; - var totalEnclosing = 0; - function makeToken(text) { - if (text.length == 0) return; - // merge certain tokens - if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) { - lastToken.text += text; - return; - } +function tokenizer(item, inner) { + //assert(item.lineNum != 40000); + //if (item.lineNum) print(item.lineNum); + var tokens = []; + var quotes = 0; + var lastToken = null; + var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied + // Note: '{' is not an encloser, as its use in functions is split over many lines + var enclosers = { + '[': 0, + ']': '[', + '(': 0, + ')': '(', + '<': 0, + '>': '<' + }; + var totalEnclosing = 0; + function makeToken(text) { + if (text.length == 0) return; + // merge certain tokens + if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) { + lastToken.text += text; + return; + } - var token = { - text: text - }; - if (text[0] in enclosers) { - token.item = tokenizer.processItem({ - lineText: text.substr(1, text.length-2) - }, true); - token.type = text[0]; - } - // merge certain tokens - if (lastToken && isType(lastToken.text) && isFunctionDef(token)) { - lastToken.text += ' ' + text; - } else if (lastToken && text[0] == '}') { // }, }*, etc. - var openBrace = tokens.length-1; - while (tokens[openBrace].text.substr(-1) != '{') openBrace --; - token = combineTokens(tokens.slice(openBrace+1)); - tokens.splice(openBrace, tokens.length-openBrace+1); - tokens.push(token); - token.type = '{'; - token.text = '{ ' + token.text + ' }'; - var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text); - while (pointingLevelsToAdd > 0) { - token.text += '*'; - pointingLevelsToAdd--; - } - lastToken = token; - } else { - tokens.push(token); - lastToken = token; + var token = { + text: text + }; + if (text[0] in enclosers) { + token.item = tokenizer({ + lineText: text.substr(1, text.length-2) + }, true); + token.type = text[0]; + } + // merge certain tokens + if (lastToken && isType(lastToken.text) && isFunctionDef(token)) { + lastToken.text += ' ' + text; + } else if (lastToken && text[0] == '}') { // }, }*, etc. + var openBrace = tokens.length-1; + while (tokens[openBrace].text.substr(-1) != '{') openBrace --; + token = combineTokens(tokens.slice(openBrace+1)); + tokens.splice(openBrace, tokens.length-openBrace+1); + tokens.push(token); + token.type = '{'; + token.text = '{ ' + token.text + ' }'; + var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text); + while (pointingLevelsToAdd > 0) { + token.text += '*'; + pointingLevelsToAdd--; } + lastToken = token; + } else { + tokens.push(token); + lastToken = token; } - // Split using meaningful characters - var lineText = item.lineText + ' '; - var re = /[\[\]\(\)<>, "]/g; - var segments = lineText.split(re); - segments.pop(); - var len = segments.length; - var i = -1; - var curr = ''; - var segment, letter; - for (var s = 0; s < len; s++) { - segment = segments[s]; - i += segment.length + 1; - letter = lineText[i]; - curr += segment; - switch (letter) { - case ' ': - if (totalEnclosing == 0 && quotes == 0) { - makeToken(curr); - curr = ''; - } else { - curr += ' '; - } - break; - case '"': - if (totalEnclosing == 0) { - if (quotes == 0) { - if (curr == '@' || curr == '%') { - curr += '"'; - } else { - makeToken(curr); - curr = '"'; - } + } + // Split using meaningful characters + var lineText = item.lineText + ' '; + var re = /[\[\]\(\)<>, "]/g; + var segments = lineText.split(re); + segments.pop(); + var len = segments.length; + var i = -1; + var curr = ''; + var segment, letter; + for (var s = 0; s < len; s++) { + segment = segments[s]; + i += segment.length + 1; + letter = lineText[i]; + curr += segment; + switch (letter) { + case ' ': + if (totalEnclosing == 0 && quotes == 0) { + makeToken(curr); + curr = ''; + } else { + curr += ' '; + } + break; + case '"': + if (totalEnclosing == 0) { + if (quotes == 0) { + if (curr == '@' || curr == '%') { + curr += '"'; } else { - makeToken(curr + '"'); - curr = ''; + makeToken(curr); + curr = '"'; } } else { - curr += '"'; + makeToken(curr + '"'); + curr = ''; } - quotes = 1-quotes; + } else { + curr += '"'; + } + quotes = 1-quotes; + break; + case ',': + if (totalEnclosing == 0 && quotes == 0) { + makeToken(curr); + curr = ''; + tokens.push({ text: ',' }); + } else { + curr += ','; + } + break; + default: + assert(letter in enclosers); + if (quotes) { + curr += letter; break; - case ',': - if (totalEnclosing == 0 && quotes == 0) { + } + if (letter in ENCLOSER_STARTERS) { + if (totalEnclosing == 0) { makeToken(curr); curr = ''; - tokens.push({ text: ',' }); - } else { - curr += ','; - } - break; - default: - assert(letter in enclosers); - if (quotes) { - curr += letter; - break; } - if (letter in ENCLOSER_STARTERS) { - if (totalEnclosing == 0) { - makeToken(curr); - curr = ''; - } - curr += letter; - enclosers[letter]++; - totalEnclosing++; + curr += letter; + enclosers[letter]++; + totalEnclosing++; + } else { + enclosers[enclosers[letter]]--; + totalEnclosing--; + if (totalEnclosing == 0) { + makeToken(curr + letter); + curr = ''; } else { - enclosers[enclosers[letter]]--; - totalEnclosing--; - if (totalEnclosing == 0) { - makeToken(curr + letter); - curr = ''; - } else { - curr += letter; - } + curr += letter; } - } + } } - var newItem = { - tokens: tokens, - indent: lineText.search(/[^ ]/), - lineNum: item.lineNum - }; - return newItem; - return null; } -}; + var newItem = { + tokens: tokens, + indent: lineText.search(/[^ ]/), + lineNum: item.lineNum + }; + return newItem; +} function tokenize(text) { - return tokenizer.processItem({ lineText: text }, true); + return tokenizer({ lineText: text }, true); } // Handy sets @@ -252,7 +249,7 @@ function intertyper(lines, sidePass, baseLineNums) { if (mainPass && /^}.*/.test(line)) { inFunction = false; if (mainPass) { - var func = funcHeaderHandler(tokenizer.processItem({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true)); + var func = funcHeaderHandler(tokenizer({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true)); if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) { warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data'); @@ -991,7 +988,7 @@ function intertyper(lines, sidePass, baseLineNums) { // Input - return lineSplitter().map(tokenizer.processItem).filter(function(item) { return item }).map(triager).filter(function(result) { + return lineSplitter().map(tokenizer).filter(function(item) { return item }).map(triager).filter(function(result) { if (!result) return false; if (result.tokens) result.tokens = null; // We do not need tokens, past the intertyper. Clean them up as soon as possible here. return true; |