diff options
author | alon@honor <none@none> | 2010-09-20 18:59:15 -0700 |
---|---|---|
committer | alon@honor <none@none> | 2010-09-20 18:59:15 -0700 |
commit | fea809cb090c648f58f11561f7f25ce813d9944f (patch) | |
tree | 355bc25d3bab157799caf339f6917683aca9ac41 /src/parseTools.js | |
parent | 9e66429fa4154058798be6cc554408697cc6bfc8 (diff) |
refactor into files
--HG--
rename : src/parser.js => src/compiler.js
Diffstat (limited to 'src/parseTools.js')
-rw-r--r-- | src/parseTools.js | 368 |
1 files changed, 368 insertions, 0 deletions
// Reconstructed from the diff hunk:
//   diff --git a/src/parseTools.js b/src/parseTools.js
//   new file mode 100644, index 00000000..2a689889 (@@ -0,0 +1,368 @@)

// Various tools for parsing llvm

// Simple #if/else/endif preprocessing for a file. Checks if the
// ident checked is true in our global.
//
// text: the whole file as one string; it is split on '\n'.
// Returns the lines that survive preprocessing, each followed by '\n'.
// Directive lines (those starting with '#') are never emitted.
//
// The identifier after '#if ' is looked up on |this|, so the function
// must be invoked with the object holding the settings as |this| (a
// plain call in non-strict code uses the global object).
// Throws a string for a '#' line that is not recognized.
function preprocess(text) {
  var lines = text.split('\n');
  var ret = '';
  var show = true;
  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];
    if (line[0] != '#') {
      if (show) {
        ret += line + '\n';
      }
    } else {
      // Directives are recognized by single characters only:
      // '#i..' is an if, '#?l..' an else and '#?n..' an endif.
      if (line[1] == 'i') { // if
        var ident = line.substr(4); // everything after '#if '
        show = !!this[ident];
      } else if (line[2] == 'l') { // else
        show = !show;
      } else if (line[2] == 'n') { // endif
        show = true;
      } else {
        throw "Unclear preprocessor command: " + line;
      }
    }
  }
  return ret;
}

// Returns the type one pointer level deeper (appends '*').
function addPointing(type) { return type + '*' }

// Strips |num| trailing characters (pointer stars) from |type|.
// A missing |num| strips one; an explicit 0 returns |type| unchanged.
function removePointing(type, num) {
  if (num === 0) return type;
  return type.substr(0, type.length-(num ? num : 1))
}

// Counts the trailing '*' characters of |type|. A falsy type has 0 levels.
function pointingLevels(type) {
  if (!type) return 0;
  var ret = 0;
  var len1 = type.length - 1;
  while (type[len1-ret] === '*') {
    ret ++;
  }
  return ret;
}

// Turns an LLVM identifier into something usable as a JS identifier.
// Numeric idents are returned untouched, 'null' becomes '0', and each of
// the characters " . @ % : < > , * and space is replaced by '_'.
function toNiceIdent(ident) {
  if (parseFloat(ident) == ident) return ident;
  if (ident == 'null') return '0'; // see parseNumerical
  return ident.replace(/[" \.@%:<>,\*]/g, '_');
}

// True for the primitive numeric LLVM types this file knows about.
function isNumberType(type) {
  var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double'];
  return types.indexOf(type) != -1;
}

function isStructPointerType(type) {
  // This test is necessary for clang - in llvm-gcc, we
  // could check for %struct. The downside is that %1 can
  // be either a variable or a structure, and we guess it is
  // a struct, which can lead to |call i32 %5()| having
  // |%5()| as a function call (like |i32 (i8*)| etc.). So
  // we must check later on, in call(), where we have more
  // context, to differentiate such cases.
  // A similar thing happens in isStructType()
  return !isNumberType(type) && type[0] == '%';
}

// True for non-pointer aggregate types: '[N x T]' blocks and anything
// starting with '%' that is not a number type.
function isStructType(type) {
  if (isPointerType(type)) return false;
  if (/^\[\d+ x (.*)\]/.test(type)) return true; // [15 x ?] blocks. Like structs
  // See comment in isStructPointerType()
  return !isNumberType(type) && type[0] == '%';
}

// True when |type| ends in at least one '*'.
function isPointerType(type) { // TODO!
  return pointingLevels(type) > 0;
}

function isVoidType(type) {
  return type == 'void';
}

// True when |type| is void, a number type, a struct type or a pointer.
function isType(type) { // TODO!
  return isVoidType(type) || isNumberType(type) || isStructType(type) || isPointerType(type);
}

// Detects a function definition, ([...|type,[type,...]])
//
// token.text must, after removing trailing pointer stars, start with '('
// and end with ')'. '(...)' is accepted directly; otherwise every
// comma-separated segment of token.item[0].tokens must be exactly one
// token whose text is a type. Empty segments make the check fail instead
// of raising a TypeError.
function isFunctionDef(token) {
  var text = token.text;
  var pointing = pointingLevels(text);
  var nonPointing = removePointing(text, pointing);
  if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')')
    return false;
  if (nonPointing == '(...)') return true;
  if (!token.item) return false;
  var fail = false;
  splitTokenList(token.item[0].tokens).forEach(function(segment) {
    fail = fail || segment.length != 1 || !isType(segment[0].text);
  });
  return !fail;
}

// Copies token.text into token.ident and returns the same token.
function addIdent(token) {
  token.ident = token.text;
  return token;
}

// Merges a non-empty token list into one token: the texts are
// concatenated, the originals are kept in |tokens| and the line number
// is taken from the first token.
function combineTokens(tokens) {
  var ret = {
    lineNum: tokens[0].lineNum,
    text: '',
    tokens: [],
  };
  tokens.forEach(function(token) {
    ret.text += token.text;
    ret.tokens.push(token);
  });
  return ret;
}

// Structural equality of two tokens, ignoring their __uid__ fields.
// The uids are zeroed while both tokens are serialized with
// JSON.stringify and restored afterwards.
function compareTokens(a, b) {
  var aId = a.__uid__;
  var bId = b.__uid__;
  a.__uid__ = 0;
  b.__uid__ = 0;
  var ret = JSON.stringify(a) == JSON.stringify(b);
  a.__uid__ = aId;
  b.__uid__ = bId;
  return ret;
}

// Returns the index of the first token whose text equals |text|, or
// tokens.length when there is no such token (so that
// tokens.slice(0, index) yields the whole list).
function getTokenIndexByText(tokens, text) {
  var i = 0;
  while (i < tokens.length && tokens[i].text != text) i++;
  return i;
}

// Returns the index in item.tokens of the first token whose text equals
// |text|, or -1 when there is none.
function findTokenText(item, text) {
  for (var i = 0; i < item.tokens.length; i++) {
    if (item.tokens[i].text == text) return i;
  }
  return -1;
}

//
// Splits a list of tokens separated by commas. For example, a list of arguments in a function call
//
// Returns an array of segments (arrays of tokens); the comma tokens
// themselves are dropped. An empty list yields []. A trailing comma does
// not produce an extra empty segment. The input array is not modified.
function splitTokenList(tokens) {
  if (tokens.length == 0) return [];
  var ret = [];
  var seg = [];
  tokens.forEach(function(token) {
    if (token.text == ',') {
      ret.push(seg);
      seg = [];
    } else {
      seg.push(token);
    }
  });
  // The last segment is only closed by a comma when the list ends with
  // one; otherwise flush it here instead of appending a sentinel comma
  // to the caller's array.
  if (tokens[tokens.length-1].text != ',') ret.push(seg);
  return ret;
}

// Splits an item, with the intent of later reintegration
//
// Detaches parent[childSlot] (an empty object is created when the slot
// is unset) and sets the slot to null. The child records parent.__uid__
// as parentUid, the slot name as parentSlot, and the parent's lineNum as
// both lineNum and parentLineNum. Every slot named in |copySlots| is
// copied from parent to child. Returns { parent, child }.
function splitItem(parent, childSlot, copySlots) {
  if (!copySlots) copySlots = [];
  if (!parent[childSlot]) parent[childSlot] = {};
  var child = parent[childSlot];
  parent[childSlot] = null;
  child.parentUid = parent.__uid__;
  child.parentSlot = childSlot;
  child.parentLineNum = child.lineNum = parent.lineNum;
  copySlots.forEach(function(slot) { child[slot] = parent[slot] });
  return {
    parent: parent,
    child: child,
  };
}

// Builds an object whose process(items) method pairs up split items
// again (see splitItem).
//
// process() must be invoked with |this| providing forwardItems() and
// name_. For each item that has a parentSlot it looks for another item
// whose lineNum equals the child's parentLineNum, stores the child in
// parent[child.parentSlot], drops child.parentLineNum and calls
// afterFunc(parent, child) with the same |this|. Both items are then
// removed from the list; everything left unpaired is passed to
// this.forwardItems(remaining, this.name_). Always returns [].
function makeReintegrator(afterFunc) {
  // reintegration - find intermediate representation-parsed items and
  // place back in parents TODO: Optimize this code to optimal O(..)
  return {
    process: function(items) {
      var ret = [];
      for (var i = 0; i < items.length; i++) {
        if (items[i] && items[i].parentSlot) {
          var child = items[i];
          for (var j = 0; j < items.length; j++) {
            // splitItem gives the child the same lineNum as its
            // parentLineNum, so without the j !== i test a child could
            // be picked as its own parent.
            if (j !== i && items[j] && items[j].lineNum == child.parentLineNum) {
              var parent = items[j];
              // process the pair
              parent[child.parentSlot] = child;
              delete child.parentLineNum;
              afterFunc.call(this, parent, child);

              items[i] = null;
              items[j] = null;
              break;
            }
          }
        }
      }
      this.forwardItems(items.filter(function(item) { return !!item }), this.name_); // next time hopefully
      return ret;
    }
  };
}

// Parses a comma-separated list of parameter tokens into an array of
// intermediate items:
//   { intertype: 'varargs' }                       for '...'
//   { intertype: 'value', type, value, ident }     for 'type name'
//   the result of parseGetElementPtr/parseBitcast  for constant expressions
// |type| and |value| are the tokens themselves. The input array is not
// modified, but when a segment holds more than two tokens the extra
// ones are merged into the first token's text.
function parseParamTokens(params) {
  if (params.length === 0) return [];
  var ret = [];
  // Work on a copy so the sentinel comma is not appended to the
  // caller's token list.
  params = params.slice(0);
  if (params[params.length-1].text != ',') {
    params.push({ text: ',' });
  }
  var absIndex = 0;
  while (params.length > 0) {
    var i = 0;
    while (params[i].text != ',') i++;
    var segment = params.slice(0, i);
    params = params.slice(i+1);
    segment = cleanSegment(segment);
    if (segment.length == 1) {
      if (segment[0].text == '...') {
        ret.push({
          intertype: 'varargs',
        });
      } else {
        // Clang sometimes has a parameter with just a type,
        // no name... the name is implied to be %{the index}
        ret.push({
          intertype: 'value',
          type: segment[0],
          value: null,
          ident: '_' + absIndex,
        });
      }
    } else if (segment[1].text === 'getelementptr') {
      ret.push(parseGetElementPtr(segment));
    } else if (segment[1].text === 'bitcast') {
      ret.push(parseBitcast(segment));
    } else {
      if (segment[2] && segment[2].text == 'to') { // part of bitcast params
        segment = segment.slice(0, 2);
      }
      while (segment.length > 2) {
        segment[0].text += segment[1].text;
        segment.splice(1, 1); // TODO: merge tokens nicely
      }
      ret.push({
        intertype: 'value',
        type: segment[0],
        value: segment[1],
        ident: segment[1].text,
      });
    }
    absIndex ++;
  }
  return ret;
}

// Removes parameter attributes (noalias, sret, nocapture, nest, zeroext,
// signext) that directly follow the first token. The segment is
// modified in place and returned. Stops when the segment runs out of
// tokens, so a segment consisting of a type followed only by attributes
// is reduced to the type.
function cleanSegment(segment) {
  if (segment.length == 1) return segment;
  while (segment[1] && ['noalias', 'sret', 'nocapture', 'nest', 'zeroext', 'signext'].indexOf(segment[1].text) != -1) {
    segment.splice(1, 1);
  }
  return segment;
}

// Expects one of the several LLVM getelementptr formats:
// a qualifier, a type, a null, then an () item with tokens
//
// Works on a copy of |segment|. Returns { intertype: 'getelementptr',
// type, params, ident } where params are parsed from
// segment[3].item[0].tokens and ident is the nice ident of the first one.
function parseGetElementPtr(segment) {
  segment = segment.slice(0);
  segment = cleanSegment(segment);
  assertTrue(['inreg', 'byval'].indexOf(segment[1].text) == -1);
  var ret = {
    intertype: 'getelementptr',
    type: segment[0],
    params: parseParamTokens(segment[3].item[0].tokens),
  };
  ret.ident = toNiceIdent(ret.params[0].ident);
  return ret;
}

// TODO: use this
// Returns { intertype: 'bitcast', type, params, ident } where params are
// parsed from segment[2].item[0].tokens and ident is the nice ident of
// the first one.
function parseBitcast(segment) {
  var ret = {
    intertype: 'bitcast',
    type: segment[0],
    params: parseParamTokens(segment[2].item[0].tokens),
  };
  ret.ident = toNiceIdent(ret.params[0].ident);
  return ret;
}

// Removes, in place, the run of tokens starting at |index| whose text
// is listed in |filterOut|. Stops at the first other token or at the
// end of the list.
function cleanOutTokens(filterOut, tokens, index) {
  while (tokens[index] && filterOut.indexOf(tokens[index].text) != -1) {
    tokens.splice(index, 1);
  }
}

// Converts a string of hexadecimal digits (either case) to a number.
// An empty string gives 0. Characters are not validated.
function _HexToInt(stringy) {
  var ret = 0;
  var mul = 1;
  for (var i = stringy.length - 1; i >= 0; i--) {
    var code = stringy.charCodeAt(i);
    var base;
    if (code >= "a".charCodeAt(0)) {
      base = "a".charCodeAt(0) - 10;
    } else if (code >= "A".charCodeAt(0)) {
      base = "A".charCodeAt(0) - 10;
    } else {
      base = "0".charCodeAt(0);
    }
    ret = ret + (mul*(code - base));
    mul = mul * 16;
  }
  return ret;
}

// Decodes an LLVM hexadecimal floating point constant ('0x' followed by
// the 16 hex digits of an IEEE 754 double) into a JS number.
// Handles the sign bit, zero, denormals, infinities and NaN.
function IEEEUnHex(stringy) {
  var a = _HexToInt(stringy.substr(2, 8)); // high 32 bits: sign, exponent, top of mantissa
  var b = _HexToInt(stringy.substr(10));   // low 32 bits of the mantissa
  var sign = (a >>> 31) ? -1 : 1;
  var exponent = (a >>> (52 - 32)) & 0x7ff;
  // 52-bit mantissa as an integer; exactly representable in a double.
  var mantissa = (a & 0xfffff) * Math.pow(2, 32) + b;
  if (exponent === 0x7ff) {
    return mantissa ? NaN : sign * Infinity;
  }
  if (exponent === 0) {
    // Zero and denormals have no implicit leading 1.
    return sign * mantissa * Math.pow(2, -1074);
  }
  return sign * (1 + mantissa * Math.pow(2, -52)) * Math.pow(2, exponent - 1023);
}

// Normalizes a numeric literal as it appears in LLVM assembly.
//   - With no type, or a 'double'/'float' type, a value starting with
//     '0x' is a hexadecimal floating point constant (as the llvm docs
//     say, "The one non-intuitive notation for constants is the
//     hexadecimal form of floating point constants.") and is decoded
//     with IEEEUnHex into a number.
//   - 'null' becomes the string '0': NULL *is* 0, in C/C++. No JS null!
//     (null == 0 is false, etc.)
//   - Anything else is handed back unchanged.
function parseNumerical(value, type) {
  var mayBeFloat = !type || type == 'double' || type == 'float';
  if (mayBeFloat && value.substr(0,2) == '0x') {
    return IEEEUnHex(value);
  }
  return value == 'null' ? '0' : value;
}

// \0Dsometext is really '\r', then sometext
// This function returns an array of int values
//
// A backslash consumes the two characters after it as a hex byte (via
// _HexToInt); every other character contributes its char code.
function parseLLVMString(str) {
  var codes = [];
  for (var pos = 0; pos < str.length; ) {
    if (str[pos] == '\\') {
      codes.push(_HexToInt(str[pos+1]+str[pos+2]));
      pos += 3;
    } else {
      codes.push(str.charCodeAt(pos));
      pos += 1;
    }
  }
  return codes;
}

// Maps a list of labels to the list of their |ident| fields.
function getLabelIds(labels) {
  function identOf(label) {
    return label.ident;
  }
  return labels.map(identOf);
}