diff options
-rw-r--r-- | src/enzymatic.js | 126 | ||||
-rw-r--r-- | src/parser.js | 2286 | ||||
-rw-r--r-- | src/postamble.js | 23 | ||||
-rw-r--r-- | src/preamble.js | 198 | ||||
-rw-r--r-- | src/utility.js | 82 | ||||
-rw-r--r-- | tests/fannkuch.cpp | 159 | ||||
-rw-r--r-- | tests/fannkuch.js | 70 | ||||
-rw-r--r-- | tests/fasta.cpp | 180 | ||||
-rw-r--r-- | tests/fasta.js | 71 | ||||
-rw-r--r-- | tests/runner.py | 473 | ||||
-rw-r--r-- | tests/settings.cfg | 16 |
11 files changed, 3684 insertions, 0 deletions
diff --git a/src/enzymatic.js b/src/enzymatic.js new file mode 100644 index 00000000..2ad8e165 --- /dev/null +++ b/src/enzymatic.js @@ -0,0 +1,126 @@ +/** + * An implementation of an 'enzymatic programming' paradigm. + */ + +DEBUG = true; +DEBUG = false; + +Substrate = function(name_) { + this.name_ = name_; + this.items = []; + this.zymes = []; + this.currUid = 1; +}; + +Substrate.prototype = { + addItem: function(item) { + if (!item.__uid__) { + item.__uid__ = this.currUid; + this.currUid ++; + } + this.items.push(item); + }, + + addZyme: function(zyme) { + this.zymes.push(zyme); + if (!zyme.select) zyme.select = Zyme.prototype.select; + if (!zyme.process) zyme.process = Zyme.prototype.process; + }, + + solve: function() { + if (DEBUG) print("Solving..."); + + var startTime = Date.now(); + var midTime = startTime; + var that = this; + function midComment() { + var curr = Date.now(); + if (curr - midTime > 1000) { + print('// Working on ' + that.name_ + ', so far ' + ((curr-startTime)/1000).toString().substr(0,10) + ' seconds. Have ' + that.items.length + ' items.'); + midTime = curr; + } + } + function finalComment() { + print('// Completed ' + that.name_ + ' in ' + ((Date.now() - startTime)/1000).toString().substr(0,10) + ' seconds.'); + } + + // Naive solver - sheer brute force. + // Assumes list of Zymes is non-changing. + var results = []; + while (true) { + if (DEBUG) print("Cycle start, " + this.items.length + " items."); + var hadProcessing = false; + for (var z = 0; z < this.zymes.length; z++) { + var zyme = this.zymes[z]; + var selected = zyme.select(this.items); + if (selected.length > 0) { + if (DEBUG) print("Calling: " + (zyme.processItem ? zyme.processItem : zyme.process)); + if (DEBUG) { + try { + print("Inputs: \n---\n\n" + outputs.map(JSON.stringify).join('\n\n') + '\n\n---'); + } catch(e) { + print("Inputs: \n---\n\n" + outputs + '\n\n---'); + } + } + hadProcessing = true; + this.items = this.items.filter(function(item) { return selected.indexOf(item) == -1 }); + var outputs; + try { + outputs = zyme.process(selected); + } catch (e) { + print("Exception, current selected are: " + selected.map(dump).join('\n\n').substr(0,100)); + print("Stack: " + new Error().stack); + throw e; + } + if (DEBUG) { + try { + print("Outputs: \n---\n\n" + outputs.map(JSON.stringify).join('\n\n') + '\n\n---'); + } catch(e) { + print("Outputs: \n---\n\n" + outputs + '\n\n---'); + } + } + if (outputs.length === 1 && outputs[0].__finalResult__) { + if (DEBUG) print("Solving complete: __finalResult__"); + delete outputs[0].__finalResult__; // Might recycle this + delete outputs[0].__uid__; + finalComment(); + return outputs[0]; + } + results = results.concat(outputs.filter(function(output) { return !!output.__result__; })) + outputs.filter(function(output) { return !output.__result__; }).forEach(this.addItem, this); + results.forEach(function(output) { + delete output.__result__; // Might recycle these + delete output.__uid__; + }); + } + } + if (this.items.length === 0) { + if (DEBUG) print("Solving complete: no remaining items"); + finalComment(); + return results; + } + if (!hadProcessing) { + print("Reached a dead end."); + this.items.forEach(function(item) { + print("remaining item:" + dump(item)); + }); + throw "failure"; + } + midComment(); + this.items = this.items.filter(function(item) { return item !== null; }); + } + }, +}; + +Zyme = function() { }; +Zyme.prototype = { + select: function(items) { + return items.filter(this.selectItem, this); + }, + process: function(items) { + var ret = []; + items.forEach(function(item) { ret = ret.concat(this.processItem(item)) }, this); + return ret; + }, +}; + diff --git a/src/parser.js b/src/parser.js new file mode 100644 index 00000000..ba0a36e4 --- /dev/null +++ b/src/parser.js @@ -0,0 +1,2286 @@ +// LLVM parser +//============ + +/* + * TODO: + * * Re-use variables (of the same kind, native/nativized vs. emulated). + */ + +// Options + +OPTIMIZE = 1; +RELOOP = 1; + +LINEDEBUG = 0; + +// Prep - allow this to run in both SpiderMonkey and V8 + +if (!this['load']) { + load = function(f) { eval(snarf(f)) } +} +if (!this['read']) { + read = function(f) { snarf(f) } +} + +load('utility.js'); +load('enzymatic.js'); + +// Tools + +function addPointing(type) { return type + '*' } +function removePointing(type) { return type.substr(0, type.length-1) } + +function pointingLevels(type) { + var ret = 0; + while (type.substr(-ret-1, 1) === '*') { + ret ++; + } + return ret; +} + +function toNiceIdent(ident) { + if (parseFloat(ident) == ident) return ident; + return ident.replace(/[" \.@%]/g, '_'); +} + +function isNumberType(type) { + var types = ['i1', 'i8', 'i32', 'i64', 'float', 'double']; + return types.indexOf(type) != -1; +} + +function isStructPointerType(type) { + var proof = '%struct'; + return type.substr(0, proof.length) == proof; +} + +function isStructType(type) { + if (/^\[\d+\ x\ (.*)\]/g.test(type)) return true; // [15 x ?] blocks. Like structs + var proof = '%struct'; + return type.substr(0, proof.length) == proof && !isPointerType(type); +} + +function isPointerType(type) { // TODO! + return pointingLevels(type) > 0; +} + +function isType(type) { // TODO! + return isNumberType(type) || isStructType(type) || isPointerType(type); +} + +function isFunctionDef(token) { + var text = token.text; + var pointing = pointingLevels(text); + var nonPointing = text; + for (var i = 0; i < pointing; i++) + nonPointing = removePointing(nonPointing); + if (nonPointing[0] != '(' || nonPointing.substr(-1) != ')') + return false; + if (nonPointing == '(...)') return true; + if (!token.item) return false; + var fail = false; + token.item[0].tokens.forEach(function(subtoken) { + fail = fail || !isType(subtoken.text); + }); + return !fail; +} + +function addIdent(token) { + token.ident = token.text; + return token; +} + +// Splits out items that pass filter. Returns also the original sans the filtered +function splitter(array, filter) { + var splitOut = array.filter(filter); + var original = array.filter(function(x) { return !filter(x) }); + return { original: original, splitOut: splitOut }; +} + +function combineTokens(tokens) { + var ret = { + lineNum: tokens[0].lineNum, + text: '', + tokens: [], + }; + tokens.forEach(function(token) { + ret.text += token.text; + ret.tokens.push(token); + }); + return ret; +} + +function compareTokens(a, b) { + var aId = a.__uid__; + var bId = b.__uid__; + a.__uid__ = 0; + b.__uid__ = 0; + var ret = JSON.stringify(a) == JSON.stringify(b); + a.__uid__ = aId; + b.__uid__ = bId; + return ret; +} + +function splitTokenList(tokens) { + if (tokens.length == 0) return []; + if (tokens.slice(-1)[0].text != ',') tokens.push({text:','}); + var ret = []; + var seg = []; + tokens.forEach(function(token) { + if (token.text == ',') { + ret.push(seg); + seg = []; + } else { + seg.push(token); + } + }); + return ret; +} + +function makeSplitter(parentSlot, parentSlotValue, parentUnrequiredSlot, childSlot, copySlots) { + return { + selectItem: function(item) { return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot] && item[childSlot] !== null }, + processItem: function(parent) { + var child = parent[childSlot]; + parent[childSlot] = null; + child.parentUid = parent.__uid__; + child.parentSlot = childSlot; + child.lineNum = parent.lineNum; // Debugging + if (!copySlots) copySlots = []; + copySlots.forEach(function(slot) { child[slot] = parent[slot] }); + return [parent, child]; + }, + }; +} + +function makeCombiner(parentSlot, parentSlotValue, parentUnrequiredSlot, childRequiredSlot, finalizeFunc) { + return { + select: function(items) { + var parents = items.filter(function(item) { return item[parentSlot] == parentSlotValue && !item[parentUnrequiredSlot] }); + for (var i = 0; i < parents.length; i++) { + var parent = parents[i]; + var child = items.filter(function(item) { return item[childRequiredSlot] && item.parentUid === parent.__uid__ })[0]; + if (child) return [parent, child]; + } + return []; + }, + process: function(items) { + var parent = items[0]; + var child = items[1]; + parent[child.parentSlot] = child; + delete child.parentUid; + delete child.parentSlot; + finalizeFunc(parent); + return [parent]; + }, + }; +} + +function parseParamTokens(params) { +//print('NEW params ' + JSON.stringify(params)); + if (params.length === 0) return []; + var ret = []; + if (params[params.length-1].text != ',') { + params.push({ text: ',' }); + } + while (params.length > 0) { +//print('params ' + JSON.stringify(params)); + var i = 0; + while (params[i].text != ',') i++; + var segment = params.slice(0, i); +//print(' seg ' + JSON.stringify(segment)); + params = params.slice(i+1); + if (segment[1].text === 'getelementptr' || segment[1].text === 'noalias') { + ret.push(parseGetElementPtr(segment)); + } else if (segment[1].text === 'bitcast') { + ret.push(parseBitcast(segment)); + } else { + if (segment[2] && segment[2].text == 'to') { // part of bitcast params + segment = segment.slice(0, 2); + } + while (segment.length > 2) { + segment[0].text += segment[1].text; + segment.splice(1, 1); // TODO: merge tokens nicely + } + ret.push({ + intertype: 'value', + type: segment[0], + value: segment[1], + ident: segment[1].text, + }); +// } else { +// throw "what is this params token? " + JSON.stringify(segment); + } + } + return ret; +} + +function parseGetElementPtr(segment) { + segment = segment.slice(0); + if (segment[1].text === 'noalias') { + segment.splice(1, 1); + } + var ret = { + intertype: 'getelementptr', + type: segment[0], + params: parseParamTokens(segment[3].item[0].tokens), + }; + ret.ident = toNiceIdent(ret.params[0].ident); + return ret; +} + +// TODO: use this +function parseBitcast(segment) { +//print('zz parseBC pre: ' + dump(segment)); + var ret = { + intertype: 'bitcast', + type: segment[0], + params: parseParamTokens(segment[2].item[0].tokens), + }; + ret.ident = toNiceIdent(ret.params[0].ident); +//print('zz parseBC: ' + dump(ret)); + return ret; +} + +function getLabelIds(labels) { + return labels.map(function(label) { return label.ident }); +} + +// ======================= + +// llvm => intertypes +function intertyper(data) { + // Substrate + + substrate = new Substrate('Intertyper'); + + // Input + + substrate.addItem({ + llvmText: data, + }); + + // Tools + + function findTokenText(item, text) { + for (var i = 0; i < item.tokens.length; i++) { + if (item.tokens[i].text == text) return i; + } + return -1; + } + + // Line splitter. + substrate.addZyme({ + selectItem: function(item) { return !!item.llvmText; }, + processItem: function(item) { + var lines = item.llvmText.split('\n'); + var ret = []; + for (var i = 0; i < lines.length; i++) { + if (/^\ +to.*/g.test(lines[i])) { + // to after invoke + ret.slice(-1)[0].lineText += lines[i]; + } else { + ret.push({ + lineText: lines[i], + lineNum: i + 1, + }); + } + } + return ret.filter(function(item) { return item.lineText; }); + }, + }); + + // Line tokenizer + substrate.addZyme({ + selectItem: function(item) { return item.lineText; }, + processItem: function(item) { +//print("line: " + item.lineText); + var lineText = item.lineText + " "; + var tokens = []; + var tokenStart = -1; + var indent = -1; + var quotes = 0; + var i = 0; + // Note: '{' is not an encloser, as its use in functions is split over many lines + var enclosers = { + '[': 0, + ']': '[', + '(': 0, + ')': '(', + '<': 0, + '>': '<', + }; + function notQuoted() { + return quotes == 0; + } + function notEnclosed() { + for (var i in enclosers) { + if (typeof enclosers[i] === 'number' && enclosers[i] > 0) + return false; + } + return true; + } + var that = this; + function tryStartToken() { + if (tokenStart == -1 && notEnclosed() && notQuoted()) { +//print("try START " + tokenStart + ',' + JSON.stringify(enclosers)); + tokenStart = i; + } + } + function tryFinishToken(includeThis) { + if (tokenStart >= 0 && notEnclosed() && notQuoted()) { +//print("try finish " + tokenStart + ',' + JSON.stringify(enclosers)); + var token = { + text: lineText.substr(tokenStart, i-tokenStart + (includeThis ? 1 : 0)), + }; + if (token.text[0] in enclosers) { + token.item = that.processItem({ + lineText: token.text.substr(1, token.text.length-2) + }); + token.type = token.text[0]; + } + if (indent == -1) { + indent = tokenStart; + } + // merge certain tokens + if ( (tokens.length > 0 && tokens.slice(-1)[0].text == '%' && token.text[0] == '"' ) || + (tokens.length > 0 && token.text.replace(/\*/g, '') == '') ) { + tokens.slice(-1)[0].text += token.text; + } else if (tokens.length > 0 && isType(tokens.slice(-1)[0].text) && isFunctionDef(token)) { + tokens.slice(-1)[0].text += ' ' + token.text; + } else if (tokens.length > 0 && token.text[token.text.length-1] == '}') { + var openBrace = tokens.length-1; + while (tokens[openBrace].text != '{') openBrace --; + token = combineTokens(tokens.slice(openBrace+1)); + tokens.splice(openBrace, tokens.length-openBrace+1); + tokens.push(token); + tokens.slice(-1)[0].type = '{'; + } else { + tokens.push(token); + } +// print("new token: " + dump(tokens.slice(-1)[0])); + tokenStart = -1; + } + } + for (; i < lineText.length; i++) { + var letter = lineText[i]; +//print("letter: " + letter); + switch (letter) { + case ' ': + tryFinishToken(); + break; + case '"': + tryFinishToken(); + tryStartToken(); + quotes = 1-quotes; + break; + case ',': + tryFinishToken(); + if (notEnclosed() && notQuoted()) { + tokens.push({ text: ',' }); + } + break; + default: + if (letter in enclosers && notQuoted()) { + if (typeof enclosers[letter] === 'number') { + tryFinishToken(); + tryStartToken(); + enclosers[letter]++; + } else { + enclosers[enclosers[letter]]--; + tryFinishToken(true); + } +//print(' post-enclosers: ' + JSON.stringify(enclosers)); + } else { + tryStartToken(); + } + } + } + return [{ + tokens: tokens, + indent: indent, + lineNum: item.lineNum, + }]; + }, + }); + + // Line parsers to intermediate form + + // Comment + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens[0].text == ';' }, + processItem: function(item) { return [] }, + }); + // target + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens[0].text == 'target' }, + processItem: function(item) { return [] }, + }); + // globals: type or constant + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.indent === 0 && item.tokens[1].text == '=' }, + processItem: function(item) { + if (item.tokens[2].text == 'type') { + // type +//print('// zz ' + dump(item)); + var fields = []; + if (item.tokens[3].text != 'opaque') { + var subTokens = item.tokens[3].tokens; + subTokens.push({text:','}); + while (subTokens[0]) { + var stop = 1; + while ([','].indexOf(subTokens[stop].text) == -1) stop ++; + fields.push(combineTokens(subTokens.slice(0, stop)).text); + subTokens.splice(0, stop+1); + } + } + return [{ + __result__: true, + intertype: 'type', + name_: item.tokens[0].text, + fields: fields, + lineNum: item.lineNum, + }] + } else if (item.tokens[2].text == 'global') { + // variable + return [{ + __result__: true, + intertype: 'globalVariable', + ident: item.tokens[0].text, + type: item.tokens[3].text, + value: item.tokens[4], + lineNum: item.lineNum, + }] + } else { + // constant + var ident = item.tokens[0].text; + while (item.tokens[2].text in { 'private': 0, 'constant': 0, 'appending': 0, 'global': 0, 'weak_odr': 0, 'internal': 0 }) + item.tokens.splice(2, 1); + var ret = { + __result__: true, + intertype: 'globalConstant', + ident: ident, + type: item.tokens[2], + lineNum: item.lineNum, + }; + if (ident == '@llvm.global_ctors') { + ret.ctors = []; + var subTokens = item.tokens[3].item[0].tokens; + splitTokenList(subTokens).forEach(function(segment) { + ret.ctors.push(segment[1].tokens.slice(-1)[0].text); + }); + } else { + if (item.tokens[3].text == 'c') + item.tokens.splice(3, 1); + ret.value = item.tokens[3]; + } + return [ret]; + } + }, + }); + // function header + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 4 && item.indent === 0 && item.tokens[0].text == 'define' && + item.tokens.slice(-1)[0].text == '{' }, + processItem: function(item) { + if (item.tokens.slice(-3,-2)[0].text == 'align') + item.tokens.splice(-3,2); + if (item.tokens.slice(-2,-1)[0].text == 'nounwind') + item.tokens.splice(-2,1); + while (item.tokens.length > 5) + item.tokens.splice(1, 1); + return [{ + __result__: true, + intertype: 'function', + ident: item.tokens[2].text, + returnType: item.tokens[1], + params: item.tokens[3], + lineNum: item.lineNum, + }]; + }, + }); + // label + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 1 && item.indent === 0 && item.tokens[0].text.substr(-1) == ':' }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'label', + ident: '%' + item.tokens[0].text.substr(0, item.tokens[0].text.length-1), + lineNum: item.lineNum, + }]; + }, + }); + // assignment + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 3 && findTokenText(item, '=') >= 0 && + !item.intertype }, + processItem: function(item) { + var opIndex = findTokenText(item, '='); + return [{ + intertype: 'assign', + ident: combineTokens(item.tokens.slice(0, opIndex)).text, + value: null, + lineNum: item.lineNum, + }, { // Additional token, to be parsed, and later re-integrated + indent: -1, + tokens: item.tokens.slice(opIndex+1), + parentLineNum: item.lineNum, + parentSlot: 'value', + }]; + }, + }); + // reintegration - find intermediate representation-parsed items and + // place back in parents + substrate.addZyme({ + select: function(items) { + for (var i = 0; i < items.length; i++) { + if (items[i].parentSlot && items[i].intertype) { + for (var j = 0; j < items.length; j++) { + if (items[j].lineNum == items[i].parentLineNum) { + return [items[j], items[i]]; + } + } + } + } + return []; + }, + process: function(items) { + var parent = items[0]; + var child = items[1]; + parent[child.parentSlot] = child; + parent.__result__ = true; + delete child.parentLineNum; + return [parent]; + } + }); + // 'load' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'load' }, + processItem: function(item) { + item.intertype = 'load'; + item.pointerType = item.tokens[1]; + item.pointer = item.tokens[2]; + item.ident = item.pointer.text; +//print("// zz zz pointer: " + JSON.stringify(item)); + item.type = { text: removePointing(item.pointerType.text) }; + return [item]; + }, + }); + // 'bitcast' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'bitcast' }, + processItem: function(item) { + item.intertype = 'bitcast'; + item.type = item.tokens[1]; + item.ident = item.tokens[2].text; + item.type2 = item.tokens[4]; + return [item]; + }, + }); + // 'getelementptr' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'getelementptr' }, + processItem: function(item) { + var last = 0; + while (item.tokens[last].text != ';') last++; + var segment = [ item.tokens[1], { text: null }, null, { item: [ { + tokens: item.tokens.slice(2, last) + } ] } ]; + var data = parseGetElementPtr(segment); + item.intertype = 'getelementptr'; + item.type = data.type; + item.params = data.params; + item.ident = data.ident; + return [item]; + }, + }); + // 'call' + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'call' && !item.intertype }, + processItem: function(item) { + item.intertype = 'call'; + if (['signext', 'zeroext'].indexOf(item.tokens[1].text) != -1) { + item.tokens.splice(1, 1); + } + item.type = item.tokens[1]; + item.functionType = ''; + while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { + item.functionType += item.tokens[2].text; + item.tokens.splice(2, 1); + } + item.ident = item.tokens[2].text; + item.params = parseParamTokens(item.tokens[3].item[0].tokens); + if (item.indent == 2) { + // standalone call - not in assign + item.standalone = true; + item.__result__ = true; + } + return [item]; + }, + }); + // 'invoke' + substrate.addZyme({ + selectItem: function(item) { return item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'invoke' && !item.intertype }, + processItem: function(item) { + item.intertype = 'invoke'; + item.type = item.tokens[1]; + item.functionType = ''; + while (['@', '%'].indexOf(item.tokens[2].text[0]) == -1) { + item.functionType += item.tokens[2].text; + item.tokens.splice(2, 1); + } + item.ident = item.tokens[2].text; + item.params = parseParamTokens(item.tokens[3].item[0].tokens); + item.toLabel = item.tokens[6].text; + item.unwindLabel = item.tokens[9].text; + item.__result__ = true; + return [item]; + }, + }); + // 'alloca' + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'alloca' }, + processItem: function(item) { + item.intertype = 'alloca'; + item.allocatedType = item.tokens[1]; + item.type = { text: addPointing(item.tokens[1].text) }; + return [item]; + }, + }); + // mathops + substrate.addZyme({ + selectItem: function(item) { return item.indent === -1 && item.tokens && item.tokens.length >= 3 && + ['add', 'sub', 'sdiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptoui', 'fptosi', 'trunc', 'sext', 'select'] + .indexOf(item.tokens[0].text) != -1 && !item.intertype }, + processItem: function(item) { + item.intertype = 'mathop'; + item.op = item.tokens[0].text; + item.variant = null; + if (item.tokens[1].text == 'nsw') item.tokens.splice(1, 1); + if (['icmp', 'fcmp'].indexOf(item.op) != -1) { + item.variant = item.tokens[1].text; + item.tokens.splice(1, 1); + } + item.type = item.tokens[1]; + item.ident = item.tokens[2].text; + item.ident2 = item.tokens[4].text; + item.ident3 = item.tokens[5] ? item.tokens[5].text : null; + item.ident4 = item.tokens[8] ? item.tokens[8].text : null; +//print('// zz got maptop ' + item.op + ',' + item.variant + ',' + item.ident + ',' + item.value); + return [item]; + }, + }); + // 'store' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 5 && item.tokens[0].text == 'store' && + !item.intertype }, + processItem: function(item) { + if (item.tokens[3].text != ',') { + assertEq(item.tokens[2].text, 'getelementptr'); + // complex input - likely getelementptr + var commaIndex = 4; + while (item.tokens[commaIndex].text != ',') commaIndex ++; + return [{ + __result__: true, + intertype: 'store', + valueType: item.tokens[1], + value: parseGetElementPtr(item.tokens.slice(1, commaIndex)), + pointerType: item.tokens[commaIndex+1], + pointer: item.tokens[commaIndex+2], + ident: item.tokens[commaIndex+2].text, + lineNum: item.lineNum, + }]; + } + return [{ + __result__: true, + intertype: 'store', + valueType: item.tokens[1], + value: addIdent(item.tokens[2]), + pointerType: item.tokens[4], + pointer: item.tokens[5], + ident: item.tokens[5].text, + lineNum: item.lineNum, + }]; + }, + }); + // 'br' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 3 && item.tokens[0].text == 'br' && + !item.intertype }, + processItem: function(item) { + if (item.tokens[1].text == 'label') { + return [{ + __result__: true, + intertype: 'branch', + label: toNiceIdent(item.tokens[2].text), + lineNum: item.lineNum, + }]; + } else { + return [{ + __result__: true, + intertype: 'branch', + ident: item.tokens[2].text, + labelTrue: toNiceIdent(item.tokens[5].text), + labelFalse: toNiceIdent(item.tokens[8].text), + lineNum: item.lineNum, + }]; + } + }, + }); + // 'ret' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens.length >= 2 && item.tokens[0].text == 'ret' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'return', + type: item.tokens[1].text, + value: item.tokens[2] ? item.tokens[2].text : null, + lineNum: item.lineNum, + }]; + }, + }); + // function end + substrate.addZyme({ + selectItem: function(item) { return item.indent === 0 && item.tokens && item.tokens.length >= 1 && item.tokens[0].text == '}' && !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'functionEnd', + lineNum: item.lineNum, + }]; + }, + }); + // external function stub + substrate.addZyme({ + selectItem: function(item) { return item.indent === 0 && item.tokens && item.tokens.length >= 4 && item.tokens[0].text == 'declare' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'functionStub', + ident: item.tokens[2].text, + returnType: item.tokens[1], + params: item.tokens[3], + lineNum: item.lineNum, + }]; + }, + }); + // 'unreachable' + substrate.addZyme({ + selectItem: function(item) { return item.indent === 2 && item.tokens && item.tokens[0].text == 'unreachable' && + !item.intertype }, + processItem: function(item) { + return [{ + __result__: true, + intertype: 'unreachable', + lineNum: item.lineNum, + }]; + }, + }); + + return substrate.solve(); +} + +// Analyze intertype data + +VAR_NATIVE = 'native'; +VAR_NATIVIZED = 'nativized'; +VAR_EMULATED = 'emulated'; + +function cleanFunc(func) { + func.lines = func.lines.filter(function(line) { return line.intertype !== null }); + func.labels.forEach(function(label) { + label.lines = label.lines.filter(function(line) { return line.intertype !== null }); + }); +} + +function analyzer(data) { +//print('zz analaz') + substrate = new Substrate('Analyzer'); + + substrate.addItem({ + items: data, + }); + + // Sorter + substrate.addZyme({ + selectItem: function(item) { return !item.sorted; }, + processItem: function(item) { + item.items.sort(function (a, b) { return a.lineNum - b.lineNum }); + item.sorted = true; + return [item]; + }, + }); + + // Gatherer + substrate.addZyme({ + selectItem: function(item) { return item.sorted && !item.gathered; }, + processItem: function(item) { + // Single-liners + ['globalConstant', 'globalVariable', 'functionStub', 'type'].forEach(function(intertype) { + var temp = splitter(item.items, function(item) { return item.intertype == intertype }); + item[intertype + 's'] = temp.splitOut; + item.items = temp.original; + }); + // Functions & labels + item.functions = [] + for (var i = 0; i < item.items.length; i++) { + var subItem = item.items[i]; + if (subItem.intertype == 'function') { + item.functions.push(subItem); + subItem.endLineNum = null; + subItem.lines = []; + subItem.labels = []; + } else if (subItem.intertype == 'functionEnd') { + item.functions.slice(-1)[0].endLineNum = subItem.lineNum; + } else if (subItem.intertype == 'label') { + item.functions.slice(-1)[0].labels.push(subItem); + subItem.lines = []; + } else if (item.functions.slice(-1)[0].endLineNum === null) { + // Internal line + item.functions.slice(-1)[0].lines.push(subItem); + item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem); + } else { + print("ERROR: what is this? " + JSON.stringify(subItem)); + } + } + delete item.items; + item.gathered = true; + return [item]; + }, + }); + + // IdentiNicer + substrate.addZyme({ + selectItem: function(item) { return item.gathered && !item.identiniced; }, + processItem: function(output) { + walkJSON(output, function(item) { + ['', '2', '3', '4', '5'].forEach(function(ext) { + if (item && item['ident' + ext]) + item['ident' + ext] = toNiceIdent(item['ident' + ext]); + }); + }); + output.identiniced = true; + return [output]; + } + }); + + function addType(type, data) { + if (['<', '(', 'internal', 'inbounds', 'void'].indexOf(type) != -1) return; + var check = /^\[(\d+)\ x\ (.*)\]$/g.exec(type); + // 'blocks': [14 x %struct.X] etc. + if (check) { + var num = parseInt(check[1]); + var subType = check[2]; + data.types.push({ + name_: type, + fields: range(num).map(function() { return subType }), + lineNum: '?', + }); + return; + } + if (['['].indexOf(type) != -1) return; + if (isNumberType(type) || isPointerType(type)) return; + if (!data.types[type]) { +// print("// New type: " + type); + data.types.push({ + name_: type, + fields: [ 'int32' ], // XXX + flatSize: 1, + lineNum: '?', + }); + } + } + + // TypeVestigator + substrate.addZyme({ + selectItem: function(item) { return item.gathered && !item.typevestigated; }, + processItem: function(data) { + walkJSON(data, function(item) { + if (!item) return; + if (item.type) { + addType(!item.type.text ? item.type : item.type.text, data); + } + if (item.type2) { + addType(!item.type2.text ? item.type2 : item.type2.text, data); + } + }); + data.typevestigated = true; + return [data]; + } + }); + + // Type analyzer + substrate.addZyme({ + selectItem: function(item) { return item.typevestigated && !item.typed; }, + processItem: function(item) { +//print('zz analaz types') + // 'fields' is the raw list of LLVM fields. However, we embed + // child structures into parent structures, basically like C. + // So { int, { int, int }, int } would be represented as + // an Array of 4 ints. getelementptr on the parent would take + // values 0, 1, 2, where 2 is the entire middle structure. + // We also need to be careful with getelementptr to child + // structures - we return a pointer to the same slab, just + // a different offset. Likewise, need to be careful for + // getelementptr of 2 (the last int) - it's real index is 4. + // The benefit of this approach is inheritance - + // { { ancestor } , etc. } = descendant + // In this case it is easy to bitcast ancestor to descendant + // pointers - nothing needs to be done. If the ancestor were + // a new slab, it would need some pointer to the outer one + // for casting in that direction. + // TODO: bitcasts of non-inheritance cases of embedding (not at start) + var more = true; + while (more) { + more = false; + function getType(t) { + return item.types.filter(function(type) { return type.name_ == t })[0]; + } + item.types.forEach(function(type) { + var ready = true; + type.fields.forEach(function(field) { +//print('// zz getT: ' + type.name_ + ' : ' + field); + if (isStructType(field)) { + if (!getType(field)) { + addType(field, item); + ready = false; + } else { + if (!getType(field).flatIndexes) { + ready = false; + } + } + } + }); + if (!ready) { + more = true; + return; + } + type.flatSize = 0; + type.needsFlattening = false; + var sizes = []; + type.flatIndexes = type.fields.map(function(field) { + var curr = type.flatSize; + if (isStructType(field)) { + var size = getType(field).flatSize; + type.flatSize += size; + sizes.push(size); + type.needsFlattening = true; + } else { + type.flatSize ++; + } + return curr; + }); + if (type.needsFlattening && dedup(sizes).length == 1) { + type.flatFactor = sizes[0]; + } + }); + } + + item.types.forEach(function(type) { + print('// type: ' + type.name_);// + ' : ' + JSON.stringify(type.fields)); + }); + item.typed = true; + return [item]; + }, + }); + + // Variable analyzer + substrate.addZyme({ + selectItem: function(item) { return item.typevestigated && !item.variablized; }, + processItem: function(item) { + item.functions.forEach(function(func) { + func.variables = {}; + + // LLVM is SSA, so we always have a single assignment/write. We care about + // the reads/other uses. + walkJSON(func.lines, function(item) { +//if (item && item.intertype == 'assign') print('zz assign: ' + JSON.stringify(item)); + if (item && item.intertype == 'assign' && ['alloca', 'load', 'call', 'bitcast', 'mathop', 'getelementptr'].indexOf(item.value.intertype) != -1) { +//print("zz add var " + item.ident + ',' + item.intertype); + func.variables[item.ident] = { + ident: item.ident, + type: item.value.type.text, + origin: item.value.intertype, + uses: parseInt(item.value.tokens.slice(-1)[0].item[0].tokens[0].text.split('=')[1]), + }; + } + }); + + for (vname in func.variables) { + var variable = func.variables[vname]; + + // Whether the value itself is used. For an int, always yes. For a pointer, + // we might never use the pointer's value - we might always just store to it / + // read from it. If so, then we can optimize away the pointer. + variable.hasValueTaken = false; + // Whether our address was used. If not, then we do not need to bother with + // implementing this variable in a way that other functions can access it. + variable.hasAddrTaken = false; + + variable.pointingLevels = pointingLevels(variable.type); + + // Analysis! + + if (variable.pointingLevels > 0) { + // Pointers + variable.loads = 0; + variable.stores = 0; + + func.lines.forEach(function(line) { +//print(dump(line)) + if (line.intertype == 'store' && line.ident == vname) { + variable.stores ++; + } else if (line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) { + variable.loads ++; + } + }); + + variable.otherUses = variable.uses - variable.loads - variable.stores; + if (variable.otherUses > 0) + variable.hasValueTaken = true; + } + + // Decision time + + if (variable.origin == 'getelementptr') { + // Use our implementation that emulates pointers etc. + variable.impl = VAR_EMULATED; + } else if ( variable.pointingLevels === 0 && !variable.hasAddrTaken ) { + // A simple int value, can be implemented as a native variable + variable.impl = VAR_NATIVE; + } else if ( variable.pointingLevels === 1 && variable.origin === 'alloca' && !isStructPointerType(variable.type) && !variable.hasAddrTaken && !variable.hasValueTaken ) { + // A pointer to a value which is only accessible through this pointer. Basically + // a local value on the stack, which nothing fancy is done on. So we can + // optimize away the pointing altogether, and just have a native variable + variable.impl = VAR_NATIVIZED; + } else { + variable.impl = VAR_EMULATED; + } +//print('// var ' + vname + ': ' + JSON.stringify(variable)); + } + }); + item.variablized = true; + return [item]; + }, + }); + + // ReLooper - reconstruct nice loops, as much as possible + substrate.addZyme({ + selectItem: function(item) { return item.variablized && !item.relooped }, + processItem: function(item) { + function finish() { + item.relooped = true; + return [item]; + } + + // Tools + + function replaceLabels(line, labelId, toLabelId) { +//print('// XXX replace ' + labelId + ' with ' + toLabelId); + if (line.intertype != 'branch') return; + ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel'].forEach(function(id) { + if (line[id] && line[id] == labelId) { + line[id] = toLabelId; +//print(' replaced!'); + } + }); + } + + function replaceLabelLabels(label, labelId, toLabelId) { + label.lines.forEach(function(line) { replaceLabels(line, labelId, toLabelId) }); + return label; + } + + function replaceInLabels(labels, toReplace, replaceWith) { +//print('// XXX replaceIn ' + toReplace + ' with ' + replaceWith); + assertEq(!replaceWith || toReplace.length == 1, true); // TODO: implement other case + labels.forEach(function(label) { + ['inLabels'].forEach(function(l) { + label[l] = label[l].map(function(labelId) { return toReplace.indexOf(labelId) == -1 ? labelId : replaceWith}) + .filter(function(labelId) { return !!labelId }); + }); + }); + return labels; + } + + function calcLabelBranchingData(labels, labelsDict) { + item.functions.forEach(function(func) { + labels.forEach(function(label) { + label.outLabels = []; + label.inLabels = []; + label.hasReturn = false; + label.hasBreak = false; + }); + }); + // Find direct branchings + labels.forEach(function(label) { +//print('zz at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels); + label.lines.forEach(function(line) { + if (['branch', 'invoke'].indexOf(line.intertype) != -1) { + ['label', 'labelTrue', 'labelFalse', 'toLabel', 'unwindLabel'].forEach(function(id) { + if (line[id]) { + if (line[id][0] == 'B') { // BREAK, BCONT, BNOPP + label.hasBreak = true; + } else { + label.outLabels.push(line[id]); + labelsDict[line[id]].inLabels.push(label.ident); + } + } + }); + } + label.hasReturn |= line.intertype == 'return'; + }); + }); + // Find all incoming and all outgoing - recursively + labels.forEach(function(label) { + label.allInLabels = []; + label.allOutLabels = []; + //! MustGetTo ignores return - |if (x) return; else Y| must get to Y. + label.mustGetHere = []; + }); + + var worked = true; + while (worked) { + worked = false; + labels.forEach(function(label) { +//print('zz at label: ' + label.ident + ':' + label.inLabels + ':' + label.outLabels); + function inout(s, l) { + var temp = label[s].slice(0); + label[s].forEach(function(label2Id) { + temp = temp.concat(labelsDict[label2Id][l]); + }); + temp = dedup(temp); + temp.sort(); + if (JSON.stringify(label[l]) != JSON.stringify(temp)) { +//print('zz noticed ' + label.ident + ' ? ' + s + ',' + l + ' : ' + label[s] + ' | ' + label[l]); + label[l] = temp; + worked = true; + } + } + inout('inLabels', 'allInLabels'); + inout('outLabels', 'allOutLabels'); + }); + } + + // Find all mustGetTos + labels.forEach(function(label) { +//print('path for: ' + label.ident + ',' + dump(label)); + function walk(path, label) { +//print('path is: ' + getLabelIds(path.concat([label]))); + // If all we are is a break/return - then stop here. Otherwise, continue to other path +//p//rint('??? ' + label.hasReturn + ' : ' + label.hasBreak + ' : ' + label.outLabels.length); + if (label.hasReturn || (label.hasBreak && label.outLabels.length == 0)) { + //print('path done.'); + return [path.concat([label])] + }; + if (path.indexOf(label) != -1) { + //print('looping path - abort it.'); + return []; // loop - drop this path + } + path = path.concat([label]); + return label.outLabels.map(function(outLabelId) { return walk(path, labelsDict[outLabelId]) }) + .reduce(function(a, b) { return a.concat(b) }, []) + .filter(function(path) { return path.length > 0 }); + } + var paths = walk([], label).map(function(path) { return getLabelIds(path) }); +//print('XXX paths: ' + JSON.stringify(paths)); + var possibles = dedup(paths.reduce(function(a,b) { return a.concat(b) }, [])); + label.mustGetTo = possibles.filter(function(possible) { + return paths.filter(function(path) { return path.indexOf(possible) == -1 }) == 0; + }).filter(function(possible) { return possible != label.ident }); +//print('XXX must get to: ' + JSON.stringify(label.mustGetTo)); + }); + + labels.forEach(function(label) { + label.mustGetTo.forEach(function (mustGetTo) { + labelsDict[mustGetTo].mustGetHere.push(label.ident); + }); + }); + + labels.forEach(function(label) { +//print('// label: ' + label.ident + ' :out : ' + JSON.stringify(label.outLabels)); +//print('// ' + label.ident + ' :in : ' + JSON.stringify(label.inLabels)); +//print('// ' + label.ident + ' :ALL out : ' + JSON.stringify(label.allOutLabels)); +//print('// ' + label.ident + ' :ALL in : ' + JSON.stringify(label.allInLabels)); +//print('// ZZZZZZ ' + label.ident + ' must get to all of ' + JSON.stringify(label.mustGetTo)); + }); + } + +/* // Disabled merging as it seems it just removes a return now and then. + function mergeLabels(label1Ident, label2Ident) { + var label1 = func.labelsDict[label1Ident]; + var label2 = func.labelsDict[label2Ident]; + label1.lines.pop(); + label1.lines = label1.lines.concat(label2.lines); + label1.outLabels = label2.outLabels; + label2.lines = null; + func.labels = func.labels.filter(function(label) { return !!label.lines }); +print('// zz Merged away! ' + label2.ident + ' into ' + label1.ident); + delete func.labelsDict[label2.ident]; + replaceInLabels(func.labels, [label2.ident], label1.ident); + } +//print('Labels pre merge : ' + getLabelIds(func.labels)); + + // Merge trivial labels + var worked = true; + while (worked) { + worked = false; + func.labels.forEach(function(label) { + if (label.lines === null) return; // We were deleted +//print("// Consider: " + label.ident + ' : out/in: ' + label.outLabels.length + ',' + label.inLabels.length);// + dump(label.lines.slice(-1)[0])); + if (label.outLabels.length == 1 && + label.lines.slice(-1)[0].intertype == 'branch' && +// label.lines.slice(-1)[0].label && // nonconditional branch + label.lines.slice(-1)[0].label == label.outLabels[0] && + func.labelsDict[label.outLabels[0]].inLabels.length == 1) { +//print("// and target: " + func.labelsDict[label.outLabels[0]].inLabels); + mergeLabels(label.ident, label.outLabels[0]); + worked = true; + } + }); + } +*/ + + // 'block': A self-enclosed part of the program, for example a loop or an if + function makeBlock(labels, entry, labelsDict) { + var def = { + type: 'emulated', // a block we cannot map to a nicer structure like a loop. We emulate it with a barbaric switch + labels: labels, + entry: entry, + }; + if (!RELOOP) return def; + + function getLastLine(block) { + if (block.next) return getLastLine(block.next); + switch(block.type) { + case 'loop': + return getLastLine(block.rest); + case 'if': + case 'breakingif': + return getLastLine(block.ifTrue); + case 'emulated': + case 'simple': + if (block.labels.length == 1) { + return block.labels[0].lines.slice(-1)[0]; + } else { + throw "Not clear what the last line is." + } + } + } + function getAll(fromId, beforeIds) { + beforeIds = beforeIds ? beforeIds : []; +print("//getAll : " + fromId + ' : ' + beforeIds); + if (beforeIds && beforeIds.indexOf(fromId) != -1) return []; +//print("getAll proceeding"); + var from = labelsDict[fromId]; + return dedup([from].concat( + from.outLabels.map(function(outLabel) { return getAll(outLabel, beforeIds.concat(fromId)) }) + .reduce(function(a,b) { return a.concat(b) }, []) + ), 'ident'); + } + function isolate(label) { + label.inLabels = []; + label.outLabels = []; + return label; + } +print("\n\n// XXX MAKEBLOCK " + entry + ' : ' + labels.length + ' : ' + getLabelIds(labels)); + if (labels.length == 0 || !entry) { + print('//empty labels or entry'); + return; + } + function forLabelLines(labels, func) { + labels.forEach(function(label) { + label.lines.forEach(function(line) { func(line, label) }); + }); + } + + // Begin + + calcLabelBranchingData(labels, labelsDict); + + var split = splitter(labels, function(label) { return label.ident == entry }); + var first = split.splitOut[0]; + if (!first) { + print("//no first line"); + return; + } + var others = split.original; + var lastLine = first.lines.slice(-1)[0]; +print("// makeBlock " + entry + ' : ' + getLabelIds(labels) + ' IN: ' + first.inLabels + ' OUT: ' + first.outLabels); + // If we have one outgoing, and none incoming - make this a block of 1, + // and move on the others (forgetting ourself, so they are now also + // totally self-enclosed, once we start them) + if (first.inLabels.length == 0 && first.outLabels.length == 1) { +print('// XXX simple emulated ' + dump(first)); + assertEq(lastLine.intertype, 'branch'); +// assertEq(!!lastLine.label, true); + return { + type: 'simple', + labels: [replaceLabelLabels(first, first.outLabels[0], 'BNOPP')], + entry: entry, + next: makeBlock(replaceInLabels(others, entry), first.outLabels[0], labelsDict), + }; + } +print('// loop ? a'); + // Looping structures - in some way, we can get back to here + if (first.outLabels.length > 0 && first.allInLabels.indexOf(entry) != -1) { +print('// loop ? b'); + // Look for outsiders - labels no longer capable of getting here. Those must be + // outside the loop. Insiders are those that can get back to the entry + var split2 = splitter(others, function(label) { return label.allOutLabels.indexOf(entry) == -1 }); + var outsiders = split2.splitOut; + var insiders = split2.original; +print('// potential loop : in/out : ' + getLabelIds(insiders) + ' to ' + getLabelIds(outsiders)); + // Hopefully exactly one of the outsiders is a 'pivot' - a label to which all those leaving + // the loop must go. Then even some |if (falala) { ... break; }| will get ... + // as an outsider, but it will actually still be in the loop + var pivots = + outsiders.filter(function(outsider) { + return insiders.filter(function(insider) { + return insider.mustGetTo.indexOf(outsider.ident) == -1; + }) == 0; + }); + // Find the earliest pivot. They must be staggered, each leading to another, + // as all insiders must go through *all* of these. So we seek a pivot that + // is never reached by another pivot. That must be the one with fewest + // mustGetTo +print("//pivots: " + pivots.length + ',' + JSON.stringify(getLabelIds(pivots))); + if (pivots.length >= 1) { // We have ourselves a loop + pivots.sort(function(a, b) { return b.mustGetTo.length - a.mustGetTo.length }); + var pivot = pivots[0]; +print('// XXX LOOP : ' + getLabelIds(insiders) + ' to ' + pivot.ident); + var otherLoopLabels = insiders; + var loopLabels = insiders.concat([first]); + var nextLabels = outsiders; + // Rework branches out of the loop into new 'break' labels + forLabelLines(loopLabels, function(line) { + replaceLabels(line, pivot.ident, 'BREAK' + entry); + }); + // Rework branches to the inc part of the loop into |continues| + forLabelLines(loopLabels, function(line, label) { + if (0) {// XXX - make this work :label.outLabels.length == 1 && label.outLabels[0] == entry && !label.hasBreak && !label.hasReturn) { + // it can remove unneeded continues (but does too much right now, as the continue might have been + // placed into an emulated while(1) switch { } + replaceLabels(line, entry, 'BNOPP'); print("// zeezee " + line.lineNum); + } else { + replaceLabels(line, entry, 'BCONT' + entry); + } + }); + // Fix inc branch into rest + var nextEntry; + first.outLabels.forEach(function(outLabel) { + if (outLabel != pivot.ident) { + replaceLabels(lastLine, outLabel, 'BNOPP'); + nextEntry = outLabel; + } + }); + var ret = { + type: 'loop', + labels: loopLabels, + entry: entry, + inc: makeBlock([isolate(first)], entry, labelsDict), + rest: makeBlock(replaceInLabels(otherLoopLabels, entry), nextEntry, labelsDict), + }; + var lastLoopLine = getLastLine(ret.rest); + lastLoopLine.comment = 'Trying to remove continue ' + entry + ' here'; + replaceLabels(lastLoopLine, 'BCONT' + entry, 'BNOPP'); // Last line will feed back into the loop anyhow + ret.next = makeBlock(replaceInLabels(nextLabels, getLabelIds(loopLabels)), pivot.ident, labelsDict); + return ret; + } + } + + // Try an 'if' structure + if (first.outLabels.length == 2) { + if (labelsDict[first.outLabels[1]].mustGetTo.indexOf(first.outLabels[0]) != -1) { + first.outLabels.push(first.outLabels.shift()); // Reverse order - normalize. Very fast check anyhow + } +print('// if? labels are ' + JSON.stringify(first.outLabels)); + if (labelsDict[first.outLabels[0]].mustGetTo.indexOf(first.outLabels[1]) != -1) { + var ifLabelId = first.outLabels[0]; + var outLabelId = first.outLabels[1]; + // 0 - the if area. 1 - where we all exit to later + var ifTrueLabels = getAll(ifLabelId, [outLabelId]); + var ifLabels = ifTrueLabels.concat([first]); + var nextLabels = getAll(outLabelId); + // If we can get to the outside in more than 2 ways (one from if, one from True clause) - have breaks + var breaking = labelsDict[outLabelId].allInLabels.length > 2; +print('// XXX IF: ' + getLabelIds(ifTrueLabels) + ' to ' + outLabelId + ' ==> ' + getLabelIds(nextLabels) + ' breaking: ' + breaking); +print('// if separation: ' + labels.length + ' = ' + ifLabels.length + ' + ' + nextLabels.length + ' (' + ifTrueLabels.length + ')'); + if (breaking) { + // Rework branches out of the if into new 'break' labels + forLabelLines(ifTrueLabels, function(line) { + replaceLabels(line, outLabelId, 'BREAK' + entry); + }); + } + // Remove branching op - we will do it manually + replaceLabels(lastLine, ifLabelId, 'BNOPP'); + replaceLabels(lastLine, outLabelId, 'BNOPP'); +// TODO: Look if there are actual branchings out of the if in the middle. If not, cn use a real if instead of one-time do { } while (false) + return { + type: (breaking ? 'breaking' : '') + 'if', + labels: ifLabels, + entry: entry, + ifVar: lastLine.ident, + check: makeBlock([isolate(first)], entry, labelsDict), + ifTrue: makeBlock(replaceInLabels(ifTrueLabels, entry), ifLabelId, labelsDict), + next: makeBlock(replaceInLabels(nextLabels, getLabelIds(ifLabels)), outLabelId, labelsDict), + }; + } + } + + // Give up on this structure - emulate it +print('// XXX complex emulated'); + return def; + } + + // TODO: each of these can be run in parallel + item.functions.forEach(function(func) { + print("// relooping function: " + func.ident); + func.labelsDict = {}; + func.labels.forEach(function(label) { + func.labelsDict[label.ident] = label; + }); + func.block = makeBlock(func.labels, toNiceIdent('%entry'), func.labelsDict); + }); + + return finish(); + }, + }); + + // Optimizer + substrate.addZyme({ + selectItem: function(item) { return item.relooped && !item.optimized; }, + processItem: function(item) { + function finish() { + item.optimized = true; + item.__finalResult__ = true; + return [item]; + } + if (!OPTIMIZE) return finish(); + + // Check if a line has side effects *aside* from an explicit assign if it has one + function isLineSideEffecting(line) { + if (line.intertype == 'assign' && line.value.intertype !== 'call') return false; + if (['fastgetelementptrload'].indexOf(line.intertype) != -1) return false; + return true; + } + + function replaceVars(line, ident, replaceWith) { + if (!replaceWith) { + print('// Not replacing ' + dump(ident) + ' : ' + dump(replaceWith)); + return false; + } + var found = false; + // assigns, loads, mathops + var POSSIBLE_VARS = ['ident', 'ident2']; + for (var i = 0; i < POSSIBLE_VARS.length; i++) { + var possible = POSSIBLE_VARS[i]; + if (line[possible] == ident) { + line[possible] = replaceWith; + found = true; + } + if (line.value && line.value[possible] == ident) { + line.value[possible] = replaceWith; + found = true; + } + } + // getelementptr, call params + [line, line.value].forEach(function(element) { + if (!element || !element.params) return; + var params = element.params; + for (var j = 0; j < params.length; j++) { + var param = params[j]; + if (param.intertype == 'value' && param.ident == ident) { + param.ident = replaceWith; + found = true; + } + } + }); + return found; + } + + // Fast getelementptr loads + item.functions.forEach(function(func) { + for (var i = 0; i < func.lines.length-1; i++) { + var a = func.lines[i]; + var b = func.lines[i+1]; + if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' && + b.intertype == 'assign' && b.value.intertype == 'load' && + a.ident == b.value.ident) { +// print("// LOADSUSPECT: " + i + ',' + (i+1) + ':' + a.ident + ':' + b.value.ident); + a.intertype = 'fastgetelementptrload'; + a.ident = b.ident; + b.intertype = null; + i++; + } + } + cleanFunc(func); + }); + + // Fast getelementptr stores + item.functions.forEach(function(func) { + for (var i = 0; i < func.lines.length-1; i++) { + var a = func.lines[i]; + var b = func.lines[i+1]; + if (a.intertype == 'assign' && a.value.intertype == 'getelementptr' && + b.intertype == 'store' && b.value.text && + a.ident == b.ident) { +//print("// STORESUSPECT: " + a.lineNum + ',' + b.lineNum); + a.intertype = 'fastgetelementptrstore'; + a.ident = toNiceIdent(b.value.text); + b.intertype = null; + i++; + } + } + cleanFunc(func); + }); + + // TODO: Use for all that can + function optimizePairs(worker, minSlice, maxSlice) { + minSlice = minSlice ? minSlice : 2; + maxSlice = maxSlice ? maxSlice : 2; + item.functions.forEach(function(func) { + func.labels.forEach(function(label) { + for (var i = 0; i < label.lines.length-1; i++) { + for (var j = i+minSlice-1; j < Math.min(i+maxSlice+1, label.lines.length); j++) { + if (worker(func, label.lines.slice(i, j+1))) { + i += j-i; + break; // stop working on this i + } + } + } + }); + cleanFunc(func); + }); + } + + // Fast bitcast&something after them + optimizePairs(function(func, lines) { + var a = lines[0], b = lines[1]; + if (a.intertype == 'assign' && a.value.intertype == 'bitcast' && replaceVars(b, a.ident, a.value.ident)) { + a.intertype = null; + return true; + } + }); + +/* + // Remove unnecessary branches + item.functions.forEach(function(func) { + for (var i = 0; i < func.labels.length-1; i++) { + var a = func.labels[i].lines.slice(-1)[0]; + var b = func.labels[i+1]; + if (a.intertype == 'branch' && a.label == b.ident) { + a.intertype = null; + } + } + cleanFunc(func); + }); +*/ + + // Remove temp variables around nativized + item.functions.forEach(function(func) { + // loads, mathops + var worked = true; + while (worked) { + worked = false; + for (var i = 0; i < func.lines.length-1; i++) { + var a = func.lines[i]; + var b = func.lines[i+1]; + if (a.intertype == 'assign' && a.value.intertype == 'load' && + func.variables[a.value.ident] && // Not global + func.variables[a.value.ident].impl === VAR_NATIVIZED) { +//print('// ??zzzz ' + dump(a) + ',\n // ??zzbb' + dump(b)); + // If target is only used on next line - do not need it. + if (func.variables[a.ident].uses == 1 && + replaceVars(b, a.ident, a.value.ident)) { + a.intertype = null; + i ++; + worked = true; + } + } + } + cleanFunc(func); + } + + // stores + for (var i = 0; i < func.lines.length-1; i++) { + var a = func.lines[i]; + var b = func.lines[i+1]; +//print('// ??zzaa ' + dump(a) + ',\n // ??zzbb' + dump(b)); + if (b.intertype == 'store' && + func.variables[b.ident] && // Not global + func.variables[b.ident].impl === VAR_NATIVIZED) { + // If target is only used on prev line - do not need it. + if (func.variables[b.value.ident] && func.variables[b.value.ident].uses == 1 && + ['assign', 'fastgetelementptrload'].indexOf(a.intertype) != -1 && a.ident == b.value.ident) { + a.ident = b.ident; + a.overrideSSA = true; + b.intertype = null; + i ++; + } + } + } + cleanFunc(func); + }); + + // Remove redundant vars - SLOW! XXX + optimizePairs(function(func, lines) { + // a - a line defining a var + // b - a line defining a var that is identical to a + // c - the only line using b, hopefully + var a = lines[0], b = lines[lines.length-2], c = lines[lines.length-1]; + if (a.intertype == 'assign' && b.intertype == 'assign' && + func.variables[b.ident] && func.variables[b.ident].uses == 1 && + compareTokens(a.value, b.value) && + lines.slice(0,-1).filter(isLineSideEffecting).length == 0 && + replaceVars(c, b.ident, a.ident)) { + b.intertype = null; + return true; + } + }, 3, 12); + + return finish(); + }, + }); + + return substrate.solve(); +} + +// Convert analyzed data to javascript +function JSify(data) { + substrate = new Substrate('JSifyer'); + + [].concat(data.types.filter(function(type) { return type.lineNum != '?' })) + .concat(data.globalConstants) + .concat(data.globalVariables) + .concat(data.functions) + .concat(data.functionStubs) + .forEach(function(item) { + item.passes = {}; + substrate.addItem(item); + }); + + var TYPES = {}; + data.types.forEach(function(type) { + TYPES[type.name_] = type; + }); + + // type + substrate.addZyme({ + selectItem: function(item) { return item.intertype == 'type' && !item.JS }, + processItem: function(item) { + var type = TYPES[item.name_]; + if (type.needsFlattening) { + item.JS = 'var ' + toNiceIdent(item.name_) + '___FLATTENER = ' + JSON.stringify(TYPES[item.name_].flatIndexes) + ';'; + } else { + item.JS = '// type: ' + item.name_; + } + item.__result__ = true; + return [item]; + }, + }); + + function makePointer(slab, pos) { +// XXX hardcoded ptr impl + if (slab == 'HEAP') return pos; + return 'Pointer_make(' + slab + ', ' + (pos ? pos : 0) + ')'; +// return '{ slab: ' + slab + ', pos: ' + (pos ? pos : 0) + ' }'; +// return '[' + slab + ', ' + (pos ? pos : 0) + ']'; + } + + function makeGetSlab(ptr) { +// XXX hardcoded ptr impl +// return ptr + '.slab'; + return 'HEAP'; + } + + function makeGetPos(ptr) { +// XXX hardcoded ptr impl +// return ptr + '.pos'; + return ptr; + } + + function makeGetValue(ptr, pos, noNeedFirst) { + return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + ']'; + } + + function makeSetValue(ptr, pos, value, noNeedFirst) { + return makeGetSlab(ptr) + '[' + (noNeedFirst ? '0' : makeGetPos(ptr)) + (pos ? ' + ' + pos : '') + '] = ' + value; + } + + function makeEmptyStruct(type) { + print('// ??makeemptystruct?? ' + dump(type) + ' : ' + dump(TYPES)); +// XXX hardcoded ptr impl + var ret = []; + var typeData = TYPES[type]; + assertTrue(typeData); + for (var i = 0; i < typeData.flatSize; i++) { + ret.push(0); + } + return ret; + } + + // globalVariable + substrate.addZyme({ + selectItem: function(item) { return item.intertype == 'globalVariable' && !item.JS }, + processItem: function(item) { +//print('// zz global Var: ' + dump(item) + ' :: ' + dump(item.value)); + var value = item.value; + item.JS = 'var ' + item.ident + ' = '; + if (value.text == 'zeroinitializer') { + item.JS += makePointer(JSON.stringify(makeEmptyStruct(item.type))); + } else { + // Generate a constant + item.JS += parseConst(value); + } + item.JS += ';'; + item.__result__ = true; + return [item]; + }, + }); + + // Gets an entire constant expression + function parseConst(value) { +//print('//yyyyy ' + JSON.stringify(value)); + if (value.text[0] == '"') { + value.text = value.text.substr(1, value.text.length-2); + return makePointer('intArrayFromString("' + value.text + '")'); + } else { + // Gets an array of constant items, separated by ',' tokens + function handleSegments(tokens) { + // Handle a single segment (after comma separation) + function handleSegment(segment) { +//print('// seggg: ' + JSON.stringify(segment) + '\n') + if (segment[1].text == 'null') { + return 'null'; + } else if (segment[1].text == 'zeroinitializer') { + return JSON.stringify(makeEmptyStruct(segment[0].text)); + } else if (segment[1].text == 'getelementptr') { + return finalizeGetElementPtr(parseGetElementPtr(segment)); + } else if (segment[1].text == 'bitcast') { + return toNiceIdent(segment[2].item[0].tokens[1].text); + } else if (segment[1].text in searchable('inttoptr', 'ptrtoint')) { + var type = segment[2].item[0].tokens.slice(-1)[0].text; + return handleSegment(segment[2].item[0].tokens.slice(0, -2)); + } else if (segment[1].text == 'add') { + var subSegments = splitTokenList(segment[2].item[0].tokens); + return '(' + handleSegment(subSegments[0]) + ' + ' + handleSegment(subSegments[1]) + ')'; + } else if (segment[1].type == '{') { + return '[' + handleSegments(segment[1].tokens) + ']'; + } else if (segment.length == 2) { + return toNiceIdent(segment[1].text); + } else { +//print('// seggg: ' + JSON.stringify(segment) + '???!???\n') + return '???!???'; + } + }; + return splitTokenList(tokens).map(handleSegment).join(', '); + } + if (value.item) { + // list of items + return makePointer('[ ' + handleSegments(value.item[0].tokens) + ' ]'); + } else if (value.type == '{') { +//print('// qqq!\n') + // struct + return makePointer('[ ' + handleSegments(value.tokens) + ' ]'); + } else { +print('// failzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value)); + return 'X?X?X?X?X?X'; + } + } + } + + // globalConstant + substrate.addZyme({ + selectItem: function(item) { return item.intertype == 'globalConstant' && !item.JS }, + processItem: function(item) { + if (item.ident == '_llvm_global_ctors') { + item.JS = '\n__globalConstructor__ = function() {\n' + + item.ctors.map(function(ctor) { return ' ' + toNiceIdent(ctor) + '();' }).join('\n') + + '\n}\n'; + } else if (item.type.text == 'external') { + item.JS = 'var ' + item.ident + ' = ' + '0; /* external value? */'; + } else { +//print('// cheeckit ' + dump(item)); + // VAR + //item.JS = 'var ' + item.ident + ' = ' + parseConst(item.value) + ';'; + // GETTER - lazy loading, fixes issues with ordering + item.JS = 'this.__defineGetter__("' + item.ident + '", function() { return ' + parseConst(item.value) + ' });'; + } + item.__result__ = true; + return [item]; + }, + }); + + // functionStub + substrate.addZyme({ + selectItem: function(item) { return item.intertype == 'functionStub' && !item.JS }, + processItem: function(item) { + item.JS = '// stub for ' + item.ident; + item.__result__ = true; + return [item]; + }, + }); + + // function splitter + substrate.addZyme({ + selectItem: function(item) { return item.intertype == 'function' && !item.passes.splitted }, + processItem: function(item) { + var ret = [item]; + item.splitItems = 0; + item.labels.forEach(function(label) { + label.lines.forEach(function(line) { + line.func = item.ident; + line.funcData = item; + line.parentLabel = label.ident; + ret.push(line); + item.splitItems ++; + }); + }); + + item.passes.splitted = true; + return ret; + }, + }); + // function reconstructor & post-JS optimizer + substrate.addZyme({ + select: function(items) { + var func = items.filter(function(item) { return item.intertype == 'function' && item.passes.splitted })[0]; + if (!func) return []; + var lines = items.filter(function(item) { return item.JS && item.func === func.ident }); + if (lines.length === 0) return []; + return [func].concat(lines); + }, + process: function(items) { + var func = items[0]; + var lines = items.slice(1); + + lines.forEach(function(line) { + delete line.funcData; // clean up + + var label = func.labels.filter(function(label) { return label.ident == line.parentLabel })[0]; + label.lines = label.lines.map(function(line2) { + return (line2.lineNum !== line.lineNum) ? line2 : line; + }); + }); + + func.splitItems -= lines.length; + if (func.splitItems === 0) { + postJSOptimize(func); + + // Final recombination +//print('zz params::::: ' + JSON.stringify(func.params)); +//print('zz params::::: ' + JSON.stringify(parseParamTokens(func.params.item[0].tokens))); + + var params = parseParamTokens(func.params.item[0].tokens).map(function(param) { + return toNiceIdent(param.ident); + }).join(', '); + + func.JS = '\nfunction ' + func.ident + '(' + params + ') {\n'; + if (LINEDEBUG) func.JS += " print(INDENT + 'Entering: " + func.ident + "'); INDENT += ' ';\n"; + + // Walk function blocks and generate JS + function walkBlock(block, indent) { + if (!block) return ''; +//print('block: ' + dump(block) + ' ::: ' + dump(getLabelIds(block.labels))); + function getLabelLines(label, indent) { +//print('LABELLINES HAS INDENT ' + indent.length + ' ::' + label.lines[0].JS); + return label.lines.map(function(line) { return indent + line.JS + (line.comment ? ' // ' + line.comment : '') }).join('\n'); + } + var ret = ''; + if (block.type == 'emulated' || block.type == 'simple') { +//print('BLOCK HAS INDENT ' + indent.length); +//print('block has: ' + block.entry + ' :: ' + getLabelIds(block.labels)); + if (block.labels.length > 1) { + ret += indent + 'var __label__ = ' + getLabelId(block.entry) + '; /* ' + block.entry + ' */\n'; + ret += indent + 'while(1) switch(__label__) {\n'; + ret += block.labels.map(function(label) { + return indent + ' case ' + getLabelId(label.ident) + ':\n' + getLabelLines(label, indent + ' '); + }).join('\n'); + ret += '\n' + indent + '}'; + } else { + ret += getLabelLines(block.labels[0], indent); + } + ret += '\n'; + } else if (block.type == 'loop') { +// if (mustGetTo(first.outLabels[0], [first.ident, first.outLabels[1]])) { /* left branch must return here, or go to right branch */ + ret += indent + block.entry + ': while(1) {\n'; + ret += walkBlock(block.inc, indent + ' '); + ret += walkBlock(block.rest, indent + ' '); + ret += indent + '}\n'; + } else if (block.type == 'breakingif') { + ret += walkBlock(block.check, indent); + ret += indent + block.entry + ': do { if (' + block.ifVar + ') {\n'; + ret += walkBlock(block.ifTrue, indent + ' '); + ret += indent + '} } while(0);\n'; + } else if (block.type == 'if') { + ret += walkBlock(block.check, indent); + ret += indent + 'if (' + block.ifVar + ') {\n'; + ret += walkBlock(block.ifTrue, indent + ' '); + ret += indent + '}\n'; + } else { + ret = 'XXXXXXXXX!'; + } + return ret + walkBlock(block.next, indent); + } + func.JS += walkBlock(func.block, ' '); + // Finalize function + if (LINEDEBUG) func.JS += " INDENT = INDENT.substr(0, INDENT.length-2);\n"; + func.JS += '}\n'; + func.__result__ = true; + } + + return [func]; + }, + }); + function postJSOptimize(func) { + // Some optimizations are easier to apply after JS-ing the code - for example, a lot + // of stuff can end up as x = y; for example, bitcasts, or nativized, etc. If we + // we to optimize pre-JS, we would need to be aware of all of that. + } + + function getVarData(funcData, ident) { + if (funcData.variables[ident]) { + return funcData.variables[ident].impl; + } else { + return 'emulated'; // All global are emulated + } + } + + // 'assign' + substrate.addZyme(makeSplitter('intertype', 'assign', 'JS', 'value', ['funcData'])); + substrate.addZyme(makeCombiner('intertype', 'assign', 'JS', 'JS', function (item) { + // 'var', since this is SSA - first assignment is the only assignment, and where it is defined + item.JS = (item.overrideSSA ? '' : 'var ') + toNiceIdent(item.ident); + + var type = item.value.type.text; + var value = item.value.JS; +//print("zz var: " + item.JS); + var impl = getVarData(item.funcData, item.ident); + switch (impl) { + case VAR_NATIVE: break; + case VAR_NATIVIZED: { + // SSA, so this must be the alloca. No need for a value + if (!item.overrideSSA) value = ''; + break; + } + case VAR_EMULATED: { +// value = '(((((' + value + ')))))'; + break; + } + default: print('zz unknown impl: ' + impl); + } + if (value) + item.JS += ' = ' + value; + item.JS += ';'; + if (LINEDEBUG && value) { + item.JS += '\nprint(INDENT + "' + item.ident + ' == " + JSON.stringify(' + item.ident + '));'; + item.JS += '\nprint(INDENT + "' + item.ident + ' == " + (' + item.ident + ' && ' + item.ident + '.toString ? ' + item.ident + '.toString() : ' + item.ident + '));'; + } + })); + + // Function lines + function makeFuncLineZyme(intertype, func) { + substrate.addZyme({ + selectItem: function(item) { return item.intertype == intertype && !item.JS }, + processItem: function(item) { + item.JS = func(item); + if (!item.JS) throw "XXX - no JS generated for " + dump(item); + return [item]; + }, + }); + } + makeFuncLineZyme('store', function(item) { +//print('// zzqqzz ' + dump(item.value) + ' :::: ' + dump(item.pointer) + ' :::: '); + var ident = toNiceIdent(item.ident); + var value; + if (item.value.intertype == 'getelementptr') { + value = finalizeGetElementPtr(item.value); + } else { + value = toNiceIdent(item.value.ident); + } +//print("// zz seek " + ident + ',' + dump(item)); + var impl = getVarData(item.funcData, item.ident); + var ret; + switch (impl) { + case VAR_NATIVIZED: ret = ident + ' = ' + value + ';'; break; // We have the actual value here + case VAR_EMULATED: ret = makeSetValue(ident, 0, value) + ';'; break; + default: print('zz unknown [store] impl: ' + impl); + } + if (LINEDEBUG && value) { + ret += '\nprint(INDENT + "' + ident + ' == " + JSON.stringify(' + ident + '));'; + ret += '\nprint(INDENT + "' + ident + ' == " + (' + ident + ' && ' + ident + '.toString ? ' + ident + '.toString() : ' + ident + '));'; + } + return ret; + }); + + var LABEL_IDs = {}; + var LABEL_ID_COUNTER = 0; + function getLabelId(label) { +//print('needs id: ' + label + ' : ' + JSON.stringify(LABEL_IDs)); + label = toNiceIdent(label); + if (label in LABEL_IDs) return LABEL_IDs[label]; + return LABEL_IDs[label] = LABEL_ID_COUNTER ++; + } + + makeFuncLineZyme('branch', function(item) { +//print('branch: ' + dump(item)); + function getIt(tf) { + if (tf[0] == 'B') { + if (tf[1] == 'R') { + return 'break ' + tf.substr(5) + ';'; + } else if (tf[1] == 'C') { + return 'continue ' + tf.substr(5) + ';'; + } else { // NOPP + return ';'; // Returning no text might confuse this parser + } + } else { + return '__label__ = ' + getLabelId(tf) + '; break;'; + } + } + if (!item.ident) { + return getIt(item.label); + } else { + var labelTrue = getIt(item.labelTrue); + var labelFalse = getIt(item.labelFalse); + if (labelTrue == ';' && labelFalse == ';') return ';'; + var head = 'if (' + item.ident + ') { '; + var head2 = 'if (!(' + item.ident + ')) { '; + var else_ = ' } else { '; + var tail = ' }'; + if (labelTrue == ';') { + return head2 + labelFalse + tail; + } else if (labelFalse == ';') { + return head + labelTrue + tail; + } else { + return head + labelTrue + else_ + labelFalse + tail; + } + } + }); + makeFuncLineZyme('return', function(item) { + var ret = ''; + if (LINEDEBUG) ret += "INDENT = INDENT.substr(0, INDENT.length-2);\n"; + ret += 'return'; + if (item.value) { + ret += ' ' + toNiceIdent(item.value); + } + return ret + ';'; + }); + makeFuncLineZyme('invoke', function(item) { + var ret = 'try { '; + ret += makeFunctionCall(item.ident, item.params); + ret += '; __label__ = ' + getLabelId(item.toLabel) + '; } catch(e) { __label__ = ' + getLabelId(item.unwindLabel) + '; }; break;'; + return ret; + }); + makeFuncLineZyme('load', function(item) { +print('// zz LOAD ' + dump(item) + ' :: ' + dump(item.tokens)); + var ident = toNiceIdent(item.ident); + var impl = getVarData(item.funcData, item.ident); + switch (impl) { + case VAR_NATIVIZED: { + return ident; // We have the actual value here + } + case VAR_EMULATED: return makeGetValue(ident); + default: return "unknown [load] impl: " + impl; + } + }); + makeFuncLineZyme('alloca', function(item) { + if (pointingLevels(item.allocatedType.text) == 0 && isStructType(item.allocatedType.text)) { + return makePointer(JSON.stringify(makeEmptyStruct(item.allocatedType.text))); + } else { + return makePointer('[0]'); + } + }); + makeFuncLineZyme('mathop', function(item) { with(item) { + switch (item.op) { + case 'add': return ident + ' + ' + ident2; + case 'sub': return ident + ' - ' + ident2; + case 'sdiv': case 'udiv': return 'Math.floor(' + ident + ' / ' + ident2 + ')'; + case 'mul': return ident + ' * ' + ident2; + case 'urem': case 'srem': return 'Math.floor(' + ident + ' % ' + ident2 + ')'; + case 'fadd': return ident + ' + ' + ident2; + case 'fsub': return ident + ' - ' + ident2; + case 'fdiv': return ident + ' / ' + ident2; + case 'fmul': return ident + ' * ' + ident2; + case 'uitofp': case 'sitofp': return ident; + case 'fptoui': case 'fptosi': return 'Math.floor(' + ident + ')'; + case 'icmp': { + switch (variant) { + case 'uge': case 'sge': return '0+(' + ident + ' >= ' + ident2 + ')'; + case 'ule': case 'sle': return '0+(' + ident + ' <= ' + ident2 + ')'; + case 'ugt': case 'sgt': return '0+(' + ident + ' > ' + ident2 + ')'; + case 'ult': case 'slt': return '0+(' + ident + ' < ' + ident2 + ')'; + case 'ne': case 'une': return '0+(' + ident + ' != ' + ident2 + ')'; + case 'eq': return '0+(' + ident + ' == ' + ident2 + ')'; + } + } + case 'fcmp': { + switch (variant) { + case 'uge': case 'sge': return '0+(' + ident + ' >= ' + ident2 + ')'; + case 'ule': case 'sle': return '0+(' + ident + ' <= ' + ident2 + ')'; + case 'ugt': case 'sgt': return '0+(' + ident + ' > ' + ident2 + ')'; + case 'ult': case 'slt': return '0+(' + ident + ' < ' + ident2 + ')'; + case 'ne': case 'une': return '0+(' + ident + ' != ' + ident2 + ')'; + case 'eq': return '0+(' + ident + ' == ' + ident2 + ')'; + } + } + case 'zext': case 'fpext': case 'trunc': case 'sext': return ident; + case 'select': return '(' + ident + ' ? ' + ident3 + ' : ' + ident4 + ')'; + } + return '&*&*&*&*&*&' + item.op; + } }); +/* + return [{ + __result__: true, + intertype: 'mathop', + op: op, + variant: variant, + type: item.tokens[1].text, + ident: item.tokens[2].text, + value: item.tokens[4].text, + lineNum: item.lineNum, + }]; +*/ + + function getGetElementPtrIndexes(item) { + var ident = item.ident; + var type = item.params[0].type.text; // param 0 == type + // struct pointer, struct*, and getting a ptr to an element in that struct. Param 1 is which struct, then we have items in that + // struct, and possibly further substructures, all embedded + // can also be to 'blocks': [8 x i32]*, not just structs + type = removePointing(type); + var indexes = [makeGetPos(ident)]; + var offset = toNiceIdent(item.params[1].ident); + if (offset != 0) { + indexes.push((isStructType(type) && TYPES[type].flatSize != 1 ? TYPES[type].flatSize + '*' : '') + offset); + } + item.params.slice(2, item.params.length).forEach(function(arg) { + var curr = toNiceIdent(arg.ident); + // TODO: If index is constant, optimize + var typeData = TYPES[type]; + if (isStructType(type) && typeData.needsFlattening) { + if (typeData.flatFactor) { + indexes.push(curr + '*' + typeData.flatFactor); + } else { + indexes.push(toNiceIdent(type) + '___FLATTENER[' + curr + ']'); + } + } else { + if (curr != 0) { + indexes.push(curr); + } + } + type = TYPES[type] ? TYPES[type].fields[curr] : ''; + }); + return indexes.join('+'); + } + + function finalizeGetElementPtr(item) { +//print('//zz finalize: ' + dump(item.params)); + // TODO: statically combine indexes here if consts + return makePointer(makeGetSlab(item.ident), getGetElementPtrIndexes(item)); + } + + function finalizeBitcast(item) { +//print('//zz finalizeBC: ' + dump(item)); + return item.ident; + } + + // aka makeStruct + function pourShape(shape, typeName) { + var type = TYPES[typeName]; +//print('// zz pour: ' + typeName); + type.fields.forEach(function(field) { +//print('// zz pour field: ' + field); + if (isNumberType(field)) { + shape.push(0); + } else if (isPointerType(field)) { + shape.push({slab:[], pos: 0}); + } else if (isStructType(field)) { + var subShape = []; + pourShape(subShape, field); + shape = shape.concat(subShape); + } +//print('// zz poured so far: ' + dump(shape)); + }); + } + + makeFuncLineZyme('bitcast', function(item) { + // XXX Don't we need to copy ptr - i.e. create new ones (at least if uses > just the next line)? + var ident = toNiceIdent(item.ident); +// if (pointingLevels(item.type.text) > 0 && !isStructPointerType(item.type.text) && isStructPointerType(item.type2.text)) { +// // Converting to a struct pointer - need to add struct shapes +// var shape = []; +// pourShape(shape, removePointing(item.type2.text)); +// +// return '_ensureStructures(' + ident + ', 1 /* XXX */, ' + JSON.stringify(shape) + ')'; +// } else { + return ident; +// } + }); + function makeFunctionCall(ident, params) { +//print('// zz makeFC: ' + ident + ' : ' + dump(params)); + var params = params.map(function(param) { + if (param.intertype === 'getelementptr') { + return finalizeGetElementPtr(param); + } else if (param.intertype === 'bitcast') { + return finalizeBitcast(param); + } else { + return toNiceIdent(param.ident); //.value;//'??arg ' + param.intertype + '??'; + } + }); + return ident + '(' + params.join(', ') + ')'; + } + makeFuncLineZyme('getelementptr', function(item) { return finalizeGetElementPtr(item) }); + makeFuncLineZyme('call', function(item) { + return makeFunctionCall(item.ident, item.params) + (item.standalone ? ';' : ''); + }); + + // Optimzed intertypes + + makeFuncLineZyme('fastgetelementptrload', function(item) { +//print('// FAST ' + dump(item)); + return 'var ' + item.ident + ' = ' + makeGetValue(item.value.ident, getGetElementPtrIndexes(item.value), true) + ';'; + }); + makeFuncLineZyme('fastgetelementptrstore', function(item) { +//print('// FAST ' + dump(item)); + return makeSetValue(item.value.ident, getGetElementPtrIndexes(item.value), item.ident, true) + ';'; + }); + + makeFuncLineZyme('unreachable', function(item) { return '// unreachable' }); + + // Final combiner + + function finalCombiner(items) { + var ret = items.filter(function(item) { return item.intertype == 'type' }); + ret = ret.concat(items.filter(function(item) { return item.intertype == 'globalConstant' })); + ret = ret.concat(items.filter(function(item) { return item.intertype == 'globalVariable' })); + ret = ret.concat(items.filter(function(item) { return item.intertype == 'function' })); + return ret.map(function(item) { return item.JS }).join('\n'); + } + + return read('preamble.js') + finalCombiner(substrate.solve()) + read('postamble.js'); +// return finalCombiner(substrate.solve()); +} + +// Main + +var lines = []; +var line; +do { + line = readline(); + if (line == null) break; + lines.push(line); +} while(true); +var data = lines.join("\n"); + +//print('zz prepared') +data = intertyper(data); +//print('zz intertyped') +data = analyzer(data); +//print('zz analyzed') +data = JSify(data); +print(data); + diff --git a/src/postamble.js b/src/postamble.js new file mode 100644 index 00000000..fe5d91f9 --- /dev/null +++ b/src/postamble.js @@ -0,0 +1,23 @@ + +// === Auto-generated postamble setup entry stuff === + +function run(args) { + var argc = args.length+1; + var argv = [Pointer_make(intArrayFromString("/bin/this.program")) ]; + for (var i = 0; i < argc-1; i = i + 1) { + argv.push(Pointer_make(intArrayFromString(args[i]))); + } + argv = Pointer_make(argv); + + __globalConstructor__(); + + _main(argc, argv); +} + +try { + run(arguments); +} catch (e) { + print("Fatal exception: " + e.stack); + throw e; +} + diff --git a/src/preamble.js b/src/preamble.js new file mode 100644 index 00000000..8f98c113 --- /dev/null +++ b/src/preamble.js @@ -0,0 +1,198 @@ + +// === Auto-generated preamble library stuff === + +function __globalConstructor__() { +} + +UNDEFINED = null; // None in python; good for pypy + +var HEAP = []; +var HEAPTOP = 0; + +function abort(text) { + text = "ABORT: " + text; + print(text + "\n"); +// print((new Error).stack); // for stack traces + print("\n"); + throw text; +} + +function Pointer_niceify(ptr) { +// XXX hardcoded ptr impl + return { slab: HEAP, pos: ptr }; +// if (!ptr.slab) +// return { slab: ptr[0], pos: ptr[1] }; +// else +// return ptr; +} + +function Pointer_make(slab, pos) { + pos = pos ? pos : 0; +// XXX hardcoded ptr impl + if (slab === HEAP) return pos; + // Flatten out - needed for global consts/vars + function flatten(slab) { + if (!slab || slab.length === undefined || typeof slab === 'function') return [slab]; + return slab.map(flatten).reduce(function(a,b) { return a.concat(b) }, []); + } + var slab = flatten(slab); + // Finalize + var ret = _malloc(Math.max(slab.length - pos, 1)); + for (var i = 0; i < slab.length - pos; i++) { + HEAP[ret + i] = slab[pos + i]; + } + return ret; +// return { slab: slab, pos: pos ? pos : 0 }; +} + +function Pointer_stringify(ptr) { + ptr = Pointer_niceify(ptr); + + var ret = ""; + var i = 0; + var t; + while (1) { +// if ((ptr.pos + i) >= ptr.slab.length) { return "<< Invalid read: " + (ptr.pos+i) + " : " + ptr.slab.length + " >>"; } else {} + if ((ptr.pos+i) >= ptr.slab.length) { break; } else {} + t = String.fromCharCode(ptr.slab[ptr.pos + i]); + if (t == "\0") { break; } else {} + ret = ret + t; + i = i + 1; + } + return ret; +} + +NULL = Pointer_make([]); + +function _malloc(size) { +// XXX hardcoded ptr impl + var ret = HEAPTOP; + HEAPTOP += size; + return ret; + // We don't actually do new Array(size) - memory is uninitialized anyhow +// return Pointer_make([]); +} + +__Znwj = _malloc; // Mangled "new" +__Znaj = _malloc; // Mangled "new" + +function _free(ptr) { +// XXX hardcoded ptr impl + // XXX TODO - actual implementation! Currently we leak it all + + // Nothing needs to be done! But we mark the pointer + // as invalid. Note that we should do it for all other + // pointers of this slab too. +// ptr.slab = null; +// ptr[0] = null; +} + +__ZdlPv = _free; // Mangled "delete" +__ZdaPv = _free; // Mangled "delete" + +// stdio.h + +function _printf() { + var text = Pointer_stringify(arguments[0]); + var textIndex; + for (var argIndex = 1; argIndex < arguments.length; argIndex = argIndex + 1) { + textIndex = text.indexOf("%d"); // We only support numbers - use puts for strings! + if (textIndex < 0) textIndex = text.indexOf("%f"); + if (textIndex < 0) break; + text = text.substr(0, textIndex) + String(arguments[argIndex]) + text.substr(textIndex + 2); + } + print(text); +} + +function _puts(p) { + _printf(p); +// print("\n"); // XXX print already always adds one +} + +function _putchar(p) { + print(String.fromCharCode(p)); +} + +function _strlen(p) { +// XXX hardcoded ptr impl + var q = p; + while (HEAP[q] != 0) q++; + return q - p; +// p = Pointer_niceify(p); +// return p.slab.length; // XXX might want to find the null terminator... +} + +// stdlib.h + +// Get a pointer, return int value of the string it points to +function _atoi(s) { + return Math.floor(Number(Pointer_stringify(s))); +} + +function _llvm_memcpy_i32(dest, src, num, idunno) { +// XXX hardcoded ptr impl + for (var i = 0; i < num; i++) { + HEAP[dest + i] = HEAP[src + i]; + } +// dest = Pointer_niceify(dest); +// src = Pointer_niceify(src); +// dest.slab = src.slab.slice(src.pos); +} + +// Tools +// println((new Error).stack); // for stack traces + +function println(text) { + print(text);// + "\n"); // XXX print already always adds one +} + +function jrint(label, obj) { + if (!obj) { + obj = label; + label = ''; + } else + label = label + ' : '; + print(label + JSON.stringify(obj)); +} + +function intArrayFromString(stringy) { + var ret = []; + var t; + var i = 0; + while (i < stringy.length) { + ret.push(stringy.charCodeAt(i)); + i = i + 1; + } + ret.push(0); + return ret; +} + +// Utilities + +function _HexToInt(stringy) { + var ret = 0; + var mul = 1; + var base; + for (var i = (stringy.length - 1); i >= 0; i = i - 1) { + if (stringy.charCodeAt(i) >= "A".charCodeAt(0)) { + base = "A".charCodeAt(0) - 10; + } else { + base = "0".charCodeAt(0); + } + ret = ret + (mul*(stringy.charCodeAt(i) - base)); + mul = mul * 16; + } + return ret; +} + +function _IEEEUnHex(stringy) { + var a = _HexToInt(stringy.substr(2, 8)); + var b = _HexToInt(stringy.substr(10)); + var e = (a >> ((52 - 32) & 0x7ff)) - 1023; + return ((((a & 0xfffff | 0x100000) * 1.0) / Math.pow(2,52-32)) * Math.pow(2, e)) + (((b * 1.0) / Math.pow(2, 52)) * Math.pow(2, e)); +} + +var INDENT = ''; // Debugging + +// === Body === + diff --git a/src/utility.js b/src/utility.js new file mode 100644 index 00000000..447c52a6 --- /dev/null +++ b/src/utility.js @@ -0,0 +1,82 @@ + +function dump(item) { + try { + return JSON.stringify(item); + } catch(e) { + var ret = []; + for (var i in item) { + var j = item[i]; + if (typeof j === 'string' || typeof j === 'number') { + ret.push(i + ': ' + j); + } else { + ret.push(i + ': [?]'); + } + } + return ret.join(', '); + } +} + +function dumpKeys(item) { + var ret = []; + for (var i in item) { + var j = item[i]; + if (typeof j === 'string' || typeof j === 'number') { + ret.push(i + ': ' + j); + } else { + ret.push(i + ': [?]'); + } + } + return ret.join(', '); +} + +function assertEq(a, b) { + if (a !== b) { + print("Stack: " + new Error().stack); + throw "Should have been equal: " + a + " : " + b; + } +} + +function assertTrue(a) { + assertEq(!!a, true); +} + +function dedup(items, ident) { + var seen = {}; + if (ident) { + return items.filter(function(item) { + if (seen[item[ident]]) return false; + seen[item[ident]] = true; + return true; + }); + } else { + return items.filter(function(item) { + if (seen[item]) return false; + seen[item] = true; + return true; + }); + } +} + +function range(size) { + var ret = []; + for (var i = 0; i < size; i++) ret.push(i); + return ret; +} + +function searchable() { + var ret = {}; + for (var i = 0; i < arguments.length; i++) { + ret[arguments[i]] = 0; + } + return ret; +} + +function walkJSON(item, func) { + func(item); + for (x in item) { + if (typeof item[x] === 'object') { + walkJSON(item[x], func); + } + } +} + diff --git a/tests/fannkuch.cpp b/tests/fannkuch.cpp new file mode 100644 index 00000000..c2fcfaa2 --- /dev/null +++ b/tests/fannkuch.cpp @@ -0,0 +1,159 @@ +/* + * The Computer Language Benchmarks Game + * http://shootout.alioth.debian.org/ + * + * Contributed by Eckehard Berns + * Based on code by Heiner Marxen + * and the ATS version by Hongwei Xi + * + * Modified for emscripten by azakai + */ + +#include <stdlib.h> +#include <stdio.h> + +struct worker_args { + int i, n; + struct worker_args *next; +}; + +int +fannkuch_worker(void *_arg) +{ + struct worker_args *args = (worker_args*)_arg; + int *perm1, *count, *perm; + int maxflips, flips, i, n, r, j, k, tmp; + + maxflips = 0; + n = args->n; + perm1 = (int*)malloc(n * sizeof(int)); + perm = (int*)malloc(n * sizeof(int)); + count = (int*)malloc(n * sizeof(int)); + for (i = 0; i < n; i++) + perm1[i] = i; + perm1[args->i] = n - 1; + perm1[n - 1] = args->i; + r = n; + + for (;;) { + for (; r > 1; r--) + count[r - 1] = r; + if (perm1[0] != 0 && perm1[n - 1] != n - 1) { + for (i = 0; i < n; i++) + perm[i] = perm1[i]; + flips = 0; + k = perm[0]; + do { + for (i = 1, j = k - 1; i < j; i++, j--) { + tmp = perm[i]; + perm[i] = perm[j]; + perm[j] = tmp; + } + flips++; + tmp = perm[k]; + perm[k] = k; + k = tmp; + } while (k); + if (maxflips < flips) + maxflips = flips; + } + for (;;) { + if (r >= n - 1) { + free(perm1); + free(perm); + free(count); + return maxflips; + } + + { + int p0 = perm1[0]; + for (i = 0; i < r; i++) + perm1[i] = perm1[i + 1]; + perm1[i] = p0; + } + if (--count[r] > 0) + break; + r++; + } + } +} + +static int +fannkuch(int n) +{ + struct worker_args *args, *targs; + int showmax = 30; + int *perm1, *count, i, r, maxflips, flips; + + args = NULL; + for (i = 0; i < n - 1; i++) { + targs = (worker_args*)malloc(sizeof(struct worker_args)); + targs->i = i; + targs->n = n; + targs->next = args; + args = targs; + } + + perm1 = (int*)malloc(n * sizeof(int)); + count = (int*)malloc(n * sizeof(int)); + + for (i = 0; i < n; i++) + perm1[i] = i; + + r = n; + for (;;) { + if (showmax) { + for (i = 0; i < n; i++) + printf("%d", perm1[i] + 1); + printf("\n"); + showmax--; + } else + goto cleanup; + + for (; r > 1; r--) + count[r - 1] = r; + + for (;;) { + if (r == n) + goto cleanup; + { + int p0 = perm1[0]; + for (i = 0; i < r; i++) + perm1[i] = perm1[i + 1]; + perm1[i] = p0; + } + if (--count[r] > 0) + break; + + r++; + } + } + + cleanup: + free(perm1); + free(count); + maxflips = 0; + while (args != NULL) { + flips = (int)fannkuch_worker(args); + if (maxflips < flips) + maxflips = flips; + targs = args; + args = args->next; + free(targs); + } + return maxflips; +} + +int +main(int ac, char **av) +{ + int n = ac > 1 ? atoi(av[1]) : 0; + + if (n < 1) { + printf("Wrong argument.\n"); + return 1; + } + printf("Pfannkuchen(%d) = %d.\n", n, fannkuch(n)); + return 0; +} + diff --git a/tests/fannkuch.js b/tests/fannkuch.js new file mode 100644 index 00000000..f8554270 --- /dev/null +++ b/tests/fannkuch.js @@ -0,0 +1,70 @@ +/* The Computer Language Benchmarks Game + http://shootout.alioth.debian.org/ + contributed by Isaac Gouy + modified by Matthew Wilson */ + +function fannkuch(n) { + var check = 0; + var perm = Array(n); + var perm1 = Array(n); + var count = Array(n); + var maxPerm = Array(n); + var maxFlipsCount = 0; + var m = n - 1; + + for (var i = 0; i < n; i++) perm1[i] = i; + var r = n; + + while (true) { + // write-out the first 30 permutations + if (check < 30){ + var s = ""; + for(var i=0; i<n; i++) s += (perm1[i]+1).toString(); + print(s); + check++; + } + + while (r != 1) { count[r - 1] = r; r--; } + if (!(perm1[0] == 0 || perm1[m] == m)) { + for (var i = 0; i < n; i++) perm[i] = perm1[i]; + + var flipsCount = 0; + var k; + + while (true) { + k = perm[0] + if (k == 0) break; + var k2 = (k + 1) >> 1; + for (var i = 0; i < k2; i++) { + var temp = perm[i]; perm[i] = perm[k - i]; perm[k - i] = temp; + } + flipsCount++; + } + + if (flipsCount > maxFlipsCount) { + maxFlipsCount = flipsCount; + for (var i = 0; i < n; i++) maxPerm[i] = perm1[i]; + } + } + + while (true) { + if (r == n) return maxFlipsCount; + var perm0 = perm1[0]; + var i = 0; + while (i < r) { + var j = i + 1; + perm1[i] = perm1[j]; + i = j; + } + perm1[r] = perm0; + + count[r] = count[r] - 1; + if (count[r] > 0) break; + r++; + } + } +} + +var n = parseInt(arguments[0]); +print("Pfannkuchen(" + n.toString() + ") = " + fannkuch(n).toString()); + diff --git a/tests/fasta.cpp b/tests/fasta.cpp new file mode 100644 index 00000000..7954feef --- /dev/null +++ b/tests/fasta.cpp @@ -0,0 +1,180 @@ +/* The Computer Language Benchmarks Game + http://shootout.alioth.debian.org/ + contributed by Andrew Moon +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +struct Random { + enum { IM = 139968, IA = 3877, IC = 29573 }; + Random() : last(42) {} + float get( float max = 1.0f ) { + last = ( last * IA + IC ) % IM; + return max * last / IM; + } +protected: + unsigned int last; +} rng; + +struct IUB { + int c; + double p; + unsigned int pi; +}; + +struct Cumulative { + enum { slots = 512, }; + + Cumulative( IUB *start ) { + double p = 0; + for ( IUB *iter = start; iter->c; ++iter ) { + p += iter->p; + iter->p = p < 1.0 ? p : 1.0; + iter->pi = (unsigned int )( iter->p * slots ); + } + + for ( unsigned int i = 0; i <= slots; i++ ) { + while ( i > start->pi ) { + ++start; + } + + table[i] = start; + } + } + + const char operator[] ( float pct ) const { + IUB *iter = table[(unsigned int )( pct * slots )]; + while ( iter->p < pct ) + ++iter; + return iter->c; + } + +protected: + IUB *table[slots + 1]; +}; + +static const size_t lineLength = 60; + +struct LineBuffer { + LineBuffer() : lastN(0) {} + LineBuffer &genrand( Cumulative &table, size_t N ) { + //assert(N <= lineLength); + for ( size_t i = 0; i < N; i++ ) + buffer[i] = table[rng.get()]; + buffer[N] = '\n'; + buffer[N+1] = '\0'; + lastN = N + 1; + return *this; + } + void writeline() { puts(buffer); } +protected: + char buffer[lineLength + 2]; + size_t lastN; +}; + +struct RotatingString { + RotatingString( const char *in ) : pos(0) { + size = strlen( in ); + buffer = new char[size + lineLength]; + memcpy( buffer, in, size ); + memcpy( buffer + size, in, lineLength ); + } + ~RotatingString() { delete[] buffer; } + void write( size_t bytes ) { + char* temp = new char[bytes+2]; + memcpy(temp, buffer + pos, bytes); + temp[bytes] = '\n'; + temp[bytes] = '\0'; + puts(temp); + delete temp; + pos += bytes; + if ( pos > size ) + pos -= size; + } +protected: + char *buffer; + size_t size, pos; +}; + +template< class Output > +void makeFasta( const char *id, const char *desc, size_t N, Output &output ) { + while ( N ) { + const size_t bytes = N < lineLength ? N : lineLength; + output.writeline( bytes ); + N -= bytes; + } +} + +struct Repeater { + Repeater( const char *alu ) : rot(alu) {} + void writeline( size_t bytes ) { rot.write( bytes ); } + void run( const char *id, const char *desc, size_t N ) { + makeFasta( id, desc, N, *this ); + } +protected: + RotatingString rot; +}; + +struct Randomized { + Randomized( IUB *start ) : table(start) {} + void writeline( size_t bytes ) { line.genrand(table, bytes).writeline(); } + void run( const char *id, const char *desc, size_t N ) { + makeFasta( id, desc, N, *this ); + } +protected: + Cumulative table; + LineBuffer line; +}; + +IUB iub[] = { + { 'a', 0.27, 0 }, + { 'c', 0.12, 0 }, + { 'g', 0.12, 0 }, + { 't', 0.27, 0 }, + + { 'B', 0.02, 0 }, + { 'D', 0.02, 0 }, + { 'H', 0.02, 0 }, + { 'K', 0.02, 0 }, + { 'M', 0.02, 0 }, + { 'N', 0.02, 0 }, + { 'R', 0.02, 0 }, + { 'S', 0.02, 0 }, + { 'V', 0.02, 0 }, + { 'W', 0.02, 0 }, + { 'Y', 0.02, 0 }, + { 0, 0, 0 }, +}; + +IUB homosapiens[] = { + { 'a', 0.3029549426680, 0 }, + { 'c', 0.1979883004921, 0 }, + { 'g', 0.1975473066391, 0 }, + { 't', 0.3015094502008, 0 }, + { 0, 0, 0 }, +}; + +static const char alu[] = + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG" + "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA" + "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA" + "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT" + "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC" + "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG" + "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; + +int main( int argc, const char *argv[] ) { + const size_t n = ( argc > 1 ) ? atoi( argv[1] ) : 512; + + Repeater(alu) + .run( "ONE", "Homo sapiens alu", n*2 ); + Randomized(iub) + .run( "TWO", "IUB ambiguity codes", n*3 ); + Randomized(homosapiens) + .run( "THREE", "Homo sapiens frequency", n*5 ); + + return 0; +} + diff --git a/tests/fasta.js b/tests/fasta.js new file mode 100644 index 00000000..f8bca39c --- /dev/null +++ b/tests/fasta.js @@ -0,0 +1,71 @@ +// The Computer Language Benchmarks Game +// http://shootout.alioth.debian.org +// +// Contributed by Ian Osgood +// Largely rewritten by Matthew Wilson + +function fastaRepeat(n, seq) { + var seqi = 0, len = seq.length, i, j, k, l, block, + str = Array(len*60+1).join(seq), lines = Array(i=j=len*len); + while (--j>-1) { lines[j] = str.substr(60*j, 60) } + block = lines.join("\n"); + for (j=0, k=Math.floor((l=Math.floor(n/60))/i); j<k; ++j) { print(block) } + for (j = 0, k = l % i; j < k; ++j) { print(lines[j]) } + if (n % 60 > 0) { print(lines[k].substr(0, n % 60)) } +} + +var rand=(function() { + var Last = 42; + return function() { return (Last=(Last * 3877 + 29573) % 139968) / 139968 } +})(); + +function printLineMaker(table) { + var h = 0, k = [], v = [], c, l=0; + for (c in table) { l = v[h] = table[k[h++] = c]+=l; } + return function(x) { + var line = ""; + next: for (var i=0; i<x; ++i) { + var r = rand(), j=0; + for (;;++j) { + if (r < v[j]) { + line += k[j]; + continue next; + } + } + } + print(line); + } +} + +function fastaRandom(n, table) { + var printLine=printLineMaker(table); + while ((n -= 60) > -1) { printLine(60) } + if (n<0 && n>-60) { printLine(60 + n) } +} + +(function main(n) { + var ALU = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; + + var IUB = { a:0.27, c:0.12, g:0.12, t:0.27, B:0.02, D:0.02, H:0.02, K:0.02, + M:0.02, N:0.02, R:0.02, S:0.02, V:0.02, W:0.02, Y:0.02 } + + var HomoSap = { + a:0.3029549426680, c:0.1979883004921, g:0.1975473066391, t:0.3015094502008 + } + + print(">ONE Homo sapiens alu") + fastaRepeat(2*n, ALU) + + print(">TWO IUB ambiguity codes") + fastaRandom(3*n, IUB) + + print(">THREE Homo sapiens frequency") + fastaRandom(5*n, HomoSap) +}).call(this, 1*arguments[0]*1) + diff --git a/tests/runner.py b/tests/runner.py new file mode 100644 index 00000000..e675815d --- /dev/null +++ b/tests/runner.py @@ -0,0 +1,473 @@ +''' +Simple test runner + +See settings.cfg file for options¶ms. Edit as needed. +''' + +from subprocess import Popen, PIPE, STDOUT +import os, unittest, tempfile, shutil, time + +# Params + +def path_from_root(pathelems): + return os.path.join(os.path.sep, *(((os.path.abspath(os.path.dirname(__file__)).split(os.sep)))[:-1] + pathelems)) + +exec(open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'settings.cfg'), 'r').read()) + +def timeout_run(proc, timeout, note): + start = time.time() + while time.time() - start < timeout and proc.poll() is None: + time.sleep(0.1) + if proc.poll() is None: + proc.kill() + raise Exception("Timed out: " + note) + return proc.communicate()[0] + +class T(unittest.TestCase): + def do_test(self, src, expected_output, args=[], output_nicerizer=None, no_python=False, no_build=False): + global DEBUG + dirname = TEMP_DIR + '/tmp' # tempfile.mkdtemp(dir=TEMP_DIR) + if not os.path.exists(dirname): + os.makedirs(dirname) + filename = os.path.join(dirname, 'src.cpp') + if not no_build: + f = open(filename, 'w') + f.write(src) + f.close() + if DEBUG: print "[[C++ => LLVM]]" + output = Popen([LLVM_GCC, '-emit-llvm', '-c', filename, '-o', filename + '.o'], stdout=PIPE, stderr=STDOUT).communicate()[0] + if DEBUG: print output + if DEBUG: print "[[LLVM => JS]]" + if False: + # Use an llc backend, written in C++, to generate JS + output = Popen([LLC, '-march='+LLVM_BACKEND, filename + '.o', '-o=' + filename + '.o.cpp'], stdout=PIPE, stderr=STDOUT).communicate()[0] + elif False: + # Use python parser to generate JS from disassembled llvm + output = Popen([LLVM_DIS, filename + '.o', '-o=' + filename + '.o.llvm'], stdout=PIPE, stderr=STDOUT).communicate()[0] + if DEBUG: print output + output = Popen(['python', PY_PARSER, filename + '.o.llvm'], stdout=open(filename + '.o.js', 'w'), stderr=STDOUT).communicate()[0] + else: + # JS parser/compiler + output = Popen([LLVM_DIS, filename + '.o', '-o=' + filename + '.o.llvm'], stdout=PIPE, stderr=STDOUT).communicate()[0] + if DEBUG: print output + cwd = os.getcwd() + os.chdir(path_from_root(['src'])) + output = timeout_run(Popen([PARSER_ENGINE] + PARSER_OPTS + [JS_COMPILER], stdin=open(filename + '.o.llvm', 'r'), stdout=open(filename + '.o.js', 'w'), stderr=STDOUT), 20, 'Parser') + os.chdir(cwd) + # return + if DEBUG: print output + output = open(filename + '.o.js').read() + if output is not None and 'Traceback' in output: print output; assert (0) # 'generating JavaScript failed' + if DEBUG: print "\nGenerated JavaScript:\n\n===\n\n%s\n\n===\n\n" % output + # if not DEBUG: + js_output = timeout_run(Popen([JS_ENGINE] + JS_ENGINE_OPTS + [filename + '.o.js'] + args, stdout=PIPE, stderr=STDOUT), 20, 'Execution') + if output_nicerizer is not None: + js_output = output_nicerizer(js_output) + # else: + # print "[[JS output]]" + # ret = "Output shown on screen, test not actually run!" + # Popen([JS_ENGINE, filename + '.o.js'] + args, stderr=STDOUT).communicate()[0] + self.assertContained(expected_output, js_output) + self.assertNotContained('ERROR', js_output) + return + + if not no_python: + #DEBUG = True + SPIDERMONKEY = True + if SPIDERMONKEY: + if DEBUG: print "[[RJS ==> SpiderMonkey parsed tree]]" + args = [SPIDERMONKEY_SHELL, '-e', 'parse(snarf(\"%s\"))' % (filename + '.o.js')] + output = Popen(args, stdout=PIPE, stderr=STDOUT).communicate()[0] + f = open(filename + 'o.js.sm', 'w') + f.write(output) + f.close() + else: + if DEBUG: print "[[RJS ==> RPython]]" + output = Popen(['python', RJS_RPYTHON, filename + '.o.js', filename + '.o.js.py'], stdout=PIPE, stderr=STDOUT).communicate()[0] + if DEBUG: print output + + py_output = Popen(['python', filename + '.o.js.py'] + args, stdout=PIPE, stderr=STDOUT).communicate()[0] + if output_nicerizer is not None: + py_output = output_nicerizer(py_output) + self.assertContained(expected_output, py_output) + if js_output != py_output: + print "WARNING: js and py outputs not identical (but each is similar enough to the expected_output)" + + PYPY = True +# PYPY = False + if PYPY: + pypy_source = filename.replace('.', '_') + '_o_js_py.py' + if DEBUG: print "[[RPython ==> PyPy]]" + output = Popen(['python', RJS_PYPY, filename + '.o.js.py', pypy_source], stdout=PIPE, stderr=STDOUT).communicate()[0] + print output + +# # Python on pypy-ready source + # pypy_output = Popen(['python', pypy_source] + args, stdout=PIPE, stderr=STDOUT).communicate()[0] + # if output_nicerizer is not None: + # pypy_output = output_nicerizer(pypy_output) + # self.assertContained(expected_output, pypy_output) + # if js_output != pypy_output: + # print "WARNING: js and PYpy outputs not identical (but each is similar enough to the expected_output)" + + # PyPy compilation of source to binary + +# shutil.rmtree(dirname) + + def assertContained(self, value, string): + if value not in string: + print "Expected to find '%s' in '%s'" % (value, string) + self.assertTrue(value in string) + + def assertNotContained(self, value, string): + if value in string: + print "Expected to NOT find '%s' in '%s'" % (value, string) + self.assertTrue(value not in string) + + def test_hello_world(self): + src = ''' + #include <stdio.h> + int main() + { + printf("hello, world!\\n"); + return 0; + } + ''' + self.do_test(src, 'hello, world!') + + def test_intvars(self): + src = ''' + #include <stdio.h> + int main() + { + int x = 5; + int y = x+17; + int z = (y-1)/2; // Should stay an integer after division! + y += 1; + int w = x*3+4; + int k = w < 15 ? 99 : 101; + int i = k > 100; // Should be an int, not a bool! + printf("*%d,%d,%d,%d,%d,%d*\\n", x, y, z, w, k,i); + return 0; + } + ''' + self.do_test(src, '*5,23,10,19,101,1*') + + def test_if(self): + src = ''' + #include <stdio.h> + int main() + { + int x = 5; + if (x > 3) { + printf("*yes*\\n"); + } + return 0; + } + ''' + self.do_test(src, '*yes*') + + def test_loop(self): + src = ''' + #include <stdio.h> + int main() + { + int x = 5; + for (int i = 0; i < 6; i++) + x += x*i; + printf("*%d*\\n", x); + return 0; + } + ''' + self.do_test(src, '*3600*') + + def test_strings(self): + src = ''' + #include <stdio.h> + #include <stdlib.h> + int main(int argc, char **argv) + { + printf("*%d", argc); + puts(argv[1]); + puts(argv[2]); + printf("%d*", atoi(argv[3])+2); + return 0; + } + ''' + self.do_test(src, '*4*wowie*too*76*', ['wowie', 'too', '74'], lambda x: x.replace('\n', '*')) + + def test_funcs(self): + src = ''' + #include <stdio.h> + int funcy(int x) + { + return x*9; + } + int main() + { + printf("*%d,%d*\\n", funcy(8), funcy(10)); + return 0; + } + ''' + self.do_test(src, '*72,90*') + + def test_structs(self): + src = ''' + #include <stdio.h> + struct S + { + int x, y; + }; + int main() + { + S a, b; + a.x = 5; a.y = 6; + b.x = 101; b.y = 7009; + S *c, *d; + c = &a; + c->x *= 2; + c = &b; + c->y -= 1; + d = c; + d->y += 10; + printf("*%d,%d,%d,%d,%d,%d,%d,%d*\\n", a.x, a.y, b.x, b.y, c->x, c->y, d->x, d->y); + return 0; + } + ''' + self.do_test(src, '*10,6,101,7018,101,7018,101,7018*') + + gen_struct_src = ''' + #include <stdio.h> + #include <stdlib.h> + struct S + { + int x, y; + }; + int main() + { + S* a = {{gen_struct}}; + a->x = 51; a->y = 62; + printf("*%d,%d*\\n", a->x, a->y); + {{del_struct}}(a); + return 0; + } + ''' + + def test_mallocstruct(self): + self.do_test(self.gen_struct_src.replace('{{gen_struct}}', '(S*)malloc(sizeof(S))').replace('{{del_struct}}', 'free'), '*51,62*') + + def test_newstruct(self): + self.do_test(self.gen_struct_src.replace('{{gen_struct}}', 'new S').replace('{{del_struct}}', 'delete'), '*51,62*') + + def test_addr_of_stacked(self): + src = ''' + #include <stdio.h> + void alter(int *y) + { + *y += 5; + } + int main() + { + int x = 2; + alter(&x); + printf("*%d*\\n", x); + return 0; + } + ''' + self.do_test(src, '*7*') + + def test_linked_list(self): + src = ''' + #include <stdio.h> + struct worker_args { + int value; + struct worker_args *next; + }; + int main() + { + worker_args a; + worker_args b; + a.value = 60; + a.next = &b; + b.value = 900; + b.next = NULL; + worker_args* c = &a; + int total = 0; + while (c) { + total += c->value; + c = c->next; + } + printf("*%d*\\n", total); + return 0; + } + ''' + self.do_test(src, '*960*') + + def test_class(self): + src = ''' + #include <stdio.h> + struct Random { + enum { IM = 139968, IA = 3877, IC = 29573 }; + Random() : last(42) {} + float get( float max = 1.0f ) { + last = ( last * IA + IC ) % IM; + return max * last / IM; + } + protected: + unsigned int last; + } rng1; + int main() + { + Random rng2; + int count = 0; + for (int i = 0; i < 100; i++) { + float x1 = rng1.get(); + float x2 = rng2.get(); + printf("%f, %f\\n", x1, x2); + if (x1 != x2) count += 1; + } + printf("*%d*\\n", count); + return 0; + } + ''' + self.do_test(src, '*0*') + + def test_inherit(self): + src = ''' + #include <stdio.h> + struct Parent { + int x1, x2; + }; + struct Child : Parent { + int y; + }; + int main() + { + Parent a; + a.x1 = 50; + a.x2 = 87; + Child b; + b.x1 = 78; + b.x2 = 550; + b.y = 101; + Child* c = (Child*)&a; + c->x1 ++; + c = &b; + c->y --; + printf("*%d,%d,%d,%d,%d,%d,%d*\\n", a.x1, a.x2, b.x1, b.x2, b.y, c->x1, c->x2); + return 0; + } + ''' + self.do_test(src, '*51,87,78,550,100,78,550*') + + def test_polymorph(self): + src = ''' + #include <stdio.h> + struct Parent { + virtual int getit() { return 11; }; + }; + struct Child : Parent { + int getit() { return 74; } + }; + int main() + { + Parent *x = new Parent(); + Parent *y = new Child(); + printf("*%d,%d*\\n", x->getit(), y->getit()); + return 0; + } + ''' + self.do_test(src, '*11,74*') + + def zzzzzzzzzzzzzzztest_constglobalstructs(self): # TODO: make this work + src = ''' + #include <stdio.h> + struct IUB { + int c; + double p; + unsigned int pi; + }; + + IUB iub[] = { + { 'a', 0.27, 5 }, + { 'c', 0.15, 4 }, + { 'g', 0.12, 3 }, + { 't', 0.27, 2 }, + }; + + int main( int argc, const char *argv[] ) { + printf("*%d,%d,%d*\\n", iub[0].c, int(iub[1].p*100), iub[2].pi); + return 0; + } + ''' + self.do_test(src, '*97,15,3*') + + def test_conststructs(self): + src = ''' + #include <stdio.h> + struct IUB { + int c; + double p; + unsigned int pi; + }; + + int main( int argc, const char *argv[] ) { + IUB iub[] = { + { 'a', 0.27, 5 }, + { 'c', 0.15, 4 }, + { 'g', 0.12, 3 }, + { 't', 0.27, 2 }, + }; + printf("*%d,%d,%d*\\n", iub[0].c, int(iub[1].p*100), iub[2].pi); +// printf("*%d*\\n", int(iub[1].p*100)); + return 0; + } + ''' + self.do_test(src, '*97,15,3*') + + + def test_memcpy(self): + src = ''' + #include <stdio.h> + #include <string.h> + + int main( int argc, const char *argv[] ) { + int *a = new int[10]; + int *b = new int[1]; + int *c = new int[10]; + for (int i = 0; i < 10; i++) + a[i] = 2; + *b = 5; + for (int i = 0; i < 10; i++) + c[i] = 8; + printf("*%d,%d,%d,%d,%d*\\n", a[0], a[9], *b, c[0], c[9]); + // Should overwrite a, but not touch b! + memcpy(a, c, 10*sizeof(int)); + printf("*%d,%d,%d,%d,%d*\\n", a[0], a[9], *b, c[0], c[9]); + return 0; + } + ''' + self.do_test(src, '*2,2,5,8,8*\n*8,8,5,8,8*') + + def test_fannkuch(self): + results = [ (1,0), (2,1), (3,2), (4,4), (5,7), (6,10), (7, 16), (8,22) ] + for i, j in results: + src = open(path_from_root(['tests', 'fannkuch.cpp']), 'r').read() + self.do_test(src, 'Pfannkuchen(%d) = %d.' % (i,j), [str(i)], no_build=i>1) + + def zzztest_fasta(self): + results = [ (1,'''GG*ctt**tgagc**'''), (20,'''GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT*cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg**tacgtgtagcctagtgtttgtgttgcgttatagtctatttgtggacacagtatggtcaaa**tgacgtcttttgatctgacggcgttaacaaagatactctg**'''), +(50,'''GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA*TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT*cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg**tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa**NtactMcSMtYtcMgRtacttctWBacgaa**agatactctgggcaacacacatacttctctcatgttgtttcttcggacctttcataacct**ttcctggcacatggttagctgcacatcacaggattgtaagggtctagtggttcagtgagc**ggaatatcattcgtcggtggtgttaatctatctcggtgtagcttataaatgcatccgtaa**gaatattatgtttatttgtcggtacgttcatggtagtggtgtcgccgatttagacgtaaa**ggcatgtatg**''') ] + for i, j in results: + src = open(path_from_root(['tests', 'fasta.cpp']), 'r').read() + self.do_test(src, j, [str(i)], lambda x: x.replace('\n', '*'), no_python=True, no_build=i>1) + +if __name__ == '__main__': + if DEBUG: print "LLVM_GCC:", LLVM_GCC + if DEBUG: print "LLC:", LLC + if DEBUG: print "PARSER:", PARSER + if DEBUG: print "JS_ENGINE:", JS_ENGINE + for cmd in [LLVM_GCC, JS_ENGINE]: + if DEBUG: print "Checking for existence of", cmd + assert(os.path.exists(cmd)) + unittest.main() + diff --git a/tests/settings.cfg b/tests/settings.cfg new file mode 100644 index 00000000..34f90c87 --- /dev/null +++ b/tests/settings.cfg @@ -0,0 +1,16 @@ +DEBUG=False +TEMP_DIR='/dev/shm' +LLVM_GCC=os.path.expanduser('~/Dev/llvm/llvm-gcc-27/cbuild/bin/bin/llvm-g++') +LLVM_DIS=os.path.expanduser('~/Dev/llvm/llvm-27/cbuild/bin/llvm-dis') +PY_PARSER=path_from_root(['llvm-parser', 'parser.py']) +SPIDERMONKEY_SHELL=os.path.expanduser(os.path.expanduser('~/Dev/tracemonkey/js/src/js')) +V8_ENGINE=os.path.expanduser('~/Dev/v8/d8') # Note: Fails in test_strings etc. due to commandline arguments parsing +JS_ENGINE=SPIDERMONKEY_SHELL +JS_ENGINE_OPTS=[]#['-j'] +PARSER_ENGINE=SPIDERMONKEY_SHELL +PARSER_OPTS=[]#['-j'] +#PARSER_ENGINE=V8_ENGINE +#PARSER_OPTS = [] +#JS_ENGINE=V8_ENGINE +JS_COMPILER=path_from_root(['src', 'parser.js']) + |