diff options
-rw-r--r-- | src/analyzer.js | 10 | ||||
-rw-r--r-- | src/jsifier.js | 52 | ||||
-rw-r--r-- | src/parseTools.js | 32 | ||||
-rw-r--r-- | src/postamble.js | 7 | ||||
-rw-r--r-- | src/preamble.js | 16 | ||||
-rw-r--r-- | src/settings.js | 15 | ||||
-rw-r--r-- | src/utility.js | 6 | ||||
-rw-r--r-- | tests/runner.py | 61 | ||||
-rw-r--r-- | tests/settings.py | 12 |
9 files changed, 178 insertions, 33 deletions
diff --git a/src/analyzer.js b/src/analyzer.js index 93e61f1c..13c5c648 100644 --- a/src/analyzer.js +++ b/src/analyzer.js @@ -92,7 +92,7 @@ function analyzer(data) { if (isNumberType(type) || isPointerType(type)) return; data.types[type] = { name_: type, - fields: [ 'int32' ], // XXX + fields: [ 'i32' ], // XXX flatSize: 1, lineNum: '?', }; @@ -166,9 +166,13 @@ function analyzer(data) { var sizes = []; type.flatIndexes = type.fields.map(function(field) { var soFar = type.flatSize; - var size = 1; - if (isStructType(field)) { + var size; + if (isNumberType(field) || isPointerType(field)) { + size = getNativeFieldSize(field); + } else if (isStructType(field)) { size = item.types[field].flatSize; + } else { + assert(0); } type.flatSize += size; sizes.push(size); diff --git a/src/jsifier.js b/src/jsifier.js index 305f4a8f..381b4cf7 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -66,6 +66,33 @@ function JSify(data) { return ret; } + function alignStruct(values, type) { + dprint('types', 'alignStruct: ' + dump(type)); + // XXX hardcoded ptr impl + var ret = []; + var typeData = TYPES[type]; + assertTrue(typeData); + var i = 0; + while (i < values.length) { + var currField = typeData.fields[i]; + var currValue = values[i]; + if (isStructType[currField]) { + var fieldTypeData = TYPES[currField]; + assertTrue(fieldTypeData); + ret = ret.concat(alignStruct(values.slice(i, fieldTypeData.fields.length), currField)); + i += fieldTypeData.fields.length; + } else { + ret.push(currValue); + // pad to align, unless it's a structure and already aligned + if (currValue[0] != '[') { + ret = ret.concat(zeros(getNativeFieldSize(currField)-1)); + } + i += 1; + } + } + return ret; + } + // Gets an entire constant expression function parseConst(value, type) { dprint('gconst', '//yyyyy ' + JSON.stringify(value) + ',' + type + '\n'); @@ -95,25 +122,27 @@ function JSify(data) { var subSegments = splitTokenList(segment[2].item[0].tokens); return '(' + handleSegment(subSegments[0]) + ' + ' + handleSegment(subSegments[1]) + ')'; } else if (segment[1].type == '{') { - return '[' + handleSegments(segment[1].tokens) + ']'; + // struct + var type = segment[0].text; + return '[' + alignStruct(handleSegments(segment[1].tokens), type).join(', ') + ']'; } else if (segment[1].type == '[') { - return '[' + handleSegments(segment[1].item[0].tokens) + ']'; + return '[' + handleSegments(segment[1].item[0].tokens).join(', ') + ']'; // XXX alignStruct? } else if (segment.length == 2) { return parseNumerical(toNiceIdent(segment[1].text)); } else { throw 'Invalid segment: ' + dump(segment); } }; - return splitTokenList(tokens).map(handleSegment).map(parseNumerical).join(', '); + return splitTokenList(tokens).map(handleSegment).map(parseNumerical); } if (value.item) { // list of items - return makePointer('[ ' + handleSegments(value.item[0].tokens) + ' ]'); + return makePointer('[ ' + alignStruct(handleSegments(value.item[0].tokens), type).join(', ') + ' ]'); } else if (value.type == '{') { // struct - return makePointer('[ ' + handleSegments(value.tokens) + ' ]'); + return makePointer('[ ' + alignStruct(handleSegments(value.tokens), type).join(', ') + ' ]'); } else if (value[0]) { - return makePointer('[ ' + handleSegments(value[0].tokens) + ' ]'); + return makePointer('[ ' + alignStruct(handleSegments(value[0].tokens), type).join(', ') + ' ]'); } else { throw '// failzzzzzzzzzzzzzz ' + dump(value.item) + ' ::: ' + dump(value); } @@ -571,7 +600,11 @@ function JSify(data) { var indexes = [makeGetPos(ident)]; var offset = toNiceIdent(item.params[1].ident); if (offset != 0) { - indexes.push((isStructType(type) && TYPES[type].flatSize != 1 ? TYPES[type].flatSize + '*' : '') + offset); + if (isStructType(type)) { + indexes.push((TYPES[type].flatSize != 1 ? TYPES[type].flatSize + '*' : '') + offset); + } else { + indexes.push(getNativeFieldSize(type, true) + '*' + offset); + } } item.params.slice(2, item.params.length).forEach(function(arg) { var curr = toNiceIdent(arg.ident); @@ -585,7 +618,7 @@ function JSify(data) { } } else { if (curr != 0) { - indexes.push(curr); + indexes.push(curr); // XXX QUANTUM_SIZE? } } type = TYPES[type] ? TYPES[type].fields[curr] : ''; @@ -669,7 +702,8 @@ function JSify(data) { substrate.addItems(data.functions, 'FunctionSplitter'); substrate.addItems(data.functionStubs, 'FunctionStub'); - return preprocess(read('preamble.js')) + finalCombiner(substrate.solve()) + read('postamble.js'); + var params = { 'QUANTUM_SIZE': QUANTUM_SIZE }; + return preprocess(read('preamble.js'), params) + finalCombiner(substrate.solve()) + preprocess(read('postamble.js'), params); // return finalCombiner(substrate.solve()); } diff --git a/src/parseTools.js b/src/parseTools.js index cdd20e2f..5d868920 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -1,8 +1,11 @@ // Various tools for parsing llvm // Simple #if/else/endif preprocessing for a file. Checks if the -// ident checked is true in our global. -function preprocess(text) { +// ident checked is true in our global. Also replaces some constants +function preprocess(text, constants) { + for (constant in constants) { + text = text.replace(eval('/' + constant + '/g'), constants[constant]); + } var lines = text.split('\n'); var ret = ''; var show = true; @@ -375,3 +378,28 @@ function getLabelIds(labels) { return labels.map(function(label) { return label.ident }); } +//! Returns the size of a field, as C/C++ would have it (in 32-bit, +//! for now). +//! @param field The field type, by name +//! @param alone Whether this is inside a structure (so padding is +//! used) or alone (line in char*, where no padding is done). +function getNativeFieldSize(field, alone) { + var size; + if (QUANTUM_SIZE > 1) { + size = { + 'i1': alone ? 1 : 4, // inside a struct, aligned to 4, + 'i8': alone ? 1 : 4, // most likely...? XXX + 'i32': 4, + 'i64': 8, + 'float': 4, + 'double':8, + }[field]; // XXX 32/64 bit stuff + if (!size) { + size = 4; // Must be a pointer XXX 32/64 + } + } else { + size = 1; + } + return size; +} + diff --git a/src/postamble.js b/src/postamble.js index c156cd21..a34370b2 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -3,9 +3,16 @@ function run(args) { var argc = args.length+1; + function pad() { + for (var i = 0; i < QUANTUM_SIZE-1; i++) { + argv.push(0); + } + } var argv = [Pointer_make(intArrayFromString("/bin/this.program")) ]; + pad(); for (var i = 0; i < argc-1; i = i + 1) { argv.push(Pointer_make(intArrayFromString(args[i]))); + pad(); } argv = Pointer_make(argv); diff --git a/src/preamble.js b/src/preamble.js index e458384d..f53f50ad 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -8,8 +8,10 @@ var __THREW__ = false; // Used in checking for thrown exceptions. var __ATEXIT__ = []; -var HEAP = intArrayFromString('(null)'); // So printing %s of NULL gives '(null)' -var HEAPTOP = HEAP.length+1; // Leave 0 as an invalid address, 'NULL' +var HEAP = []; +var HEAPTOP = 0; +Pointer_make(intArrayFromString('(null)')); // So printing %s of NULL gives '(null)' + // Also this ensures we leave 0 as an invalid address, 'NULL' #if SAFE_HEAP // Semi-manual memory corruption debugging @@ -89,14 +91,16 @@ function Pointer_stringify(ptr) { if ((ptr.pos+i) >= ptr.slab.length) { break; } else {} t = String.fromCharCode(ptr.slab[ptr.pos + i]); if (t == "\0") { break; } else {} - ret = ret + t; - i = i + 1; + ret += t; + i += 1; } return ret; } function _malloc(size) { // XXX hardcoded ptr impl + size = Math.ceil(size/QUANTUM_SIZE)*QUANTUM_SIZE; // Allocate blocks of proper minimum size + // Also keeps HEAPTOP aligned var ret = HEAPTOP; HEAPTOP += size; return ret; @@ -139,7 +143,7 @@ function __formatString() { if (curr == '%'.charCodeAt(0) && ['d', 'f', '.'].indexOf(String.fromCharCode(next)) != -1) { var argText = String(arguments[argIndex]); // Handle very very simply formatting, namely only %.Xf - if (HEAP[textIndex+1] == '.'.charCodeAt(0)) { + if (next == '.'.charCodeAt(0)) { var limit = parseInt(String.fromCharCode(HEAP[textIndex+2])); var dotIndex = argText.indexOf('.'); argText = argText.substr(0, dotIndex+1+limit); @@ -156,7 +160,7 @@ function __formatString() { textIndex += 2; } else { ret.push(curr); - textIndex ++; + textIndex += 1; } } return Pointer_make(ret); diff --git a/src/settings.js b/src/settings.js index f84391fd..f6282ed4 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1,3 +1,18 @@ +// Tuning +QUANTUM_SIZE = 1; // This is the size of an individual field in a structure. 1 would + // lead to e.g. doubles and chars both taking 1 memory address. This + // is a form of 'compressed' memory, with shrinking and stretching + // according to the type, when compared to C/C++. On the other hand + // 8 means all fields take 8 memory addresses, so a double takes + // the same as a char. Note that we only actually store something in + // the top address - the others are just empty, an 'alignment cost' + // of sorts. + // + // llvm-gcc works with 1. However, clang uses llvm_memcpy for various + // things, and the number of bytes it copies is hardcoded. A simple + // way to prevent problems with that is to set QUANTUM_SIZE to 8. + // See the 'copyop' automatic test. + // Code embetterments OPTIMIZE = 1; // Optimize llvm operations into js commands RELOOP = 0; // Recreate js native loops from llvm data XXX - disabled pending optimizing rewrite diff --git a/src/utility.js b/src/utility.js index e4f20b7e..8907eb91 100644 --- a/src/utility.js +++ b/src/utility.js @@ -79,6 +79,12 @@ function range(size) { return ret; } +function zeros(size) { + var ret = []; + for (var i = 0; i < size; i++) ret.push(0); + return ret; +} + function searchable() { if (typeof arguments[0] === 'object') arguments = arguments[0]; var ret = {}; diff --git a/tests/runner.py b/tests/runner.py index 519494ed..53c553d0 100644 --- a/tests/runner.py +++ b/tests/runner.py @@ -27,7 +27,7 @@ def timeout_run(proc, timeout, note): return proc.communicate()[0] class T(unittest.TestCase): - def do_test(self, src, expected_output, args=[], output_nicerizer=None, output_processor=None, no_python=False, no_build=False, main_file=None, emscripten_settings=[]): + def do_test(self, src, expected_output, args=[], output_nicerizer=None, output_processor=None, no_python=False, no_build=False, main_file=None): global DEBUG dirname = TEMP_DIR + '/tmp' # tempfile.mkdtemp(dir=TEMP_DIR) if not os.path.exists(dirname): @@ -66,16 +66,15 @@ class T(unittest.TestCase): output = Popen([LLVM_DIS, filename + '.o', '-o=' + filename + '.o.llvm'], stdout=PIPE, stderr=STDOUT).communicate()[0] if DEBUG: print output # Run Emscripten - if type(emscripten_settings) not in [list, tuple]: - emscripten_settings = [emscripten_settings] - Popen([EMSCRIPTEN, filename + '.o.llvm', PARSER_ENGINE] + emscripten_settings, stdout=open(filename + '.o.js', 'w'), stderr=STDOUT).communicate() + emscripten_settings = ['{ "QUANTUM_SIZE": %d }' % QUANTUM_SIZE] + timeout_run(Popen([EMSCRIPTEN, filename + '.o.llvm', PARSER_ENGINE] + emscripten_settings, stdout=open(filename + '.o.js', 'w'), stderr=STDOUT), 120, 'Compiling') output = open(filename + '.o.js').read() if output_processor is not None: output_processor(output) if output is not None and 'Traceback' in output: print output; assert (0) # 'generating JavaScript failed' if DEBUG: print "\nGenerated JavaScript:\n\n===\n\n%s\n\n===\n\n" % output #assert(0) # XXX - js_output = timeout_run(Popen([JS_ENGINE] + JS_ENGINE_OPTS + [filename + '.o.js'] + args, stdout=PIPE, stderr=STDOUT), 60, 'Execution') + js_output = timeout_run(Popen([JS_ENGINE] + JS_ENGINE_OPTS + [filename + '.o.js'] + args, stdout=PIPE, stderr=STDOUT), 120, 'Execution') if output_nicerizer is not None: js_output = output_nicerizer(js_output) self.assertContained(expected_output, js_output) @@ -473,18 +472,19 @@ class T(unittest.TestCase): }; int main( int argc, const char *argv[] ) { + int before = 70; IUB iub[] = { { 'a', 0.3029549426680, 5 }, { 'c', 0.15, 4 }, { 'g', 0.12, 3 }, { 't', 0.27, 2 }, }; - printf("*%d,%d,%d,%d*\\n", iub[0].c, int(iub[1].p*100), iub[2].pi, int(iub[0].p*10000)); -// printf("*%d*\\n", int(iub[1].p*100)); + int after = 90; + printf("*%d,%d,%d,%d,%d,%d*\\n", before, iub[0].c, int(iub[1].p*100), iub[2].pi, int(iub[0].p*10000), after); return 0; } ''' - self.do_test(src, '*97,15,3,3029*') + self.do_test(src, '*70,97,15,3,3029,90*') def test_ptrtoint(self): src = ''' @@ -637,6 +637,36 @@ class T(unittest.TestCase): ''' self.do_test(src, '*staticccz*') + def test_copyop(self): + # clang generated code is vulnerable to this, as it uses + # memcpy for assignments, with hardcoded numbers of bytes + # (llvm-gcc copies items one by one). See QUANTUM_SIZE in + # settings.js. + src = ''' + #include <stdio.h> + #include <math.h> + + struct vec { + double x,y,z; + vec() : x(0), y(0), z(0) { }; + vec(const double a, const double b, const double c) : x(a), y(b), z(c) { }; + }; + + struct basis { + vec a, b, c; + basis(const vec& v) { + a=v; // should not touch b! + printf("*%.2f,%.2f,%.2f*\\n", b.x, b.y, b.z); + } + }; + + int main() { + basis B(vec(1,0,0)); + return 0; + } + ''' + self.do_test(src, '*0,0,0*') + def test_nestedstructs(self): src = ''' #include <stdio.h> @@ -684,7 +714,12 @@ class T(unittest.TestCase): return 0; } ''' - self.do_test(src, '*4,0,1,2,2,3|5,0,1,1,2,3,3,4|6,0,5,0,1,1,2,3,3,4*') + if QUANTUM_SIZE == 1: + # Compressed memory + self.do_test(src, '*4,0,1,2,2,3|5,0,1,1,2,3,3,4|6,0,5,0,1,1,2,3,3,4*') + else: + # Bloated memory; same layout as C/C++ + self.do_test(src, '*16,0,4,8,8,12|20,0,4,4,8,12,12,16|24,0,20,0,4,4,8,12,12,16*') def test_fannkuch(self): results = [ (1,0), (2,1), (3,2), (4,4), (5,7), (6,10), (7, 16), (8,22) ] @@ -715,14 +750,16 @@ def make_test(compiler): class TT(T): def setUp(self): global COMPILER - COMPILER=compiler + COMPILER = compiler['path'] + global QUANTUM_SIZE + QUANTUM_SIZE = compiler['quantum_size'] return TT for name in COMPILERS.keys(): - exec('T_' + name + ' = make_test(COMPILERS["' + name + '"])') + exec('T_%s = make_test(COMPILERS["%s"])' % (name, name)) del T # T is just a shape for the specific subclasses, we don't test it itself if __name__ == '__main__': - for cmd in COMPILERS.values() + [LLVM_DIS, PARSER_ENGINE, JS_ENGINE]: + for cmd in map(lambda compiler: compiler['path'], COMPILERS.values()) + [LLVM_DIS, PARSER_ENGINE, JS_ENGINE]: print "Checking for existence of", cmd assert(os.path.exists(cmd)) print "Running Emscripten tests..." diff --git a/tests/settings.py b/tests/settings.py index 53765fbe..92f35203 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -3,7 +3,17 @@ TEMP_DIR='/dev/shm' CLANG=os.path.expanduser('~/Dev/llvm-2.7/cbuild/bin/clang++') LLVM_GCC=os.path.expanduser('~/Dev/llvm-gcc-2.7/cbuild/install/bin/llvm-g++') -COMPILERS = { 'clang': CLANG, 'llvm_gcc': LLVM_GCC } + +COMPILERS = { + 'clang': { + 'path': CLANG, + 'quantum_size': 4, # See settings.js + }, + 'llvm_gcc': { + 'path': LLVM_GCC, + 'quantum_size': 1, + } +} COMPILER_OPTS = ['-m32'] # Need to build as 32bit arch, for now - # various errors on 64bit compilation |