diff options
-rw-r--r-- | src/jsifier.js | 2 | ||||
-rw-r--r-- | src/parseTools.js | 157 | ||||
-rwxr-xr-x | tests/runner.py | 78 |
3 files changed, 134 insertions, 103 deletions
diff --git a/src/jsifier.js b/src/jsifier.js index 2c0332e3..32c224b7 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -487,7 +487,7 @@ function JSify(data, functionsOnly, givenFunctions) { } else { // If this is not linkable, anything not in the library is definitely missing if (!LINKABLE && !LibraryManager.library.hasOwnProperty(shortident) && !LibraryManager.library.hasOwnProperty(shortident + '__inline')) { - if (VERBOSE) printErr('warning: missing function: ' + shortident); + if (VERBOSE || WARN_ON_UNDEFINED_SYMBOLS) printErr('warning: unresolved symbol: ' + shortident); LibraryManager.library[shortident] = new Function("Module['printErr']('missing function: " + shortident + "'); abort(-1);"); } item.JS = addFromLibrary(shortident); diff --git a/src/parseTools.js b/src/parseTools.js index 06e33028..5fc4cd9e 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -241,7 +241,21 @@ function isFunctionType(type, out) { returnType = 'i8*'; // some pointer type, no point in analyzing further } if (out) out.returnType = returnType; - var argText = type.substr(lastOpen); + // find ( that starts the arguments + var depth = 0, i = type.length-1, argText = null; + while (i >= 0) { + var curr = type[i]; + if (curr == ')') depth++; + else if (curr == '(') { + depth--; + if (depth == 0) { + argText = type.substr(i); + break; + } + } + i--; + } + assert(argText); return isFunctionDef({ text: argText, item: tokenize(argText.substr(1, argText.length-2), true) }, out); } @@ -583,7 +597,9 @@ function parseLLVMFunctionCall(segment) { function eatLLVMIdent(tokens) { var ret; if (tokens[0].text in PARSABLE_LLVM_FUNCTIONS) { - ret = parseLLVMFunctionCall([{text: 'i0'}].concat(tokens.slice(0,2))).ident; // TODO: Handle more cases, return a full object, process it later + var item = parseLLVMFunctionCall([{text: '?'}].concat(tokens.slice(0,2))); // TODO: Handle more cases, return a full object, process it later + if (item.intertype == 'bitcast') checkBitcast(item); + ret = item.ident; tokens.shift(); tokens.shift(); } else { @@ -1630,80 +1646,83 @@ function makeGetSlabs(ptr, type, allowMultiple, unsigned) { return []; } -function finalizeLLVMFunctionCall(item, noIndexizeFunctions) { - if (item.intertype == 'getelementptr') { // TODO finalizeLLVMParameter on the ident and the indexes? - return makePointer(makeGetSlabs(item.ident, item.type)[0], getGetElementPtrIndexes(item), null, item.type); - } - if (item.intertype == 'bitcast') { - // Warn about some types of casts, then fall through to the handling code below - var oldType = item.params[0].type; - var newType = item.type; - if (isPossiblyFunctionType(oldType) && isPossiblyFunctionType(newType)) { - var oldInfo = {}, newInfo = {}; - var oldCount = countNormalArgs(oldType, oldInfo); - var newCount = countNormalArgs(newType, newInfo); - var warned = false; - function showWarning() { - if (warned) return; - warned = true; - if (VERBOSE || ASM_JS) { - warnOnce('Casting potentially incompatible function pointer ' + oldType + ' to ' + newType + ', for ' + item.params[0].ident.slice(1)); - } else { - warnOnce('Casting a function pointer type to a potentially incompatible one (use VERBOSE=1 to see more)'); - } - if (ASM_JS) warnOnce('Incompatible function pointer casts are very dangerous with ASM_JS=1, you should investigate and correct these'); +function checkBitcast(item) { + // Warn about some types of casts, then fall through to the handling code below + var oldType = item.params[0].type; + var newType = item.type; + if (isPossiblyFunctionType(oldType) && isPossiblyFunctionType(newType)) { + var oldInfo = {}, newInfo = {}; + var oldCount = countNormalArgs(oldType, oldInfo); + var newCount = countNormalArgs(newType, newInfo); + var warned = false; + function showWarning() { + if (warned) return; + warned = true; + if (VERBOSE || ASM_JS) { + warnOnce('Casting potentially incompatible function pointer ' + oldType + ' to ' + newType + ', for ' + item.params[0].ident.slice(1)); + } else { + warnOnce('Casting a function pointer type to a potentially incompatible one (use VERBOSE=1 to see more)'); } - if (oldCount != newCount && oldCount && newCount) { + warnOnce('See checkBitcast in src/parseTools.js for more information on dangerous function pointer casts'); + if (ASM_JS) warnOnce('Incompatible function pointer casts are very dangerous with ASM_JS=1, you should investigate and correct these'); + } + if (oldCount != newCount && oldCount && newCount) { + showWarning(); + + // General concerns + // ================ + // + // This may be dangerous as clang generates different code for C and C++ calling conventions. The only problem + // case appears to be passing a structure by value, C will have (field1, field2) as function args, and the + // function will internally create a structure with that data, while C++ will have (struct* byVal) and it + // will create a copy before calling the function, then call it with a pointer to the copy. Mixing the two + // first of all leads to two copies being made, so this is a bad idea even regardless of Emscripten. But, + // what is a problem for Emscripten is that mixing these two calling conventions (say, calling a C one from + // C++) will then assume that (struct* byVal) is actually the same as (field1, field2). In native code, this + // is easily possible, you place the two fields on the stack and call the function (you know to place the + // values since there is 'byVal'). In Emscripten, though, this means we would need to always do one or the + // other of the two possibilities, for example, always passing by-value structs as (field1, field2). This + // would slow down everything, just to handle this corner case. (Which, just to point out how much of a + // corner case it is, does not appear to happen with nested structures!) + // + // The recommended solution for this problem is not to mix C and C++ calling conventions when passing structs + // by value. Either always pass structs by value within C code or C++ code, but not mixing the two by + // defining a function in one and calling it from the other (so, just changing .c to .cpp, or moving code + // from one file to another, would be enough to fix this), or, do not pass structs by value (which in general + // is inefficient, and worth avoiding if you can). + // + // Note that removing all arguments is acceptable, as a vast to void ()*. + // + + // asm.js concerns + // =============== + // + // asm must be even more careful, any change in number of args can make function calls simply fail (since + // they will look in the wrong table). You should investigate each one to see if it is problematic, and + // adjust the source code to avoid potential issues. The warnings will tell you which types and variables + // are involved, look in the LLVM IR to see what is going on, and to connect that to the original source. + } + if (ASM_JS) { + if (oldCount != newCount) showWarning(); + else if (!isIdenticallyImplemented(oldInfo.returnType, newInfo.returnType)) { showWarning(); - - // General concerns - // ================ - // - // This may be dangerous as clang generates different code for C and C++ calling conventions. The only problem - // case appears to be passing a structure by value, C will have (field1, field2) as function args, and the - // function will internally create a structure with that data, while C++ will have (struct* byVal) and it - // will create a copy before calling the function, then call it with a pointer to the copy. Mixing the two - // first of all leads to two copies being made, so this is a bad idea even regardless of Emscripten. But, - // what is a problem for Emscripten is that mixing these two calling conventions (say, calling a C one from - // C++) will then assume that (struct* byVal) is actually the same as (field1, field2). In native code, this - // is easily possible, you place the two fields on the stack and call the function (you know to place the - // values since there is 'byVal'). In Emscripten, though, this means we would need to always do one or the - // other of the two possibilities, for example, always passing by-value structs as (field1, field2). This - // would slow down everything, just to handle this corner case. (Which, just to point out how much of a - // corner case it is, does not appear to happen with nested structures!) - // - // The recommended solution for this problem is not to mix C and C++ calling conventions when passing structs - // by value. Either always pass structs by value within C code or C++ code, but not mixing the two by - // defining a function in one and calling it from the other (so, just changing .c to .cpp, or moving code - // from one file to another, would be enough to fix this), or, do not pass structs by value (which in general - // is inefficient, and worth avoiding if you can). - // - // Note that removing all arguments is acceptable, as a vast to void ()*. - // - - // asm.js concerns - // =============== - // - // asm must be even more careful, any change in number of args can make function calls simply fail (since - // they will look in the wrong table). You should investigate each one to see if it is problematic, and - // adjust the source code to avoid potential issues. The warnings will tell you which types and variables - // are involved, look in the LLVM IR to see what is going on, and to connect that to the original source. - } - if (ASM_JS) { - if (oldCount != newCount) showWarning(); - else if (!isIdenticallyImplemented(oldInfo.returnType, newInfo.returnType)) { - showWarning(); - } else { - for (var i = 0; i < oldCount; i++) { - if (!isIdenticallyImplemented(oldInfo.segments[i][0].text, newInfo.segments[i][0].text)) { - showWarning(); - break; - } + } else { + for (var i = 0; i < oldCount; i++) { + if (!isIdenticallyImplemented(oldInfo.segments[i][0].text, newInfo.segments[i][0].text)) { + showWarning(); + break; } } } } } +} + +function finalizeLLVMFunctionCall(item, noIndexizeFunctions) { + if (item.intertype == 'getelementptr') { // TODO finalizeLLVMParameter on the ident and the indexes? + return makePointer(makeGetSlabs(item.ident, item.type)[0], getGetElementPtrIndexes(item), null, item.type); + } + if (item.intertype == 'bitcast') checkBitcast(item); var temp = { op: item.intertype, variant: item.variant, diff --git a/tests/runner.py b/tests/runner.py index b94affb2..a92f4bc4 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -3376,17 +3376,41 @@ Exiting setjmp function, level: 0, prev_jmp: -1 if self.emcc_args is None: return self.skip('need c99') self.emcc_args += ['-std=c99'] src = open(path_from_root('tests', 'life.c'), 'r').read() - self.do_run(src, '''-------- - [][][] -[][] - [] - [] -[][] - - - --------- -''', ['8', '8', '25000'], force_c=True) + self.do_run(src, '''-------------------------------- +[] [] [][][] + [] [] [] [][] [] [] [] +[] [][] [][] [][][] [] + [] [] [] [] [][] [] [] + [] [][] [] [] [] [] [][][][] + [][] [][] [] [][][] [] [] + [] [][] [][] [][] [][][] + [][] [][][] [] [] + [][] [][] [] + [][][] + [] + + + + + [][][] + [] [][] [][] + [][] [] [][] [][] + [][] [][] + [] + [][] + [][] [] +[] [][] [] + [][][] [] + [] [][] +[] [] [] + [] +[] [] [] + [][][] + + [] + [][][] [] +-------------------------------- +''', ['0'], force_c=True) def test_array2(self): src = ''' @@ -9404,7 +9428,7 @@ Options that are modified or new in %s include: output = Popen([PYTHON, compiler, 'twopart_main.o'] + args, stdout=PIPE, stderr=PIPE).communicate() assert os.path.exists(target), '\n'.join(output) #print '\n'.join(output) - self.assertContained('is not a function', run_js(target, stderr=STDOUT)) + self.assertContained('missing function', run_js(target, stderr=STDOUT)) try_delete(target) # Combining those bc files into js should work @@ -9722,21 +9746,6 @@ f.close() self.assertContained('result: 62', run_js(os.path.join(self.get_dir(), 'a.out.js'))) - def test_asm_undefined(self): - src = r''' - #include <stdio.h> - extern void doit(); - int main(int argc, char **argv) { - if (argc == 121) doit(); - printf("done\n"); - return 1; - } - ''' - filename = self.in_dir('src.cpp') - open(filename, 'w').write(src) - out, err = Popen([PYTHON, EMCC, filename, '-s', 'ASM_JS=1', '-O2'], stderr=PIPE).communicate() - assert 'Unresolved symbol' in err, 'always warn on undefs in asm, since it breaks validation: ' + err - def test_redundant_link(self): lib = "int mult() { return 1; }" lib_name = os.path.join(self.get_dir(), 'libA.c') @@ -9862,7 +9871,7 @@ f.close() Popen([PYTHON, EMCC, 'testa.cpp', '-o', 'liba.js', '-s', 'BUILD_AS_SHARED_LIB=2', '-s', 'LINKABLE=1', '-s', 'NAMED_GLOBALS=1', '-I.']).communicate() Popen([PYTHON, EMCC, 'testb.cpp', '-o', 'libb.js', '-s', 'BUILD_AS_SHARED_LIB=2', '-s', 'LINKABLE=1', '-s', 'NAMED_GLOBALS=1', '-I.']).communicate() - Popen([PYTHON, EMCC, 'main.cpp', '-o', 'main.js', '-s', 'RUNTIME_LINKED_LIBS=["liba.js", "libb.js"]', '-s', 'NAMED_GLOBALS=1', '-I.']).communicate() + Popen([PYTHON, EMCC, 'main.cpp', '-o', 'main.js', '-s', 'RUNTIME_LINKED_LIBS=["liba.js", "libb.js"]', '-s', 'NAMED_GLOBALS=1', '-I.', '-s', 'LINKABLE=1']).communicate() Popen([PYTHON, EMCC, 'main.cpp', 'testa.cpp', 'testb.cpp', '-o', 'full.js', '-I.']).communicate() @@ -10189,11 +10198,14 @@ f.close() return 0; } ''') - output = Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp'), '-s', 'WARN_ON_UNDEFINED_SYMBOLS=1'], stderr=PIPE).communicate() - self.assertContained('Unresolved symbol: _something', output[1]) - output = Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp')], stderr=PIPE).communicate() - self.assertNotContained('Unresolved symbol: _something\n', output[1]) + for args in [[], ['-O2']]: + print args + output = Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp'), '-s', 'WARN_ON_UNDEFINED_SYMBOLS=1'] + args, stderr=PIPE).communicate() + self.assertContained('unresolved symbol: something', output[1]) + + output = Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp')] + args, stderr=PIPE).communicate() + self.assertNotContained('unresolved symbol: something\n', output[1]) def test_toobig(self): open(os.path.join(self.get_dir(), 'main.cpp'), 'w').write(r''' @@ -10462,7 +10474,7 @@ f.close() output = Popen([os.path.join(self.get_dir(), 'files.o.run')], stdin=open(os.path.join(self.get_dir(), 'stdin')), stdout=PIPE, stderr=PIPE).communicate() self.assertContained('''size: 37 data: 119,97,107,97,32,119,97,107,97,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35,35 -loop: 119 97 107 97 32 119 97 107 97 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 +loop: 119 97 107 97 32 119 97 107 97 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 input:inter-active texto $ |