diff options
-rwxr-xr-x | emcc | 7 | ||||
-rw-r--r-- | src/parseTools.js | 9 | ||||
-rw-r--r-- | src/settings.js | 10 | ||||
-rwxr-xr-x | tests/runner.py | 17 | ||||
-rw-r--r-- | tools/shared.py | 1 |
5 files changed, 42 insertions, 2 deletions
@@ -140,7 +140,12 @@ Options that are modified or new in %s include: optimizations, and no runtime assertions or C++ exception catching (to re-enable C++ exception catching, use - -s DISABLE_EXCEPTION_CATCHING=0 ). + -s DISABLE_EXCEPTION_CATCHING=0 ). 32-bit + multiplication is done in JS doubles which + is fast but imprecise for high values. + (For details on the effects of different + opt levels, see apply_opt_level() in + tools/shared.py) Note: Optimizations are only done when compiling to JavaScript, not to intermediate bitcode. diff --git a/src/parseTools.js b/src/parseTools.js index 86e3c643..e37f3a99 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -1794,7 +1794,14 @@ function processMathop(item) { case 'add': return handleOverflow(getFastValue(idents[0], '+', idents[1], item.type), bits); case 'sub': return handleOverflow(getFastValue(idents[0], '-', idents[1], item.type), bits); case 'sdiv': case 'udiv': return makeRounding(getFastValue(idents[0], '/', idents[1], item.type), bits, op[0] === 's'); - case 'mul': return handleOverflow(getFastValue(idents[0], '*', idents[1], item.type), bits); + case 'mul': { + if (bits == 32 && PRECISE_I32_MUL) { + preciseI64MathUsed = true; + return '(i64Math.multiply(' + idents[0] + ',0,' + idents[1] + ',0),i64Math.result[0])'; + } else { + return handleOverflow(getFastValue(idents[0], '*', idents[1], item.type), bits); + } + } case 'urem': case 'srem': return getFastValue(idents[0], '%', idents[1], item.type); case 'or': { if (bits > 32) { diff --git a/src/settings.js b/src/settings.js index 75b30003..110cc246 100644 --- a/src/settings.js +++ b/src/settings.js @@ -76,6 +76,16 @@ var DOUBLE_MODE = 1; // How to load and store 64-bit doubles. Without typed arra // NaN or an infinite number. var PRECISE_I64_MATH = 1; // If enabled, i64 addition etc. is emulated - which is slow but precise. If disabled, // we use the 'double trick' which is fast but incurs rounding at high values. 
+ // Note that we do not catch 32-bit multiplication by default (which must be done in + // 64 bits for high values for full precision) - you must manually set PRECISE_I32_MUL + // for that. +var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done using i64 math. This is necessary if the values + // exceed the JS double-integer limit of ~53 bits. This option can normally be disabled + // because generally i32 multiplication works ok without it, and enabling it has a big + // impact on performance. + // Note that you can hand-optimize your code to avoid the need for this: If you do + // multiplications that actually need 64-bit precision inside 64-bit values, things + // will work properly. (Unless the LLVM optimizer turns them into 32-bit values?) var CLOSURE_ANNOTATIONS = 0; // If set, the generated code will be annotated for the closure // compiler. This potentially lets closure optimize the code better. diff --git a/tests/runner.py b/tests/runner.py index 876990d2..32776167 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -953,6 +953,23 @@ m_divisor is 1091269979 ''' self.do_run(src, 'zero 2, 104', ['hallo']) + def test_i32_mul_precise(self): + if self.emcc_args == None: return self.skip('needs ta2') + + self.emcc_args += ['-s', 'PRECISE_I32_MUL=1'] + src = r''' + #include <stdio.h> + + int main(int argc, char **argv) { + unsigned long d1 = 0x847c9b5d; + unsigned long q = 0x549530e1; + if (argc > 1000) { q += argc; d1 -= argc; } // confuse optimizer + printf("%lu\n", d1*q); + return 0; + } + ''' + self.do_run(src, '3217489085') + def test_i16_emcc_intrinsic(self): Settings.CORRECT_SIGNS = 1 # Relevant to this test diff --git a/tools/shared.py b/tools/shared.py index 672a1a18..57541077 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -413,6 +413,7 @@ class Settings: if opt_level >= 1: Settings.ASSERTIONS = 0 Settings.DISABLE_EXCEPTION_CATCHING = 1 + Settings.PRECISE_I32_MUL = 0 if opt_level >= 2: Settings.RELOOP = 1 if opt_level >= 3: |