aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alon Zakai <alonzakai@gmail.com> 2014-05-03 12:09:37 -0700
committer Alon Zakai <alonzakai@gmail.com> 2014-05-03 12:29:23 -0700
commit 77aa2a72f38d73e71eef48cacfe72cce33fc7201 (patch)
tree 6dd3e801732e0aa3307e2af370eabd1c8570e207
parent 2308587a85de571844e71e574fcc63c7955d6012 (diff)
emit a global const for Math_fround(0) to avoid function call overheads in the fround polyfill
-rwxr-xr-x emcc 2
-rwxr-xr-x emscripten.py 2
-rw-r--r-- src/settings.js 9
-rw-r--r-- tools/js-optimizer.js 55
-rw-r--r-- tools/test-js-optimizer-asm-pre-f32.js 8
-rw-r--r-- tools/test-js-optimizer-asm-pre-output-f32.js 8
6 files changed, 54 insertions, 30 deletions
diff --git a/emcc b/emcc
index b9d74713..1629f5c7 100755
--- a/emcc
+++ b/emcc
@@ -1727,7 +1727,7 @@ try:
# with commaified code breaks late aggressive variable elimination)
if shared.Settings.SIMPLIFY_IFS and (debug_level == 0 or profiling) and shared.Settings.OUTLINING_LIMIT == 0: js_optimizer_queue += ['simplifyIfs']
- if opt_level >= 3 and shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
+ if shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
if closure and not shared.Settings.ASM_JS:
flush_js_optimizer_queue()
diff --git a/emscripten.py b/emscripten.py
index bf55ee43..16f9eb33 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -1165,7 +1165,7 @@ var asm = (function(global, env, buffer) {
var nan = +env.NaN, inf = +env.Infinity;
var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0;
''' + ''.join(['''
- var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ['''
+ var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ([' const f0 = Math_fround(0);\n'] if settings.get('PRECISE_F32') else []) + ['''
// EMSCRIPTEN_START_FUNCS
function stackAlloc(size) {
size = size|0;
diff --git a/src/settings.js b/src/settings.js
index a9a72425..3289eace 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -124,13 +124,20 @@ var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done with full pre
var PRECISE_F32 = 0; // 0: Use JS numbers for floating-point values. These are 64-bit and do not model C++
// floats exactly, which are 32-bit.
// 1: Model C++ floats precisely, using Math.fround, polyfilling when necessary. This
- // can be slow if the polyfill is used on heavy float32 computation.
+ // can be slow if the polyfill is used on heavy float32 computation. See note on
+ // browser support below.
// 2: Model C++ floats precisely using Math.fround if available in the JS engine, otherwise
// use an empty polyfill. This will have much less of a speed penalty than using the full
// polyfill in cases where engine support is not present. In addition, we can
// remove the empty polyfill calls themselves on the client when generating html,
// which should mean that this gives you the best of both worlds of 0 and 1, and is
// therefore recommended.
+ // XXX Note: To optimize float32-using code, we use the 'const' keyword in the emitted
+ // code. This allows us to avoid unnecessary calls to Math.fround, which would
+ // slow down engines not yet supporting that function. 'const' is present in
+ // all modern browsers, including Firefox, Chrome and Safari, but in IE is only
+ // present in IE11 and above. Therefore if you need to support legacy versions of
+ // IE, you should not enable PRECISE_F32 1 or 2.
var SIMD = 0; // Whether to emit SIMD code ( https://github.com/johnmccutchan/ecmascript_simd )
var CLOSURE_COMPILER = 0; // Whether closure compiling is being run on this output
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js
index db85965d..2914b6e8 100644
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -1342,13 +1342,21 @@ var ASM_DOUBLE = 1;
var ASM_FLOAT = 2;
var ASM_NONE = 3;
-function detectAsmCoercion(node, asmInfo) {
+var ASM_FLOAT_ZERO = null; // TODO: share the entire node?
+
+function detectAsmCoercion(node, asmInfo, inVarDef) {
// for params, +x vs x|0, for vars, 0.0 vs 0
if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE;
if (node[0] === 'unary-prefix') return ASM_DOUBLE;
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') return ASM_FLOAT;
if (asmInfo && node[0] == 'name') return getAsmType(node[1], asmInfo);
- if (node[0] === 'name') return ASM_NONE;
+ if (node[0] === 'name') {
+ if (!inVarDef) return ASM_NONE;
+ // We are in a variable definition, where Math_fround(0) optimized into a global constant becomes f0 = Math_fround(0)
+ if (!ASM_FLOAT_ZERO) ASM_FLOAT_ZERO = node[1];
+ else assert(ASM_FLOAT_ZERO === node[1]);
+ return ASM_FLOAT;
+ }
return ASM_INT;
}
@@ -1366,7 +1374,13 @@ function makeAsmVarDef(v, type) {
switch (type) {
case ASM_INT: return [v, ['num', 0]];
case ASM_DOUBLE: return [v, ['unary-prefix', '+', ['num', 0]]];
- case ASM_FLOAT: return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
+ case ASM_FLOAT: {
+ if (ASM_FLOAT_ZERO) {
+ return [v, ['name', ASM_FLOAT_ZERO]];
+ } else {
+ return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
+ }
+ }
default: throw 'wha? ' + JSON.stringify([node, type]) + new Error().stack;
}
}
@@ -1409,9 +1423,7 @@ function normalizeAsm(func) {
var name = v[0];
var value = v[1];
if (!(name in data.vars)) {
- assert(value[0] === 'num' || (value[0] === 'unary-prefix' && value[2][0] === 'num') // must be valid coercion no-op
- || (value[0] === 'call' && value[1][0] === 'name' && value[1][1] === 'Math_fround'));
- data.vars[name] = detectAsmCoercion(value);
+ data.vars[name] = detectAsmCoercion(value, null, true);
v.length = 1; // make an un-assigning var
} else {
assert(j === 0, 'cannot break in the middle');
@@ -1425,22 +1437,6 @@ function normalizeAsm(func) {
traverse(stats[i], function(node, type) {
if (type === 'var') {
assert(0, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node));
- /*
- for (var j = 0; j < node[1].length; j++) {
- var v = node[1][j];
- var name = v[0];
- var value = v[1];
- if (!(name in data.vars)) {
- if (value[0] != 'name') {
- data.vars[name] = detectAsmCoercion(value); // detect by coercion
- } else {
- var origin = value[1];
- data.vars[name] = data.vars[origin] || ASM_INT; // detect by origin variable, or assume int for non-locals
- }
- }
- }
- unVarify(node[1], node);
- */
} else if (type === 'call' && node[1][0] === 'function') {
assert(!node[1][1]); // anonymous functions only
data.inlines.push(node[1]);
@@ -3721,7 +3717,7 @@ function minifyGlobals(ast) {
var first = true; // do not minify initial 'var asm ='
// find the globals
traverse(ast, function(node, type) {
- if (type === 'var') {
+ if (type === 'var' || type === 'const') {
if (first) {
first = false;
return;
@@ -4971,10 +4967,19 @@ function safeHeap(ast) {
function optimizeFrounds(ast) {
// collapse fround(fround(..)), which can happen due to elimination
+ // also emit f0 instead of fround(0) (except in returns)
+ var inReturn = false;
function fix(node) {
+ if (node[0] === 'return') inReturn = true;
traverseChildren(node, fix);
- if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround' && node[2][0][0] === 'call' && node[2][0][1][0] === 'name' && node[2][0][1][1] === 'Math_fround') {
- return node[2][0];
+ if (node[0] === 'return') inReturn = false;
+ if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') {
+ var arg = node[2][0];
+ if (arg[0] === 'num') {
+ if (!inReturn && arg[1] === 0) return ['name', 'f0'];
+ } else if (arg[0] === 'call' && arg[1][0] === 'name' && arg[1][1] === 'Math_fround') {
+ return arg;
+ }
}
}
traverseChildren(ast, fix);
diff --git a/tools/test-js-optimizer-asm-pre-f32.js b/tools/test-js-optimizer-asm-pre-f32.js
index 5471deeb..be515b36 100644
--- a/tools/test-js-optimizer-asm-pre-f32.js
+++ b/tools/test-js-optimizer-asm-pre-f32.js
@@ -14,4 +14,10 @@ function dupe() {
x = Math_fround(Math_fround(Math_fround(x)));
x = Math_fround(Math_fround(Math_fround(Math_fround(x))));
}
-// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe"]
+function zeros(x) {
+ x = Math_fround(x);
+ var y = Math_fround(0);
+ print(Math_fround(y) + Math_fround(0));
+ return Math_fround(0); // return needs to stay as is
+}
+// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe", "zeros"]
diff --git a/tools/test-js-optimizer-asm-pre-output-f32.js b/tools/test-js-optimizer-asm-pre-output-f32.js
index 19059619..f0f2d0da 100644
--- a/tools/test-js-optimizer-asm-pre-output-f32.js
+++ b/tools/test-js-optimizer-asm-pre-output-f32.js
@@ -4,7 +4,7 @@ function badf() {
HEAP32[$gep23_asptr >> 2] = $9;
}
function badf2() {
- var $9 = Math_fround(0);
+ var $9 = f0;
$9 = Math_fround($8);
HEAPF32[$gep23_asptr >> 2] = $9;
}
@@ -14,4 +14,10 @@ function dupe() {
x = Math_fround(x);
x = Math_fround(x);
}
+function zeros(x) {
+ x = Math_fround(x);
+ var y = f0;
+ print(Math_fround(y) + f0);
+ return Math_fround(0);
+}