18 files changed, 333 insertions, 159 deletions
diff --git a/emscripten.py b/emscripten.py
index 5576baba..7c9cbdbe 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -11,6 +11,7 @@ headers, for the libc implementation in JS).
 
 import os, sys, json, optparse, subprocess, re, time, multiprocessing, string, logging
 
+from tools import shared
 from tools import jsrun, cache as cache_module, tempfiles
 from tools.response_file import read_response_file
 
@@ -25,7 +26,6 @@ def get_configuration():
   if hasattr(get_configuration, 'configuration'):
     return get_configuration.configuration
 
-  from tools import shared
   configuration = shared.Configuration(environ=os.environ)
   get_configuration.configuration = configuration
   return configuration
@@ -425,8 +425,8 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None,
       Counter.i += 1
       bad = 'b' + str(i)
       params = ','.join(['p%d' % p for p in range(len(sig)-1)])
-      coercions = ';'.join(['p%d = %sp%d%s' % (p, '+' if sig[p+1] != 'i' else '', p, '' if sig[p+1] != 'i' else '|0') for p in range(len(sig)-1)]) + ';'
-      ret = '' if sig[0] == 'v' else ('return %s0' % ('+' if sig[0] != 'i' else ''))
+      coercions = ';'.join(['p%d = %s' % (p, shared.JS.make_coercion('p%d' % p, sig[p+1], settings)) for p in range(len(sig)-1)]) + ';'
+      ret = '' if sig[0] == 'v' else ('return %s' % shared.JS.make_initializer(sig[0], settings))
       start = raw.index('[')
       end = raw.rindex(']')
       body = raw[start+1:end].split(',')
@@ -451,7 +451,7 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None,
     math_envs = ['Math.min'] # TODO: move min to maths
     asm_setup += '\n'.join(['var %s = %s;' % (f.replace('.', '_'), f) for f in math_envs])
 
-    if settings['TO_FLOAT32']: maths += ['Math.toFloat32']
+    if settings['PRECISE_F32']: maths += ['Math.fround']
 
     basic_funcs = ['abort', 'assert', 'asmPrintInt', 'asmPrintFloat'] + [m.replace('.', '_') for m in math_envs]
     if settings['RESERVED_FUNCTION_POINTERS'] > 0: basic_funcs.append('jsCall')
@@ -476,18 +476,14 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None,
 
     asm_runtime_funcs = ['stackAlloc', 'stackSave', 'stackRestore', 'setThrew'] + ['setTempRet%d' % i for i in range(10)]
     # function tables
-    def asm_coerce(value, sig):
-      if sig == 'v': return value
-      return ('+' if sig != 'i' else '') + value + ('|0' if sig == 'i' else '')
-
     function_tables = ['dynCall_' + table for table in last_forwarded_json['Functions']['tables']]
     function_tables_impls = []
 
     for sig in last_forwarded_json['Functions']['tables'].iterkeys():
       args = ','.join(['a' + str(i) for i in range(1, len(sig))])
-      arg_coercions = ' '.join(['a' + str(i) + '=' + asm_coerce('a' + str(i), sig[i]) + ';' for i in range(1, len(sig))])
-      coerced_args = ','.join([asm_coerce('a' + str(i), sig[i]) for i in range(1, len(sig))])
-      ret = ('return ' if sig[0] != 'v' else '') + asm_coerce('FUNCTION_TABLE_%s[index&{{{ FTM_%s }}}](%s)' % (sig, sig, coerced_args), sig[0])
+      arg_coercions = ' '.join(['a' + str(i) + '=' + shared.JS.make_coercion('a' + str(i), sig[i], settings) + ';' for i in range(1, len(sig))])
+      coerced_args = ','.join([shared.JS.make_coercion('a' + str(i), sig[i], settings) for i in range(1, len(sig))])
+      ret = ('return ' if sig[0] != 'v' else '') + shared.JS.make_coercion('FUNCTION_TABLE_%s[index&{{{ FTM_%s }}}](%s)' % (sig, sig, coerced_args), sig[0], settings)
       function_tables_impls.append('''
   function dynCall_%s(index%s%s) {
     index = index|0;
@@ -497,7 +493,7 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None,
 ''' % (sig, ',' if len(sig) > 1 else '', args, arg_coercions, ret))
 
       for i in range(settings['RESERVED_FUNCTION_POINTERS']):
-        jsret = ('return ' if sig[0] != 'v' else '') + asm_coerce('jsCall(%d%s%s)' % (i, ',' if coerced_args else '', coerced_args), sig[0])
+        jsret = ('return ' if sig[0] != 'v' else '') + shared.JS.make_coercion('jsCall(%d%s%s)' % (i, ',' if coerced_args else '', coerced_args), sig[0], settings)
         function_tables_impls.append('''
   function jsCall_%s_%s(%s) {
     %s
@@ -505,7 +501,6 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None,
   }
 
 ''' % (sig, i, args, arg_coercions, jsret))
-      from tools import shared
       shared.Settings.copy(settings)
       asm_setup += '\n' + shared.JS.make_invoke(sig) + '\n'
       basic_funcs.append('invoke_%s' % sig)
@@ -585,7 +580,7 @@ var asm = (function(global, env, buffer) {
   var undef = 0;
   var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0;
 ''' + ''.join(['''
-  var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs + '''
+  var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + ['  var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ['''
 // EMSCRIPTEN_START_FUNCS
 function stackAlloc(size) {
   size = size|0;
@@ -727,14 +722,12 @@ def main(args, compiler_engine, cache, jcache, relooper, temp_files, DEBUG, DEBU
       relooper = cache.get_path('relooper.js')
     settings.setdefault('RELOOPER', relooper)
     if not os.path.exists(relooper):
-      from tools import shared
       shared.Building.ensure_relooper(relooper)
   
   settings.setdefault('STRUCT_INFO', cache.get_path('struct_info.compiled.json'))
   struct_info = settings.get('STRUCT_INFO')
   
   if not os.path.exists(struct_info):
-    from tools import shared
     shared.Building.ensure_struct_info(struct_info)
   
   emscript(args.infile, settings, args.outfile, libraries, compiler_engine=compiler_engine,
@@ -833,7 +826,6 @@ WARNING: You should normally never use this! Use emcc instead.
     temp_files = tempfiles.TempFiles(temp_dir)
 
   if keywords.compiler is None:
-    from tools import shared
     keywords.compiler = shared.COMPILER_ENGINE
 
   if keywords.verbose is None:
diff --git a/src/jsifier.js b/src/jsifier.js
index 0da48a8c..97317756 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -756,14 +756,7 @@ function JSify(data, functionsOnly, givenFunctions) {
           if (func.setjmpTable && !ASM_JS) {
             ret += ' } catch(e) { if (!e.longjmp || !(e.id in mySetjmpIds)) throw(e); setjmpTable[setjmpLabels[e.id]](e.value) }';
           }
-          if (ASM_JS && func.returnType !== 'void') {
-            // Add a return
-            if (func.returnType in Runtime.FLOAT_TYPES) {
-              ret += ' return +0;\n';
-            } else {
-              ret += ' return 0;\n';
-            }
-          }
+          if (ASM_JS && func.returnType !== 'void') ret += '  return ' + asmInitializer(func.returnType) + ';\n'; // Add a return
         } else {
           ret += (SHOW_LABELS ? indent + '/* ' + block.entries[0] + ' */' : '') + '\n' + getLabelLines(block.labels[0]);
         }
@@ -833,11 +826,7 @@ function JSify(data, functionsOnly, givenFunctions) {
       var lastReturn = func.JS.lastIndexOf('return ');
       if ((lastCurly < 0 && lastReturn < 0) || // no control flow, no return
           (lastCurly >= 0 && lastReturn < lastCurly)) { // control flow, no return past last join
-        if (func.returnType in Runtime.FLOAT_TYPES) {
-          func.JS += ' return +0;\n';
-        } else {
-          func.JS += ' return 0;\n';
-        }
+        func.JS += '  return ' + asmInitializer(func.returnType) + ';\n';
       }
     }
     func.JS += '}\n';
@@ -1337,7 +1326,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         if (isNumber(item.ident)) {
           // Direct read from a memory address; this may be an intentional segfault, if not, it is a bug in the source
           if (ASM_JS) {
-            return asmCoercion('abort(' + item.ident + ')', item.type);
+            return asmFFICoercion('abort(' + item.ident + ')', item.type);
           } else {
             item.assignTo = null;
             return 'throw "fault on read from ' + item.ident + '";';
@@ -1514,8 +1503,10 @@ function JSify(data, functionsOnly, givenFunctions) {
 
     args = args.map(function(arg, i) { return indexizeFunctions(arg, argsTypes[i]) });
     if (ASM_JS) {
-      if (shortident in Functions.libraryFunctions || simpleIdent in Functions.libraryFunctions || byPointerForced || invoke || extCall || funcData.setjmpTable) {
-        args = args.map(function(arg, i) { return asmCoercion(arg, argsTypes[i]) });
+      var ffiCall = (shortident in Functions.libraryFunctions || simpleIdent in Functions.libraryFunctions || byPointerForced || invoke || extCall || funcData.setjmpTable) &&
+                    !(simpleIdent in JS_MATH_BUILTINS);
+      if (ffiCall) {
+        args = args.map(function(arg, i) { return asmCoercion(arg, ensureValidFFIType(argsTypes[i])) });
       } else {
         args = args.map(function(arg, i) { return asmEnsureFloat(arg, argsTypes[i]) });
       }
@@ -1592,7 +1583,7 @@ function JSify(data, functionsOnly, givenFunctions) {
       returnType = getReturnType(type);
       if (callIdent in Functions.implementedFunctions) {
         // LLVM sometimes bitcasts for no reason. We must call using the exact same type as the actual function is generated as
-        var trueType = Functions.getSignatureReturnType(Functions.implementedFunctions[callIdent]);
+        var trueType = Functions.getSignatureType(Functions.implementedFunctions[callIdent][0]);
         if (trueType !== returnType && !isIdenticallyImplemented(trueType, returnType)) {
           if (VERBOSE) warnOnce('Fixing function call based on return type from signature, on ' + [callIdent, returnType, trueType]);
           returnType = trueType;
@@ -1628,7 +1619,11 @@ function JSify(data, functionsOnly, givenFunctions) {
 
     var ret = callIdent + '(' + args.join(',') + ')';
     if (ASM_JS) { // TODO: do only when needed (library functions and Math.*?) XXX && simpleIdent in Functions.libraryFunctions) {
-      ret = asmCoercion(ret, returnType);
+      if (ffiCall) {
+        ret = asmFFICoercion(ret, returnType);
+      } else {
+        ret = asmCoercion(ret, returnType);
+      }
       if (simpleIdent == 'abort' && funcData.returnType != 'void') {
         ret += '; return ' + asmCoercion('0', funcData.returnType); // special case: abort() can happen without return, breaking the return type of asm functions. ensure a return
       }
diff --git a/src/modules.js b/src/modules.js
index 854575e0..13cca977 100644
--- a/src/modules.js
+++ b/src/modules.js
@@ -18,7 +18,7 @@ var LLVM = {
   PHI_REACHERS: set('branch', 'switch', 'invoke', 'indirectbr'),
   EXTENDS: set('sext', 'zext'),
   COMPS: set('icmp', 'fcmp'),
-  CONVERSIONS: set('inttoptr', 'ptrtoint', 'uitofp', 'sitofp', 'fptosi', 'fptoui'),
+  CONVERSIONS: set('inttoptr', 'ptrtoint', 'uitofp', 'sitofp', 'fptosi', 'fptoui', 'fpext', 'fptrunc'),
   INTRINSICS_32: set('_llvm_memcpy_p0i8_p0i8_i64', '_llvm_memmove_p0i8_p0i8_i64', '_llvm_memset_p0i8_i64'), // intrinsics that need args converted to i32 in USE_TYPED_ARRAYS == 2
 };
 LLVM.GLOBAL_MODIFIERS = set(keys(LLVM.LINKAGES).concat(['constant', 'global', 'hidden']));
@@ -253,13 +253,32 @@ var Functions = {
 
   aliases: {}, // in shared modules (MAIN_MODULE or SHARED_MODULE), a list of aliases for functions that have them
 
+  getSignatureLetter: function(type) {
+    switch(type) {
+      case 'float': return 'f';
+      case 'double': return 'd';
+      case 'void': return 'v';
+      default: return 'i';
+    }
+  },
+
+  getSignatureType: function(letter) {
+    switch(letter) {
+      case 'v': return 'void';
+      case 'i': return 'i32';
+      case 'f': return 'float';
+      case 'd': return 'double';
+      default: throw 'what is this sig? ' + sig;
+    }
+  },
+
   getSignature: function(returnType, argTypes, hasVarArgs) {
-    var sig = returnType == 'void' ? 'v' : (isIntImplemented(returnType) ? 'i' : 'f');
+    var sig = Functions.getSignatureLetter(returnType);
     for (var i = 0; i < argTypes.length; i++) {
       var type = argTypes[i];
       if (!type) break; // varargs
       if (type in Runtime.FLOAT_TYPES) {
-        sig += 'f';
+        sig += Functions.getSignatureLetter(type);
       } else {
         var chunks = getNumIntChunks(type);
         for (var j = 0; j < chunks; j++) sig += 'i';
@@ -269,15 +288,6 @@ var Functions = {
     return sig;
   },
 
-  getSignatureReturnType: function(sig) {
-    switch(sig[0]) {
-      case 'v': return 'void';
-      case 'i': return 'i32';
-      case 'f': return 'double';
-      default: throw 'what is this sig? ' + sig;
-    }
-  },
-
   // Mark a function as needing indexing. Python will coordinate them all
   getIndex: function(ident, sig) {
     var ret;
@@ -350,17 +360,15 @@ var Functions = {
             if (!wrapped[curr]) {
               var args = '', arg_coercions = '', call = short + '(', retPre = '', retPost = '';
               if (t[0] != 'v') {
-                if (t[0] == 'i') {
-                  retPre = 'return ';
-                  retPost = '|0';
-                } else {
-                  retPre = 'return +';
-                }
+                var temp = asmFFICoercion('X', Functions.getSignatureType(t[0])).split('X');
+                retPre = 'return ' + temp[0];
+                retPost = temp[1];
               }
               for (var j = 1; j < t.length; j++) {
                 args += (j > 1 ? ',' : '') + 'a' + j;
-                arg_coercions += 'a' + j + '=' + asmCoercion('a' + j, t[j] != 'i' ? 'float' : 'i32') + ';';
-                call += (j > 1 ? ',' : '') + asmCoercion('a' + j, t[j] != 'i' ? 'float' : 'i32');
+                var type = Functions.getSignatureType(t[j]);
+                arg_coercions += 'a' + j + '=' + asmCoercion('a' + j, type) + ';';
+                call += (j > 1 ? ',' : '') + asmCoercion('a' + j, type === 'float' ? 'double' : type); // ffi arguments must be doubles if they are floats
               }
               call += ')';
               if (short == '_setjmp') printErr('WARNING: setjmp used via a function pointer. If this is for libc setjmp (not something of your own with the same name), it will break things');
diff --git a/src/parseTools.js b/src/parseTools.js
index 52727903..3c2eeece 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -629,6 +629,8 @@ function cleanSegment(segment) {
 
 var MATHOPS = set(['add', 'sub', 'sdiv', 'udiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'frem', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr']);
 
+var JS_MATH_BUILTINS = set(['Math_sin', 'Math_cos', 'Math_tan', 'Math_asin', 'Math_acos', 'Math_atan', 'Math_ceil', 'Math_floor', 'Math_exp', 'Math_log', 'Math_sqrt']);
+
 var PARSABLE_LLVM_FUNCTIONS = set('getelementptr', 'bitcast');
 mergeInto(PARSABLE_LLVM_FUNCTIONS, MATHOPS);
 
@@ -788,8 +790,8 @@ function splitI64(value, floatConversion) {
   var high = makeInlineCalculation(
     asmCoercion('Math_abs(VALUE)', 'double') + ' >= ' + asmEnsureFloat('1', 'double') + ' ? ' +
       '(VALUE > ' + asmEnsureFloat('0', 'double') + ' ? ' +
-               asmCoercion('Math_min(' + asmCoercion('Math_floor((VALUE)/' + asmEnsureFloat(4294967296, 'float') + ')', 'double') + ', ' + asmEnsureFloat(4294967295, 'float') + ')', 'i32') + '>>>0' +
-               ' : ' + asmFloatToInt(asmCoercion('Math_ceil((VALUE - +((' + asmFloatToInt('VALUE') + ')>>>0))/' + asmEnsureFloat(4294967296, 'float') + ')', 'double')) + '>>>0' + 
+               asmCoercion('Math_min(' + asmCoercion('Math_floor((VALUE)/' + asmEnsureFloat(4294967296, 'double') + ')', 'double') + ', ' + asmEnsureFloat(4294967295, 'double') + ')', 'i32') + '>>>0' +
+               ' : ' + asmFloatToInt(asmCoercion('Math_ceil((VALUE - +((' + asmFloatToInt('VALUE') + ')>>>0))/' + asmEnsureFloat(4294967296, 'double') + ')', 'double')) + '>>>0' + 
       ')' +
     ' : 0',
     value,
@@ -1167,32 +1169,37 @@ function makeVarDef(js) {
   return js;
 }
 
+function ensureDot(value) {
+  value = value.toString();
+  // if already dotted, or Infinity or NaN, nothing to do here
+  // if smaller than 1 and running js opts, we always need to force a coercion (0.001 will turn into 1e-3, which has no .)
+  if ((value.indexOf('.') >= 0 || /[IN]/.test(value)) && (!RUNNING_JS_OPTS || Math.abs(value) >= 1)) return value;
+  if (RUNNING_JS_OPTS) return '(+' + value + ')'; // JS optimizer will run, we must do +x, and it will be corrected later
+  var e = value.indexOf('e');
+  if (e < 0) return value + '.0';
+  return value.substr(0, e) + '.0' + value.substr(e);
+}
+
 function asmEnsureFloat(value, type) { // ensures that a float type has either 5.5 (clearly a float) or +5 (float due to asm coercion)
   if (!ASM_JS) return value;
-  // coerce if missing a '.', or if smaller than 1, so could be 1e-5 which has no .
-  if (type in Runtime.FLOAT_TYPES && isNumber(value) && (value.toString().indexOf('.') < 0 || Math.abs(value) < 1)) {
-    if (RUNNING_JS_OPTS) {
-      return '(+' + value + ')'; // JS optimizer will run, we must do +x, and it will be corrected later
-    } else {
-      // ensure a .
-      value = value.toString();
-      if (value.indexOf('.') >= 0 || /[IN]/.test(value)) return value; // if already dotted, or Infinity or NaN, nothing to do here
-      var e = value.indexOf('e');
-      if (e < 0) return value + '.0';
-      return value.substr(0, e) + '.0' + value.substr(e);
-    }
+  if (!isNumber(value)) return value;
+  if (PRECISE_F32 && type === 'float') {
+    // normally ok to just emit Math_fround(0), but if the constant is large we may need a .0 (if it can't fit in an int)
+    if (value == 0) return 'Math_fround(0)';
+    value = ensureDot(value);
+    return 'Math_fround(' + value + ')';
+  }
+  if (type in Runtime.FLOAT_TYPES) {
+    return ensureDot(value);
   } else {
     return value;
   }
 }
 
-function asmInitializer(type, impl) {
+function asmInitializer(type) {
   if (type in Runtime.FLOAT_TYPES) {
-    if (RUNNING_JS_OPTS) {
-      return '+0';
-    } else {
-      return '.0';
-    }
+    if (PRECISE_F32 && type === 'float') return 'Math_fround(0)';
+    return RUNNING_JS_OPTS ? '+0' : '.0';
   } else {
     return '0';
   }
@@ -1213,7 +1220,11 @@ function asmCoercion(value, type, signedness) {
           value = '(' + value + ')|0';
         }
       }
-      return '(+(' + value + '))';
+      if (PRECISE_F32 && type === 'float') {
+        return 'Math_fround(' + value + ')';
+      } else {
+        return '(+(' + value + '))';
+      }
     }
   } else {
     return '((' + value + ')|0)';
@@ -2129,14 +2140,14 @@ function makeRounding(value, bits, signed, floatConversion) {
   }
 }
 
-function makeIsNaN(value) {
-  if (ASM_JS) return makeInlineCalculation('((VALUE) != (VALUE))', value, 'tempDouble');
+function makeIsNaN(value, type) {
+  if (ASM_JS) return makeInlineCalculation('((VALUE) != (VALUE))', value, type === 'float' ? 'tempFloat' : 'tempDouble');
   return 'isNaN(' + value + ')';
 }
 
 function makeFloat(value, type) {
-  if (TO_FLOAT32 && type == 'float') {
-    return 'Math_toFloat32(' + value + ')';
+  if (PRECISE_F32 && type == 'float') {
+    return 'Math_fround(' + value + ')';
   }
   return value;
 }
@@ -2253,8 +2264,8 @@ function processMathop(item) {
       case 'lshr': {
 				throw 'shifts should have been legalized!';
       }
-      case 'uitofp': case 'sitofp': return RuntimeGenerator.makeBigInt(low1, high1, op[0] == 'u');
-      case 'fptoui': case 'fptosi': return finish(splitI64(idents[0], true));
+      case 'uitofp': case 'sitofp': return makeFloat(RuntimeGenerator.makeBigInt(low1, high1, op[0] == 'u'), item.type);
+      case 'fptoui': case 'fptosi': return finish(splitI64(asmCoercion(idents[0], 'double'), true)); // coerce to double before conversion to i64
       case 'icmp': {
         switch (variant) {
           case 'uge': return '((' + high1 + '>>>0) >= (' + high2 + '>>>0)) & ((((' + high1 + '>>>0) >  ('  + high2 + '>>>0)) | ' +
@@ -2438,7 +2449,7 @@ function processMathop(item) {
     case 'fdiv': return makeFloat(getFastValue(idents[0], '/', idents[1], item.type), item.type);
     case 'fmul': return makeFloat(getFastValue(idents[0], '*', idents[1], item.type), item.type);
     case 'frem': return makeFloat(getFastValue(idents[0], '%', idents[1], item.type), item.type);
-    case 'uitofp': case 'sitofp': return asmCoercion(idents[0], 'double', op[0]);
+    case 'uitofp': case 'sitofp': return asmCoercion(idents[0], item.type, op[0]);
     case 'fptoui': case 'fptosi': return makeRounding(idents[0], bitsLeft, op === 'fptosi', true);
 
     // TODO: We sometimes generate false instead of 0, etc., in the *cmps. It seemed slightly faster before, but worth rechecking
@@ -2470,8 +2481,8 @@ function processMathop(item) {
         case 'ult': case 'olt': return idents[0] + '<' + idents[1];
         case 'une': case 'one': return idents[0] + '!=' + idents[1];
         case 'ueq': case 'oeq': return idents[0] + '==' + idents[1];
-        case 'ord': return '!' + makeIsNaN(idents[0]) + '&!' + makeIsNaN(idents[1]);
-        case 'uno': return makeIsNaN(idents[0]) + '|' + makeIsNaN(idents[1]);
+        case 'ord': return '!' + makeIsNaN(idents[0], paramTypes[0]) + '&!' + makeIsNaN(idents[1], paramTypes[0]);
+        case 'uno': return makeIsNaN(idents[0], paramTypes[0]) + '|' + makeIsNaN(idents[1], paramTypes[0]);
         case 'true': return '1';
         default: throw 'Unknown fcmp variant: ' + variant;
       }
@@ -2485,8 +2496,15 @@ function processMathop(item) {
       }
       // otherwise, fall through
     }
-    case 'fpext': case 'sext': return idents[0];
-    case 'fptrunc': return idents[0];
+    case 'sext': return idents[0];
+    case 'fpext': {
+      if (PRECISE_F32) return '+(' + idents[0] + ')';
+      return idents[0];
+    }
+    case 'fptrunc': {
+      if (PRECISE_F32) return 'Math_fround(' + idents[0] + ')';
+      return idents[0];
+    }
     case 'select': return '(' + idents[0] + '?' + asmEnsureFloat(idents[1], item.type) + ':' + asmEnsureFloat(idents[2], item.type) + ')';
     case 'ptrtoint': case 'inttoptr': {
       var ret = '';
@@ -2677,3 +2695,14 @@ function ensureVector(ident, base) {
   return ident == 0 ? base + '32x4.zero()' : ident;
 }
 
+function ensureValidFFIType(type) {
+  return type === 'float' ? 'double' : type; // ffi does not tolerate float XXX
+}
+
+// FFI return values must arrive as doubles, and we can force them to floats afterwards
+function asmFFICoercion(value, type) {
+  value = asmCoercion(value, ensureValidFFIType(type));
+  if (PRECISE_F32 && type === 'float') value = asmCoercion(value, 'float');
+  return value;
+}
+
diff --git a/src/preamble.js b/src/preamble.js
index c88e4052..3e76e503 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -1074,11 +1074,16 @@ Math['imul'] = function imul(a, b) {
 #endif
 Math.imul = Math['imul'];
 
-#if TO_FLOAT32
-if (!Math['toFloat32']) Math['toFloat32'] = function toFloat32(x) {
-  return x;
-};
-Math.toFloat32 = Math['toFloat32'];
+#if PRECISE_F32
+#if PRECISE_F32 == 1
+if (!Math['fround']) {
+  var froundBuffer = new Float32Array(1);
+  Math['fround'] = function(x) { froundBuffer[0] = x; return froundBuffer[0] };
+}
+#else // 2
+if (!Math['fround']) Math['fround'] = function(x) { return x };
+#endif
+Math.fround = Math['fround'];
 #endif
 
 var Math_abs = Math.abs;
@@ -1096,7 +1101,7 @@ var Math_ceil = Math.ceil;
 var Math_floor = Math.floor;
 var Math_pow = Math.pow;
 var Math_imul = Math.imul;
-var Math_toFloat32 = Math.toFloat32;
+var Math_fround = Math.fround;
 var Math_min = Math.min;
 
 // A counter of dependencies for calling run(). If we need to
diff --git a/src/runtime.js b/src/runtime.js
index 3f89dc84..786ae021 100644
--- a/src/runtime.js
+++ b/src/runtime.js
@@ -82,8 +82,8 @@ var RuntimeGenerator = {
   // Rounding is inevitable if the number is large. This is a particular problem for small negative numbers
   // (-1 will be rounded!), so handle negatives separately and carefully
   makeBigInt: function(low, high, unsigned) {
-    var unsignedRet = '(' + asmCoercion(makeSignOp(low, 'i32', 'un', 1, 1), 'float') + '+(' + asmCoercion(makeSignOp(high, 'i32', 'un', 1, 1), 'float') + '*' + asmEnsureFloat(4294967296, 'float') + '))';
-    var signedRet = '(' + asmCoercion(makeSignOp(low, 'i32', 'un', 1, 1), 'float') + '+(' + asmCoercion(makeSignOp(high, 'i32', 're', 1, 1), 'float') + '*' + asmEnsureFloat(4294967296, 'float') + '))';
+    var unsignedRet = '(' + asmCoercion(makeSignOp(low, 'i32', 'un', 1, 1), 'double') + '+(' + asmCoercion(makeSignOp(high, 'i32', 'un', 1, 1), 'double') + '*' + asmEnsureFloat(4294967296, 'double') + '))';
+    var signedRet = '(' + asmCoercion(makeSignOp(low, 'i32', 'un', 1, 1), 'double') + '+(' + asmCoercion(makeSignOp(high, 'i32', 're', 1, 1), 'double') + '*' + asmEnsureFloat(4294967296, 'double') + '))';
     if (typeof unsigned === 'string') return '(' + unsigned + ' ? ' + unsignedRet + ' : ' + signedRet + ')';
     return unsigned ? unsignedRet : signedRet;
   }
diff --git a/src/settings.js b/src/settings.js
index 42e31b3a..4ffbb415 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -115,7 +115,13 @@ var PRECISE_I64_MATH = 1; // If enabled, i64 addition etc. is emulated - which i
 var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done with full precision, which means it is
                          // correct even if the value exceeds the JS double-integer limit of ~52 bits (otherwise,
                          // rounding will occur above that range).
-var TO_FLOAT32 = 0; // Use Math.toFloat32
+var PRECISE_F32 = 0; // 0: Use JS numbers for floating-point values. These are 64-bit and do not model C++
+                     //    floats exactly, which are 32-bit.
+                     // 1: Model C++ floats precisely, using Math.fround, polyfilling when necessary. This
+                     //    can be slow if the polyfill is used on heavy float32 computation.
+                     // 2: Model C++ floats precisely using Math.fround if available in the JS engine, otherwise
+                     //    use an empty polyfill. This will have less of a speed penalty than using the full
+                     //    polyfill in cases where engine support is not present.
 
 var CLOSURE_ANNOTATIONS = 0; // If set, the generated code will be annotated for the closure
                              // compiler. This potentially lets closure optimize the code better.
diff --git a/src/utility.js b/src/utility.js
index ac821a89..bec0eb8f 100644
--- a/src/utility.js
+++ b/src/utility.js
@@ -222,7 +222,8 @@ function mergeInto(obj, other) {
 }
 
 function isNumber(x) {
-  return x == parseFloat(x) || (typeof x == 'string' && x.match(/^-?\d+$/));
+  // XXX this does not handle 0xabc123 etc. We should likely also do x == parseInt(x) (which handles that), and remove hack |// handle 0x... as well|
+  return x == parseFloat(x) || (typeof x == 'string' && x.match(/^-?\d+$/)) || x === 'NaN';
 }
 
 function isArray(x) {
diff --git a/system/include/emscripten/emscripten.h b/system/include/emscripten/emscripten.h
index d30620ec..dd1e01a4 100644
--- a/system/include/emscripten/emscripten.h
+++ b/system/include/emscripten/emscripten.h
@@ -203,7 +203,7 @@ void emscripten_get_canvas_size(int *width, int *height, int *isFullscreen);
  * absolute time, and is only meaningful in comparison to
  * other calls to this function. The unit is ms.
  */
-float emscripten_get_now();
+double emscripten_get_now();
 
 /*
  * Simple random number generation in [0, 1), maps to Math.random().
diff --git a/tests/cases/storebigfloat.ll b/tests/cases/storebigfloat.ll
new file mode 100644
index 00000000..c9995835
--- /dev/null
+++ b/tests/cases/storebigfloat.ll
@@ -0,0 +1,17 @@
+
+@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 ; [#uses=1 type=[15 x i8]*]
+
+; [#uses=0]
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4                   ; [#uses=1 type=i32*]
+  %f = alloca float, align 4
+  store float 1.000000e+10, float* %f, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
+  ret i32 1
+}
+
+; [#uses=1]
+declare i32 @printf(i8*, ...)
+
diff --git a/tests/emscripten_get_now.cpp b/tests/emscripten_get_now.cpp
index 17aa7d32..5ededb23 100644
--- a/tests/emscripten_get_now.cpp
+++ b/tests/emscripten_get_now.cpp
@@ -14,10 +14,10 @@ int main() {
   // b) Values returned by emscripten_get_now() are strictly nondecreasing.
   // c) emscripten_get_now() is able to return sub-millisecond precision timer values.
   bool detected_good_timer_precision = false;
-  float smallest_delta = 0.f;
+  double smallest_delta = 0.f;
   for(int x = 0; x < 1000; ++x) { // Have several attempts to find a good small delta, i.e. give time to JS engine to warm up the code and so on.
-    float t = emscripten_get_now();
-    float t2 = emscripten_get_now();
+    double t = emscripten_get_now();
+    double t2 = emscripten_get_now();
     for(int i = 0; i < 100 && t == t2; ++i) {
       t2 = emscripten_get_now();
     }
diff --git a/tests/runner.py b/tests/runner.py
index 867f7113..8c4a9abf 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -36,7 +36,7 @@ except:
 
 # Core test runner class, shared between normal tests and benchmarks
 checked_sanity = False
-test_modes = ['default', 'o1', 'o2', 'asm1', 'asm2', 'asm2g', 'asm2x86', 's_0_0', 's_0_1']
+test_modes = ['default', 'o1', 'o2', 'asm1', 'asm2', 'asm2f', 'asm2g', 'asm2x86', 's_0_0', 's_0_1']
 test_index = 0
 
 class RunnerCore(unittest.TestCase):
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index e9cfee52..943b133d 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -122,6 +122,7 @@ process(sys.argv[1])
                     '--llvm-lto', '3', '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
                     '-s', 'TOTAL_MEMORY=128*1024*1024',
                     '--closure', '1',
+                    #'-s', 'PRECISE_F32=1',
                     #'-g',
                     '-o', final_filename] + shared_args + emcc_args, stdout=PIPE, stderr=self.stderr_redirect).communicate()
     assert os.path.exists(final_filename), 'Failed to compile file: ' + output[0]
@@ -428,10 +429,15 @@ process(sys.argv[1])
     src = open(path_from_root('tests', 'life.c'), 'r').read()
     self.do_benchmark('life', src, '''--------------------------------''', shared_args=['-std=c99'], force_c=True)
 
-  def test_linpack(self):
+  def test_linpack_double(self):
     def output_parser(output):
       return 100.0/float(re.search('Unrolled Double  Precision +([\d\.]+) Mflops', output).group(1))
-    self.do_benchmark('linpack', open(path_from_root('tests', 'linpack.c')).read(), '''Unrolled Double  Precision''', force_c=True, output_parser=output_parser)
+    self.do_benchmark('linpack_double', open(path_from_root('tests', 'linpack.c')).read(), '''Unrolled Double  Precision''', force_c=True, output_parser=output_parser)
+
+  def test_linpack_float(self):
+    def output_parser(output):
+      return 100.0/float(re.search('Unrolled Single  Precision +([\d\.]+) Mflops', output).group(1))
+    self.do_benchmark('linpack_float', open(path_from_root('tests', 'linpack.c')).read(), '''Unrolled Single  Precision''', force_c=True, output_parser=output_parser, shared_args=['-DSP'])
 
   def test_zzz_java_nbody(self): # tests xmlvm compiled java, including bitcasts of doubles, i64 math, etc.
     args = [path_from_root('tests', 'nbody-java', x) for x in os.listdir(path_from_root('tests', 'nbody-java')) if x.endswith('.c')] + \
@@ -504,4 +510,4 @@ process(sys.argv[1])
     native_args = native_lib + ['-I' + path_from_root('tests', 'bullet', 'src'),
                                 '-I' + path_from_root('tests', 'bullet', 'Demos', 'Benchmarks')]
 
-    self.do_benchmark('bullet', src, '\nok.\n', emcc_args=emcc_args, native_args=native_args)
-\ No newline at end of file
+    self.do_benchmark('bullet', src, '\nok.\n', emcc_args=emcc_args, native_args=native_args)
diff --git a/tests/test_core.py b/tests/test_core.py
index 3680a92d..37179ff1 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -882,6 +882,32 @@ nada
     '''
     self.do_run(src, 'OK!\n');
 
+  def test_float32_precise(self):
+    Settings.PRECISE_F32 = 1
+
+    src = r'''
+      #include <stdio.h>
+
+      int main(int argc, char **argv) {
+        float x = 1.23456789123456789;
+        float y = 5.20456089123406709;
+        while (argc > 10 || argc % 19 == 15) {
+          // confuse optimizer
+          x /= y;
+          y = 2*y - 1;
+          argc--;
+        }
+        x = x - y;
+        y = 3*y - x/2;
+        x = x*y;
+        y += 0.000000000123123123123;
+        x -= y/7.654;
+        printf("\n%.20f, %.20f\n", x, y);
+        return 0;
+      }
+    '''
+    self.do_run(src, '\n-72.16590881347656250000, 17.59867858886718750000\n')
+
   def test_negative_zero(self):
     src = r'''
       #include <stdio.h>
@@ -1490,7 +1516,7 @@ f6: nan
         #include <stdio.h>
         #include <stdlib.h>
         #include <cmath>
-        int main()
+        int main(int argc, char **argv)
         {
           printf("*%.2f,%.2f,%d", M_PI, -M_PI, (1/0.0) > 1e300); // could end up as infinity, or just a very very big number
           printf(",%d", isfinite(NAN) != 0);
@@ -1512,11 +1538,15 @@ f6: nan
           sincosf(0.0, &fsine, &fcosine);
           printf(",%1.1f", fsine);
           printf(",%1.1f", fcosine);
+          fsine = sinf(1.1 + argc - 1);
+          fcosine = cosf(1.1 + argc - 1);
+          printf(",%1.1f", fsine);
+          printf(",%1.1f", fcosine);
           printf("*\\n");
           return 0;
         }
       '''
-      self.do_run(src, '*3.14,-3.14,1,0