diff options
author | Alon Zakai <alonzakai@gmail.com> | 2013-12-07 10:37:25 -0500 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2013-12-07 10:37:25 -0500 |
commit | 1a007b1631509b9d72499a8f4402294017ee04dc (patch) | |
tree | 92f8b0341497c7bd4e53aa82c690346536a244c3 | |
parent | df11c6f1fd1636a355b83a1c48b3a890596e6a32 (diff) | |
parent | eb083723747a90cb6ab9853fec8d6e8ef54748bc (diff) |
Merge branch 'incoming'
38 files changed, 2755 insertions, 900 deletions
@@ -108,3 +108,6 @@ a license to everyone to use it as detailed in LICENSE.) * Ben Noordhuis <info@bnoordhuis.nl> * Bob Roberts <bobroberts177@gmail.com> * John Vilk <jvilk@cs.umass.edu> +* Daniel Baulig <dbaulig@fb.com> (copyright owned by Facebook, Inc.) +* Lu Wang <coolwanglu@gmail.com> +* Heidi Pan <heidi.pan@intel.com> (copyright owned by Intel) @@ -9,8 +9,32 @@ Not all changes are documented here. In particular, new features, user-oriented Current trunk code ------------------ - - To see a list of commits that in the active development branch 'incoming', which have not yet been packaged in a release, see - https://github.com/kripken/emscripten/compare/1.7.5...incoming + - To see a list of commits in the active development branch 'incoming', which have not yet been packaged in a release, see + https://github.com/kripken/emscripten/compare/1.7.8...incoming + +v1.7.8: 11/19/2013 +------------------ + - Fixed an issue with -MMD compilation parameter. + - Added EM_ASM_INT() and EM_ASM_DOUBLE() macros. For more information, read https://groups.google.com/forum/#!topic/emscripten-discuss/BFGTJPCgO6Y . + - Fixed --split parameter to also work on Windows. + - Fixed issues with BSD sockets accept() call. + - Full list of changes: https://github.com/kripken/emscripten/compare/1.7.7...1.7.8 + +v1.7.7: 11/16/2013 +------------------ + - Improve SDL audio buffer queue timing support. + - Improved default precision of clock_gettime even when not using CLOCK_REALTIME. + - Optimize and fix issues with LLVM IR processing. + - Full list of changes: https://github.com/kripken/emscripten/compare/1.7.6...1.7.7 + +v1.7.6: 11/15/2013 +------------------ + - Added regex implementation from musl libc. + - The command line parameter -s DEAD_FUNCTIONS=[] can now be used to explicitly kill functions coming from built-in library_xx.js. + - Improved EGL support and GLES2 spec conformance. + - Reverted -s TOTAL_MEMORY=x to require pow2 values, instead of the relaxed 'multiples of 16MB'. This is because the relaxed rule is released only in Firefox 26 which which is currently in Beta and ships on the week of December 10th (currently in Beta). As of writing, current stable Firefox 25 does not yet support these. + - Adjusted the default linker behavior to warn about all missing symbols, instead of silently ignoring them. Use -s WARN_ON_UNDEFINED_SYMBOLS=0 to suppress these warnings if necessary. + - Full list of changes: https://github.com/kripken/emscripten/compare/1.7.5...1.7.6 v1.7.5: 11/13/2013 ------------------ @@ -53,8 +53,9 @@ from tools import shared, jsrun from tools.shared import Compression, execute, suffix, unsuffixed, unsuffixed_basename, WINDOWS from tools.response_file import read_response_file -CXX_SUFFIXES = ('.cpp', '.cxx', '.cc') -SOURCE_SUFFIXES = ('.c', '.cpp', '.cxx', '.cc', '.m', '.mm') +C_SUFFIXES = ('.c', '.C') +CXX_SUFFIXES = ('.cpp', '.cxx', '.cc', '.CPP', '.CXX', '.CC') +SOURCE_SUFFIXES = C_SUFFIXES + CXX_SUFFIXES + ('.m', '.mm') BITCODE_SUFFIXES = ('.bc', '.o', '.obj') DYNAMICLIB_SUFFIXES = ('.dylib', '.so', '.dll') STATICLIB_SUFFIXES = ('.a',) @@ -1274,7 +1275,16 @@ try: shutil.move(in_temp(unsuffixed(uniquename(input_file)) + '.o'), unsuffixed_basename(input_file) + '.' + final_suffix) else: if len(input_files) == 1: - shutil.move(in_temp(unsuffixed(uniquename(input_files[0])) + '.o'), specified_target) + temp_output_base = in_temp(unsuffixed(uniquename(input_files[0]))) + shutil.move(temp_output_base + '.o', specified_target) + if os.path.exists(temp_output_base + '.d'): + # There was a .d file generated, from -MD or -MMD and friends, save a copy of it to where the output resides, + # adjusting the target name away from the temporary file name to the specified target. + # It will be deleted with the rest of the temporary directory. + deps = open(temp_output_base + '.d').read() + deps = deps.replace(temp_output_base + '.o', specified_target) + with open(os.path.join(os.path.dirname(specified_target), os.path.basename(unsuffixed(input_files[0]) + '.d')), "w") as out_dep: + out_dep.write(deps) else: assert len(original_input_files) == 1 or not has_dash_c, 'fatal error: cannot specify -o with -c with multiple files' + str(sys.argv) + ':' + str(original_input_files) # We have a specified target (-o <target>), which is not JavaScript or HTML, and @@ -6,7 +6,7 @@ the environment variables to use emcc and so forth. Usage: emmake make [FLAGS] -Not that if you ran configure with emconfigure, then +Note that if you ran configure with emconfigure, then the environment variables have already been detected and set. This script is useful if you have no configure step, and your Makefile uses the environment vars diff --git a/emscripten.py b/emscripten.py index 75e6711a..907e88ce 100755 --- a/emscripten.py +++ b/emscripten.py @@ -9,7 +9,7 @@ header files (so that the JS compiler can see the constants in those headers, for the libc implementation in JS). ''' -import os, sys, json, optparse, subprocess, re, time, multiprocessing, string, logging +import os, sys, json, optparse, subprocess, re, time, multiprocessing, string, logging, shutil from tools import shared from tools import jsrun, cache as cache_module, tempfiles @@ -706,6 +706,492 @@ Runtime.stackRestore = function(top) { asm['stackRestore'](top) }; outfile.close() +# emscript_fast: emscript'en code using the 'fast' compilation path, using +# an LLVM backend +# FIXME: this is just a copy-paste of normal emscript(), and we trample it +# if the proper env var is set (see below). we should refactor to +# share code between the two, once emscript_fast stabilizes (or, +# leaving it separate like it is will make it trivial to rip out +# if the experiment fails) + +def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None, + jcache=None, temp_files=None, DEBUG=None, DEBUG_CACHE=None): + """Runs the emscripten LLVM-to-JS compiler. We parallelize as much as possible + + Args: + infile: The path to the input LLVM assembly file. + settings: JSON-formatted settings that override the values + defined in src/settings.js. + outfile: The file where the output is written. + """ + + assert(settings['ASM_JS']) # TODO: apply ASM_JS even in -O0 for fastcomp + assert(settings['RUNNING_JS_OPTS']) + + # Overview: + # * Run LLVM backend to emit JS. JS includes function bodies, memory initializer, + # and various metadata + # * Run compiler.js on the metadata to emit the shell js code, pre/post-ambles, + # JS library dependencies, etc. + + if DEBUG: logging.debug('emscript: llvm backend') + + # TODO: proper temp files + # TODO: use a single LLVM toolchain instead of normal for source, pnacl for simplification, custom for js backend + + if DEBUG: shutil.copyfile(infile, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp0.ll')) + + if DEBUG: logging.debug(' ..1..') + temp1 = temp_files.get('.1.bc').name + shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), infile, '-pnacl-abi-simplify-preopt', '-o', temp1])) + assert os.path.exists(temp1) + if DEBUG: + shutil.copyfile(temp1, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp1.bc')) + shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'llvm-dis'), 'temp1.bc', '-o', 'temp1.ll'])) + + #if DEBUG: logging.debug(' ..2..') + #temp2 = temp_files.get('.2.bc').name + #shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), temp1, '-O3', '-o', temp2])) + #assert os.path.exists(temp2) + #if DEBUG: + # shutil.copyfile(temp2, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp2.bc')) + # shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'llvm-dis'), 'temp2.bc', '-o', 'temp2.ll'])) + temp2 = temp1 # XXX if we optimize the bc, we remove some pnacl clutter, but it also makes varargs stores be 8-byte aligned + + if DEBUG: logging.debug(' ..3..') + temp3 = temp_files.get('.3.bc').name + shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), temp2, '-pnacl-abi-simplify-postopt', '-o', temp3])) + assert os.path.exists(temp3) + if DEBUG: + shutil.copyfile(temp3, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp3.bc')) + shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'llvm-dis'), 'temp3.bc', '-o', 'temp3.ll'])) + + if DEBUG: logging.debug(' ..4..') + temp4 = temp_files.get('.4.js').name + backend_compiler = os.path.join(shared.LLVM_ROOT, 'llc') + shared.jsrun.timeout_run(subprocess.Popen([backend_compiler, temp3, '-march=js', '-filetype=asm', '-o', temp4], stdout=subprocess.PIPE)) + if DEBUG: shutil.copyfile(temp4, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp4.js')) + + # Split up output + backend_output = open(temp4).read() + #if DEBUG: print >> sys.stderr, backend_output + + start_funcs_marker = '// EMSCRIPTEN_START_FUNCTIONS' + end_funcs_marker = '// EMSCRIPTEN_END_FUNCTIONS' + metadata_split_marker = '// EMSCRIPTEN_METADATA' + + start_funcs = backend_output.index(start_funcs_marker) + end_funcs = backend_output.rindex(end_funcs_marker) + metadata_split = backend_output.rindex(metadata_split_marker) + + funcs = backend_output[start_funcs+len(start_funcs_marker):end_funcs] + metadata_raw = backend_output[metadata_split+len(metadata_split_marker):] + #if DEBUG: print >> sys.stderr, "METAraw", metadata_raw + metadata = json.loads(metadata_raw) + mem_init = backend_output[end_funcs+len(end_funcs_marker):metadata_split] + #if DEBUG: print >> sys.stderr, "FUNCS", funcs + #if DEBUG: print >> sys.stderr, "META", metadata + #if DEBUG: print >> sys.stderr, "meminit", mem_init + + # function table masks + + table_sizes = {} + for k, v in metadata['tables'].iteritems(): + table_sizes[k] = str(v.count(',')) # undercounts by one, but that is what we want + funcs = re.sub(r"#FM_(\w+)#", lambda m: table_sizes[m.groups(0)[0]], funcs) + + # js compiler + + if DEBUG: logging.debug('emscript: js compiler glue') + + # Integrate info from backend + settings['DEFAULT_LIBRARY_FUNCS_TO_INCLUDE'] = list( + set(settings['DEFAULT_LIBRARY_FUNCS_TO_INCLUDE'] + map(shared.JS.to_nice_ident, metadata['declares'])).difference( + map(lambda x: x[1:], metadata['implementedFunctions']) + ) + ) + map(lambda x: x[1:], metadata['externs']) + + # Settings changes + assert settings['TARGET_LE32'] == 1 + settings['TARGET_LE32'] = 2 + + # Save settings to a file to work around v8 issue 1579 + settings_file = temp_files.get('.txt').name + def save_settings(): + global settings_text + settings_text = json.dumps(settings, sort_keys=True) + s = open(settings_file, 'w') + s.write(settings_text) + s.close() + save_settings() + + # Call js compiler + if DEBUG: t = time.time() + out = jsrun.run_js(path_from_root('src', 'compiler.js'), compiler_engine, [settings_file, ';', 'glue'] + libraries, stdout=subprocess.PIPE, stderr=STDERR_FILE, + cwd=path_from_root('src')) + assert '//FORWARDED_DATA:' in out, 'Did not receive forwarded data in pre output - process failed?' + glue, forwarded_data = out.split('//FORWARDED_DATA:') + + #print >> sys.stderr, out + + last_forwarded_json = forwarded_json = json.loads(forwarded_data) + + # merge in information from llvm backend + + last_forwarded_json['Functions']['tables'] = metadata['tables'] + + '''indexed_functions = set() + for key in forwarded_json['Functions']['indexedFunctions'].iterkeys(): + indexed_functions.add(key)''' + + pre, post = glue.split('// EMSCRIPTEN_END_FUNCS') + + #print >> sys.stderr, 'glue:', pre, '\n\n||||||||||||||||\n\n', post, '...............' + + # memory and global initializers + + global_initializers = ', '.join(map(lambda i: '{ func: function() { %s() } }' % i, metadata['initializers'])) + + pre = pre.replace('STATICTOP = STATIC_BASE + 0;', '''STATICTOP = STATIC_BASE + Runtime.alignMemory(%d); +/* global initializers */ __ATINIT__.push(%s); +%s''' % (mem_init.count(',')+1, global_initializers, mem_init)) # XXX wrong size calculation! + + funcs_js = [funcs] + if settings.get('ASM_JS'): + parts = pre.split('// ASM_LIBRARY FUNCTIONS\n') + if len(parts) > 1: + pre = parts[0] + funcs_js.append(parts[1]) + + # calculations on merged forwarded data TODO + + # merge forwarded data + assert settings.get('ASM_JS'), 'fastcomp is asm.js only' + settings['EXPORTED_FUNCTIONS'] = forwarded_json['EXPORTED_FUNCTIONS'] + all_exported_functions = set(settings['EXPORTED_FUNCTIONS']) # both asm.js and otherwise + for additional_export in settings['DEFAULT_LIBRARY_FUNCS_TO_INCLUDE']: # additional functions to export from asm, if they are implemented + all_exported_functions.add('_' + additional_export) + exported_implemented_functions = set() + export_bindings = settings['EXPORT_BINDINGS'] + export_all = settings['EXPORT_ALL'] + for key in metadata['implementedFunctions'] + forwarded_json['Functions']['implementedFunctions'].keys(): # XXX perf + if key in all_exported_functions or export_all or (export_bindings and key.startswith('_emscripten_bind')): + exported_implemented_functions.add(key) + + #if DEBUG: outfile.write('// pre\n') + outfile.write(pre) + pre = None + + #if DEBUG: outfile.write('// funcs\n') + + if settings.get('ASM_JS'): + #print >> sys.stderr, '<<<<<<', post, '>>>>>>' + post_funcs = '' #, post_rest = post.split('// EMSCRIPTEN_END_FUNCS\n') + #post = post_rest + + # Move preAsms to their right place + def move_preasm(m): + contents = m.groups(0)[0] + outfile.write(contents + '\n') + return '' + post_funcs = re.sub(r'/\* PRE_ASM \*/(.*)\n', lambda m: move_preasm(m), post_funcs) + + funcs_js += ['\n' + post_funcs + '// EMSCRIPTEN_END_FUNCS\n'] + + simple = os.environ.get('EMCC_SIMPLE_ASM') + class Counter: + i = 0 + j = 0 + if 'pre' in last_forwarded_json['Functions']['tables']: + pre_tables = last_forwarded_json['Functions']['tables']['pre'] + del last_forwarded_json['Functions']['tables']['pre'] + else: + pre_tables = '' + + def make_table(sig, raw): + i = Counter.i + Counter.i += 1 + bad = 'b' + str(i) + params = ','.join(['p%d' % p for p in range(len(sig)-1)]) + coercions = ';'.join(['p%d = %s' % (p, shared.JS.make_coercion('p%d' % p, sig[p+1], settings)) for p in range(len(sig)-1)]) + ';' + ret = '' if sig[0] == 'v' else ('return %s' % shared.JS.make_initializer(sig[0], settings)) + start = raw.index('[') + end = raw.rindex(']') + body = raw[start+1:end].split(',') + for j in range(settings['RESERVED_FUNCTION_POINTERS']): + body[settings['FUNCTION_POINTER_ALIGNMENT'] * (1 + j)] = 'jsCall_%s_%s' % (sig, j) + Counter.j = 0 + def fix_item(item): + Counter.j += 1 + newline = Counter.j % 30 == 29 + if item == '0': return bad if not newline else (bad + '\n') + return item if not newline else (item + '\n') + body = ','.join(map(fix_item, body)) + return ('function %s(%s) { %s %s(%d); %s }' % (bad, params, coercions, 'abort' if not settings['ASSERTIONS'] else 'nullFunc', i, ret), ''.join([raw[:start+1], body, raw[end:]])) + + infos = [make_table(sig, raw) for sig, raw in last_forwarded_json['Functions']['tables'].iteritems()] + + function_tables_defs = '\n'.join([info[0] for info in infos]) + '\n// EMSCRIPTEN_END_FUNCS\n' + '\n'.join([info[1] for info in infos]) + + asm_setup = '' + maths = ['Math.' + func for func in ['floor', 'abs', 'sqrt', 'pow', 'cos', 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'exp', 'log', 'ceil', 'imul']] + fundamentals = ['Math', 'Int8Array', 'Int16Array', 'Int32Array', 'Uint8Array', 'Uint16Array', 'Uint32Array', 'Float32Array', 'Float64Array'] + math_envs = ['Math.min'] # TODO: move min to maths + asm_setup += '\n'.join(['var %s = %s;' % (f.replace('.', '_'), f) for f in math_envs]) + + if settings['PRECISE_F32']: maths += ['Math.fround'] + + basic_funcs = ['abort', 'assert', 'asmPrintInt', 'asmPrintFloat'] + [m.replace('.', '_') for m in math_envs] + if settings['RESERVED_FUNCTION_POINTERS'] > 0: basic_funcs.append('jsCall') + if settings['SAFE_HEAP']: basic_funcs += ['SAFE_HEAP_LOAD', 'SAFE_HEAP_STORE', 'SAFE_HEAP_CLEAR'] + if settings['CHECK_HEAP_ALIGN']: basic_funcs += ['CHECK_ALIGN_2', 'CHECK_ALIGN_4', 'CHECK_ALIGN_8'] + if settings['ASSERTIONS']: + basic_funcs += ['nullFunc'] + asm_setup += 'function nullFunc(x) { Module["printErr"]("Invalid function pointer called. Perhaps a miscast function pointer (check compilation warnings) or bad vtable lookup (maybe due to derefing a bad pointer, like NULL)?"); abort(x) }\n' + + basic_vars = ['STACKTOP', 'STACK_MAX', 'tempDoublePtr', 'ABORT'] + basic_float_vars = ['NaN', 'Infinity'] + + if metadata.get('preciseI64MathUsed') or \ + forwarded_json['Functions']['libraryFunctions'].get('llvm_cttz_i32') or \ + forwarded_json['Functions']['libraryFunctions'].get('llvm_ctlz_i32'): + basic_vars += ['cttz_i8', 'ctlz_i8'] + + if settings.get('DLOPEN_SUPPORT'): + for sig in last_forwarded_json['Functions']['tables'].iterkeys(): + basic_vars.append('F_BASE_%s' % sig) + asm_setup += ' var F_BASE_%s = %s;\n' % (sig, 'FUNCTION_TABLE_OFFSET' if settings.get('SIDE_MODULE') else '0') + '\n' + + asm_runtime_funcs = ['stackAlloc', 'stackSave', 'stackRestore', 'setThrew'] + ['setTempRet%d' % i for i in range(10)] + # function tables + function_tables = ['dynCall_' + table for table in last_forwarded_json['Functions']['tables']] + function_tables_impls = [] + + for sig in last_forwarded_json['Functions']['tables'].iterkeys(): + args = ','.join(['a' + str(i) for i in range(1, len(sig))]) + arg_coercions = ' '.join(['a' + str(i) + '=' + shared.JS.make_coercion('a' + str(i), sig[i], settings) + ';' for i in range(1, len(sig))]) + coerced_args = ','.join([shared.JS.make_coercion('a' + str(i), sig[i], settings) for i in range(1, len(sig))]) + ret = ('return ' if sig[0] != 'v' else '') + shared.JS.make_coercion('FUNCTION_TABLE_%s[index&{{{ FTM_%s }}}](%s)' % (sig, sig, coerced_args), sig[0], settings) + function_tables_impls.append(''' + function dynCall_%s(index%s%s) { + index = index|0; + %s + %s; + } +''' % (sig, ',' if len(sig) > 1 else '', args, arg_coercions, ret)) + + for i in range(settings['RESERVED_FUNCTION_POINTERS']): + jsret = ('return ' if sig[0] != 'v' else '') + shared.JS.make_coercion('jsCall(%d%s%s)' % (i, ',' if coerced_args else '', coerced_args), sig[0], settings) + function_tables_impls.append(''' + function jsCall_%s_%s(%s) { + %s + %s; + } + +''' % (sig, i, args, arg_coercions, jsret)) + shared.Settings.copy(settings) + asm_setup += '\n' + shared.JS.make_invoke(sig) + '\n' + basic_funcs.append('invoke_%s' % sig) + if settings.get('DLOPEN_SUPPORT'): + asm_setup += '\n' + shared.JS.make_extcall(sig) + '\n' + basic_funcs.append('extCall_%s' % sig) + + # calculate exports + exported_implemented_functions = list(exported_implemented_functions) + metadata['initializers'] + exported_implemented_functions.append('runPostSets') + exports = [] + if not simple: + for export in exported_implemented_functions + asm_runtime_funcs + function_tables: + exports.append("%s: %s" % (export, export)) + exports = '{ ' + ', '.join(exports) + ' }' + else: + exports = '_main' + # calculate globals + try: + del forwarded_json['Variables']['globals']['_llvm_global_ctors'] # not a true variable + except: + pass + # If no named globals, only need externals + global_vars = metadata['externs'] #+ forwarded_json['Variables']['globals'] + global_funcs = list(set(['_' + key for key, value in forwarded_json['Functions']['libraryFunctions'].iteritems() if value != 2]).difference(set(global_vars))) # + metadata['externFuncs']/'declares' + def math_fix(g): + return g if not g.startswith('Math_') else g.split('_')[1] + asm_global_funcs = ''.join([' var ' + g.replace('.', '_') + '=global.' + g + ';\n' for g in maths]) + \ + ''.join([' var ' + g + '=env.' + math_fix(g) + ';\n' for g in basic_funcs + global_funcs]) + asm_global_vars = ''.join([' var ' + g + '=env.' + g + '|0;\n' for g in basic_vars + global_vars]) + # In linkable modules, we need to add some explicit globals for global variables that can be linked and used across modules + if settings.get('MAIN_MODULE') or settings.get('SIDE_MODULE'): + assert settings.get('TARGET_LE32'), 'TODO: support x86 target when linking modules (needs offset of 4 and not 8 here)' + for key, value in forwarded_json['Variables']['globals'].iteritems(): + if value.get('linkable'): + init = forwarded_json['Variables']['indexedGlobals'][key] + 8 # 8 is Runtime.GLOBAL_BASE / STATIC_BASE + if settings.get('SIDE_MODULE'): init = '(H_BASE+' + str(init) + ')|0' + asm_global_vars += ' var %s=%s;\n' % (key, str(init)) + + # sent data + the_global = '{ ' + ', '.join(['"' + math_fix(s) + '": ' + s for s in fundamentals]) + ' }' + sending = '{ ' + ', '.join(['"' + math_fix(s) + '": ' + s for s in basic_funcs + global_funcs + basic_vars + basic_float_vars + global_vars]) + ' }' + # received + if not simple: + receiving = ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) + else: + receiving = 'var _main = Module["_main"] = asm;' + + # finalize + + if DEBUG: logging.debug('asm text sizes' + str([map(len, funcs_js), len(asm_setup), len(asm_global_vars), len(asm_global_funcs), len(pre_tables), len('\n'.join(function_tables_impls)), len(function_tables_defs.replace('\n', '\n ')), len(exports), len(the_global), len(sending), len(receiving)])) + + funcs_js = [''' +%s +function asmPrintInt(x, y) { + Module.print('int ' + x + ',' + y);// + ' ' + new Error().stack); +} +function asmPrintFloat(x, y) { + Module.print('float ' + x + ',' + y);// + ' ' + new Error().stack); +} +// EMSCRIPTEN_START_ASM +var asm = (function(global, env, buffer) { + %s + var HEAP8 = new global.Int8Array(buffer); + var HEAP16 = new global.Int16Array(buffer); + var HEAP32 = new global.Int32Array(buffer); + var HEAPU8 = new global.Uint8Array(buffer); + var HEAPU16 = new global.Uint16Array(buffer); + var HEAPU32 = new global.Uint32Array(buffer); + var HEAPF32 = new global.Float32Array(buffer); + var HEAPF64 = new global.Float64Array(buffer); +''' % (asm_setup, "'use asm';" if not metadata.get('hasInlineJS') and not settings['SIDE_MODULE'] and settings['ASM_JS'] == 1 else "'almost asm';") + '\n' + asm_global_vars + ''' + var __THREW__ = 0; + var threwValue = 0; + var setjmpId = 0; + var undef = 0; + var nan = +env.NaN, inf = +env.Infinity; + var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0; +''' + ''.join([''' + var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + [''' +// EMSCRIPTEN_START_FUNCS +function stackAlloc(size) { + size = size|0; + var ret = 0; + ret = STACKTOP; + STACKTOP = (STACKTOP + size)|0; +''' + ('STACKTOP = (STACKTOP + 3)&-4;' if settings['TARGET_X86'] else 'STACKTOP = (STACKTOP + 7)&-8;') + ''' + return ret|0; +} +function stackSave() { + return STACKTOP|0; +} +function stackRestore(top) { + top = top|0; + STACKTOP = top; +} +function setThrew(threw, value) { + threw = threw|0; + value = value|0; + if ((__THREW__|0) == 0) { + __THREW__ = threw; + threwValue = value; + } +} +function copyTempFloat(ptr) { + ptr = ptr|0; + HEAP8[tempDoublePtr] = HEAP8[ptr]; + HEAP8[tempDoublePtr+1|0] = HEAP8[ptr+1|0]; + HEAP8[tempDoublePtr+2|0] = HEAP8[ptr+2|0]; + HEAP8[tempDoublePtr+3|0] = HEAP8[ptr+3|0]; +} +function copyTempDouble(ptr) { + ptr = ptr|0; + HEAP8[tempDoublePtr] = HEAP8[ptr]; + HEAP8[tempDoublePtr+1|0] = HEAP8[ptr+1|0]; + HEAP8[tempDoublePtr+2|0] = HEAP8[ptr+2|0]; + HEAP8[tempDoublePtr+3|0] = HEAP8[ptr+3|0]; + HEAP8[tempDoublePtr+4|0] = HEAP8[ptr+4|0]; + HEAP8[tempDoublePtr+5|0] = HEAP8[ptr+5|0]; + HEAP8[tempDoublePtr+6|0] = HEAP8[ptr+6|0]; + HEAP8[tempDoublePtr+7|0] = HEAP8[ptr+7|0]; +} +''' + ''.join([''' +function setTempRet%d(value) { + value = value|0; + tempRet%d = value; +} +''' % (i, i) for i in range(10)])] + funcs_js + [''' + %s + + return %s; +}) +// EMSCRIPTEN_END_ASM +(%s, %s, buffer); +%s; +''' % (pre_tables + '\n'.join(function_tables_impls) + '\n' + function_tables_defs.replace('\n', '\n '), exports, the_global, sending, receiving)] + + if not settings.get('SIDE_MODULE'): + funcs_js.append(''' +Runtime.stackAlloc = function(size) { return asm['stackAlloc'](size) }; +Runtime.stackSave = function() { return asm['stackSave']() }; +Runtime.stackRestore = function(top) { asm['stackRestore'](top) }; +''') + + # Set function table masks + masks = {} + max_mask = 0 + for sig, table in last_forwarded_json['Functions']['tables'].iteritems(): + mask = table.count(',') + masks[sig] = str(mask) + max_mask = max(mask, max_mask) + def function_table_maskize(js, masks): + def fix(m): + sig = m.groups(0)[0] + return masks[sig] + return re.sub(r'{{{ FTM_([\w\d_$]+) }}}', lambda m: fix(m), js) # masks[m.groups(0)[0]] + funcs_js = map(lambda js: function_table_maskize(js, masks), funcs_js) + + if settings.get('DLOPEN_SUPPORT'): + funcs_js.append(''' + asm.maxFunctionIndex = %(max_mask)d; + DLFCN.registerFunctions(asm, %(max_mask)d+1, %(sigs)s, Module); + Module.SYMBOL_TABLE = SYMBOL_TABLE; +''' % { 'max_mask': max_mask, 'sigs': str(map(str, last_forwarded_json['Functions']['tables'].keys())) }) + + else: + function_tables_defs = '\n'.join([table for table in last_forwarded_json['Functions']['tables'].itervalues()]) + outfile.write(function_tables_defs) + funcs_js = [''' +// EMSCRIPTEN_START_FUNCS +'''] + funcs_js + [''' +// EMSCRIPTEN_END_FUNCS +'''] + + # Create symbol table for self-dlopen + if settings.get('DLOPEN_SUPPORT'): + symbol_table = {} + for k, v in forwarded_json['Variables']['indexedGlobals'].iteritems(): + if forwarded_json['Variables']['globals'][k]['named']: + symbol_table[k] = str(v + forwarded_json['Runtime']['GLOBAL_BASE']) + for raw in last_forwarded_json['Functions']['tables'].itervalues(): + if raw == '': continue + table = map(string.strip, raw[raw.find('[')+1:raw.find(']')].split(",")) + for i in range(len(table)): + value = table[i] + if value != '0': + if settings.get('SIDE_MODULE'): + symbol_table[value] = 'FUNCTION_TABLE_OFFSET+' + str(i) + else: + symbol_table[value] = str(i) + outfile.write("var SYMBOL_TABLE = %s;" % json.dumps(symbol_table).replace('"', '')) + + for i in range(len(funcs_js)): # do this loop carefully to save memory + outfile.write(funcs_js[i]) + funcs_js = None + + outfile.write(post) + + outfile.close() + +if os.environ.get('EMCC_FAST_COMPILER'): + emscript = emscript_fast + def main(args, compiler_engine, cache, jcache, relooper, temp_files, DEBUG, DEBUG_CACHE): # Prepare settings for serialization to JSON. settings = {} diff --git a/src/compiler.js b/src/compiler.js index aa3c7b92..7d768c3d 100644 --- a/src/compiler.js +++ b/src/compiler.js @@ -206,12 +206,12 @@ if (phase == 'pre') { if (VERBOSE) printErr('VERBOSE is on, this generates a lot of output and can slow down compilation'); // Load struct and define information. -try { +//try { var temp = JSON.parse(read(STRUCT_INFO)); -} catch(e) { - printErr('cannot load struct info at ' + STRUCT_INFO + ' : ' + e + ', trying in current dir'); - temp = JSON.parse(read('struct_info.compiled.json')); -} +//} catch(e) { +// printErr('cannot load struct info at ' + STRUCT_INFO + ' : ' + e + ', trying in current dir'); +// temp = JSON.parse(read('struct_info.compiled.json')); +//} C_STRUCTS = temp.structs; C_DEFINES = temp.defines; @@ -224,12 +224,12 @@ load('analyzer.js'); load('jsifier.js'); if (phase == 'funcs' && RELOOP) { // XXX handle !singlePhase RelooperModule = { TOTAL_MEMORY: ceilPowerOfTwo(2*RELOOPER_BUFFER_SIZE) }; - try { + //try { load(RELOOPER); - } catch(e) { - printErr('cannot load relooper at ' + RELOOPER + ' : ' + e + ', trying in current dir'); - load('relooper.js'); - } + //} catch(e) { + // printErr('cannot load relooper at ' + RELOOPER + ' : ' + e + ', trying in current dir'); + // load('relooper.js'); + //} assert(typeof Relooper != 'undefined'); } globalEval(processMacros(preprocess(read('runtime.js')))); @@ -267,7 +267,7 @@ function compile(raw) { function runPhase(currPhase) { //printErr('// JS compiler in action, phase ' + currPhase + typeof lines + (lines === null)); phase = currPhase; - if (phase != 'pre') { + if (phase != 'pre' && phase != 'glue') { if (singlePhase) PassManager.load(read(forwardedDataFile)); if (phase == 'funcs') { @@ -313,14 +313,16 @@ B = new Benchmarker(); try { if (ll_file) { - if (ll_file.indexOf(String.fromCharCode(10)) == -1) { + if (phase === 'glue') { + compile(';'); + } else if (ll_file.indexOf(String.fromCharCode(10)) == -1) { compile(read(ll_file)); } else { compile(ll_file); // we are given raw .ll } } } catch(err) { - printErr('aborting from js compiler due to exception: ' + err); + printErr('aborting from js compiler due to exception: ' + err + ' | ' + err.stack); } //var M = keys(tokenCacheMisses).map(function(m) { return [m, misses[m]] }).sort(function(a, b) { return a[1] - b[1] }); diff --git a/src/intertyper.js b/src/intertyper.js index 940c677f..b34d0c08 100644 --- a/src/intertyper.js +++ b/src/intertyper.js @@ -524,6 +524,27 @@ function intertyper(lines, sidePass, baseLineNums) { } }); } + } else if (ident == '_llvm_used') { + var chunk = item.tokens[1].tokens; + var funcs = []; + var part = []; + + for (var i = 0; i < chunk.length; i++) { + if (chunk[i].text == ',') { + var call = parseLLVMFunctionCall(part); + EXPORTED_FUNCTIONS[call.ident] = 0; + part = []; + } else { + part.push(chunk[i]); + } + } + if (part.length > 0) { + var call = parseLLVMFunctionCall(part); + EXPORTED_FUNCTIONS[call.ident] = 0; + } + + ret.type = 'i32'; + ret.value = { intertype: 'value', ident: '0', value: '0', type: ret.type }; } else if (!external) { if (item.tokens[1] && item.tokens[1].text != ';') { if (item.tokens[1].text == 'c') { @@ -538,6 +559,7 @@ function intertyper(lines, sidePass, baseLineNums) { ret.value = { intertype: 'value', ident: '0', value: '0', type: ret.type }; } } + return ret; } } @@ -616,7 +638,8 @@ function intertyper(lines, sidePass, baseLineNums) { // 'bitcast' function bitcastHandler(item) { item.intertype = 'bitcast'; - item.type = item.tokens[4].text; // The final type + var last = getTokenIndexByText(item.tokens, ';'); + item.type = item.tokens[Math.min(last, item.tokens.length-1)].text; // The final type Types.needAnalysis[item.type] = 0; var to = getTokenIndexByText(item.tokens, 'to'); item.params = [parseLLVMSegment(item.tokens.slice(1, to))]; diff --git a/src/jsifier.js b/src/jsifier.js index acfb6365..907855e7 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -28,7 +28,7 @@ function JSify(data, functionsOnly, givenFunctions) { if (mainPass) { var shellFile = SHELL_FILE ? SHELL_FILE : (BUILD_AS_SHARED_LIB || SIDE_MODULE ? 'shell_sharedlib.js' : 'shell.js'); - if (phase == 'pre') { + if (phase == 'pre' || phase == 'glue') { // We will start to print out the data, but must do so carefully - we are // dealing with potentially *huge* strings. Convenient replacements and // manipulations may create in-memory copies, and we may OOM. @@ -72,7 +72,7 @@ function JSify(data, functionsOnly, givenFunctions) { LibraryManager.load(); //B.stop('jsifier-libload'); - if (phase == 'pre') { + if (phase == 'pre' || phase == 'glue') { var libFuncsToInclude; if (INCLUDE_FULL_LIBRARY) { assert(!(BUILD_AS_SHARED_LIB || SIDE_MODULE), 'Cannot have both INCLUDE_FULL_LIBRARY and BUILD_AS_SHARED_LIB/SIDE_MODULE set.') @@ -474,7 +474,7 @@ function JSify(data, functionsOnly, givenFunctions) { } } if (SIDE_MODULE) return ';'; // we import into the side module js library stuff from the outside parent - if ((!ASM_JS || phase == 'pre') && + if ((!ASM_JS || phase == 'pre' || phase == 'glue') && (EXPORT_ALL || (ident in EXPORTED_FUNCTIONS))) { contentText += '\nModule["' + ident + '"] = ' + ident + ';'; } @@ -1373,8 +1373,9 @@ function JSify(data, functionsOnly, givenFunctions) { function insertelementHandler(item) { var base = getVectorBaseType(item.type); var ident = ensureVector(item.ident, base); + var laneOp = ((base == 'float') ? 'SIMD.float32x4.with' : 'SIMD.int32x4.with'); //return ident + '.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + finalizeLLVMParameter(item.value) + ')'; - return 'SIMD.with' + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')'; + return laneOp + SIMDLane[finalizeLLVMParameter(item.index)] + '(' + ident + ',' + finalizeLLVMParameter(item.value) + ')'; } function extractelementHandler(item) { var base = getVectorBaseType(item.type); @@ -1603,6 +1604,15 @@ function JSify(data, functionsOnly, givenFunctions) { } } + // we alias llvm memset and such to normal memset. The target has a return value, while the original + // does not, so we need to fix that for the actual call target + if (ASM_JS) { + var sig = LibraryManager.library[simpleIdent + '__sig']; + if (sig && sig[0] !== 'v') { + returnType = Functions.getSignatureType(sig[0]); + } + } + if (byPointer) { var sig = Functions.getSignature(returnType, argsTypes, hasVarArgs); if (ASM_JS) { @@ -1704,7 +1714,7 @@ function JSify(data, functionsOnly, givenFunctions) { // if (!mainPass) { - if (phase == 'pre' && !Variables.generatedGlobalBase && !BUILD_AS_SHARED_LIB) { + if ((phase == 'pre' || phase == 'glue') && !Variables.generatedGlobalBase && !BUILD_AS_SHARED_LIB) { Variables.generatedGlobalBase = true; // Globals are done, here is the rest of static memory assert((TARGET_LE32 && Runtime.GLOBAL_BASE == 8) || (TARGET_X86 && Runtime.GLOBAL_BASE == 4)); // this is assumed in e.g. relocations for linkable modules @@ -1719,7 +1729,7 @@ function JSify(data, functionsOnly, givenFunctions) { var generated = itemsDict.function.concat(itemsDict.type).concat(itemsDict.GlobalVariableStub).concat(itemsDict.GlobalVariable); print(generated.map(function(item) { return item.JS; }).join('\n')); - if (phase == 'pre') { + if (phase == 'pre' || phase == 'glue') { if (memoryInitialization.length > 0) { // apply postsets directly into the big memory initialization itemsDict.GlobalVariablePostSet = itemsDict.GlobalVariablePostSet.filter(function(item) { @@ -1742,15 +1752,17 @@ function JSify(data, functionsOnly, givenFunctions) { }); // write out the singleton big memory initialization value print('/* memory initializer */ ' + makePointer(memoryInitialization, null, 'ALLOC_NONE', 'i8', 'Runtime.GLOBAL_BASE' + (SIDE_MODULE ? '+H_BASE' : ''), true)); - } else { + } else if (phase !== 'glue') { print('/* no memory initializer */'); // test purposes } - // Define postsets. These will be run in ATINIT, right before global initializers (which might need the postsets). We cannot - // run them now because the memory initializer might not have been applied yet. - print('function runPostSets() {\n'); - print(itemsDict.GlobalVariablePostSet.map(function(item) { return item.JS }).join('\n')); - print('}\n'); + if (phase !== 'glue') { + // Define postsets. These will be run in ATINIT, right before global initializers (which might need the postsets). We cannot + // run them now because the memory initializer might not have been applied yet. + print('function runPostSets() {\n'); + print(itemsDict.GlobalVariablePostSet.map(function(item) { return item.JS }).join('\n')); + print('}\n'); + } if (USE_TYPED_ARRAYS == 2) { if (!BUILD_AS_SHARED_LIB && !SIDE_MODULE) { @@ -1780,7 +1792,7 @@ function JSify(data, functionsOnly, givenFunctions) { } // Print out global variables and postsets TODO: batching - if (phase == 'pre') { + if (phase == 'pre' || phase == 'glue') { var legalizedI64sDefault = legalizedI64s; legalizedI64s = false; diff --git a/src/library.js b/src/library.js index a5380c3a..26d766e9 100644 --- a/src/library.js +++ b/src/library.js @@ -23,6 +23,7 @@ LibraryManager.library = { stdout: 'allocate(1, "i32*", ALLOC_STATIC)', stderr: 'allocate(1, "i32*", ALLOC_STATIC)', _impure_ptr: 'allocate(1, "i32*", ALLOC_STATIC)', + __dso_handle: 'allocate(1, "i32*", ALLOC_STATIC)', // ========================================================================== // dirent.h @@ -471,6 +472,11 @@ LibraryManager.library = { mkstemp: function(template) { return _creat(_mktemp(template), 0600); }, + mkdtemp__deps: ['mktemp', 'mkdir'], + mkdtemp: function(template) { + template = _mktemp(template); + return (_mkdir(template, 0700) === 0) ? template : 0; + }, fcntl__deps: ['$FS', '__setErrNo', '$ERRNO_CODES'], fcntl: function(fildes, cmd, varargs, dup2) { // int fcntl(int fildes, int cmd, ...); @@ -535,7 +541,7 @@ LibraryManager.library = { // Advise as much as you wish. We don't care. return 0; }, - posix_madvise: 'posix_fadvise', + posix_madvise: function(){ return 0 }, // ditto as fadvise posix_fallocate__deps: ['$FS', '__setErrNo', '$ERRNO_CODES'], posix_fallocate: function(fd, offset, len) { // int posix_fallocate(int fd, off_t offset, off_t len); @@ -1855,7 +1861,11 @@ LibraryManager.library = { // int x = 4; printf("%c\n", (char)x); var ret; if (type === 'double') { +#if TARGET_LE32 == 2 + ret = {{{ makeGetValue('varargs', 'argIndex', 'double', undefined, undefined, true, 4) }}}; +#else ret = {{{ makeGetValue('varargs', 'argIndex', 'double', undefined, undefined, true) }}}; +#endif #if USE_TYPED_ARRAYS == 2 } else if (type == 'i64') { @@ -1876,7 +1886,11 @@ LibraryManager.library = { type = 'i32'; // varargs are always i32, i64, or double ret = {{{ makeGetValue('varargs', 'argIndex', 'i32', undefined, undefined, true) }}}; } +#if TARGET_LE32 == 2 + argIndex += Runtime.getNativeFieldSize(type); +#else argIndex += Math.max(Runtime.getNativeFieldSize(type), Runtime.getAlignSize(type, null, true)); +#endif return ret; } @@ -2505,6 +2519,10 @@ LibraryManager.library = { } var bytesRead = 0; var streamObj = FS.getStream(stream); + if (!streamObj) { + ___setErrNo(ERRNO_CODES.EBADF); + return 0; + } while (streamObj.ungotten.length && bytesToRead > 0) { {{{ makeSetValue('ptr++', '0', 'streamObj.ungotten.pop()', 'i8') }}} bytesToRead--; @@ -3522,13 +3540,15 @@ LibraryManager.library = { llvm_memcpy_p0i8_p0i8_i32: 'memcpy', llvm_memcpy_p0i8_p0i8_i64: 'memcpy', - memmove__sig: 'viii', + memmove__sig: 'iiii', memmove__asm: true, memmove__deps: ['memcpy'], memmove: function(dest, src, num) { dest = dest|0; src = src|0; num = num|0; + var ret = 0; if (((src|0) < (dest|0)) & ((dest|0) < ((src + num)|0))) { // Unlikely case: Copy backwards in a safe manner + ret = dest; src = (src + num)|0; dest = (dest + num)|0; while ((num|0) > 0) { @@ -3537,9 +3557,11 @@ LibraryManager.library = { num = (num - 1)|0; {{{ makeSetValueAsm('dest', 0, makeGetValueAsm('src', 0, 'i8'), 'i8') }}}; } + dest = ret; } else { _memcpy(dest, src, num) | 0; } + return dest | 0; }, llvm_memmove_i32: 'memmove', llvm_memmove_i64: 'memmove', @@ -3556,7 +3578,7 @@ LibraryManager.library = { memset__inline: function(ptr, value, num, align) { return makeSetValues(ptr, 0, value, 'null', num, align); }, - memset__sig: 'viii', + memset__sig: 'iiii', memset__asm: true, memset: function(ptr, value, num) { #if USE_TYPED_ARRAYS == 2 @@ -3585,8 +3607,10 @@ LibraryManager.library = { {{{ makeSetValueAsm('ptr', 0, 'value', 'i8') }}}; ptr = (ptr+1)|0; } + return (ptr-num)|0; #else {{{ makeSetValues('ptr', '0', 'value', 'null', 'num') }}}; + return ptr; #endif }, llvm_memset_i32: 'memset', @@ -4657,6 +4681,10 @@ LibraryManager.library = { llvm_dbg_declare__inline: function() { throw 'llvm_debug_declare' }, // avoid warning + // llvm-nacl + + llvm_nacl_atomic_store_i32__inline: true, + // ========================================================================== // llvm-mono integration // ========================================================================== @@ -6955,7 +6983,7 @@ LibraryManager.library = { pthread_setspecific__deps: ['$PTHREAD_SPECIFIC', '$ERRNO_CODES'], pthread_setspecific: function(key, value) { - if (value == 0) { + if (!(key in PTHREAD_SPECIFIC)) { return ERRNO_CODES.EINVAL; } PTHREAD_SPECIFIC[key] = value; @@ -8724,8 +8752,72 @@ LibraryManager.library = { // emscripten vector ops //============================ - emscripten_float32x4_signmask__inline: function(x) { - return x + '.signMask()'; + emscripten_float32x4_signmask__inline: function(a) { + return 'SIMD.float32x4.bitsToInt32x4(' + a + ').signMask'; + }, + + emscripten_float32x4_min__inline: function(a, b) { + return 'SIMD.float32x4.min(' + a + ', ' + b + ')'; + }, + + emscripten_float32x4_max__inline: function(a, b) { + return 'SIMD.float32x4.max(' + a + ', ' + b + ')'; + }, + + emscripten_float32x4_sqrt__inline: function(a) { + return 'SIMD.float32x4.sqrt(' + a + ')'; + }, + + emscripten_float32x4_lessThan__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThan(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_lessThanOrEqual__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.lessThanOrEqual(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_equal__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.equal(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_greaterThanOrEqual__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThanOrEqual(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_greaterThan__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.float32x4.greaterThan(' + a + ', ' + b + '))'; + }, + + emscripten_float32x4_and__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_andNot__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.and(SIMD.int32x4.not(SIMD.float32x4.bitsToInt32x4(' + a + ')), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_or__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_float32x4_xor__inline: function(a, b) { + return 'SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.xor(SIMD.float32x4.bitsToInt32x4(' + a + '), SIMD.float32x4.bitsToInt32x4(' + b + ')))'; + }, + + emscripten_int32x4_bitsToFloat32x4__inline: function(a) { + return 'SIMD.int32x4.bitsToFloat32x4(' + a + ')'; + }, + + emscripten_int32x4_toFloat32x4__inline: function(a) { + return 'SIMD.int32x4.toFloat32x4(' + a + ')'; + }, + + emscripten_float32x4_bitsToInt32x4__inline: function(a) { + return 'SIMD.float32x4.bitsToInt32x4(' + a + ')'; + }, + + emscripten_float32x4_toInt32x4__inline: function(a) { + return 'SIMD.float32x4.toInt32x4(' + a + ')'; }, //============================ diff --git a/src/library_browser.js b/src/library_browser.js index 8444fb73..b368c6ac 100644 --- a/src/library_browser.js +++ b/src/library_browser.js @@ -250,15 +250,24 @@ mergeInto(LibraryManager.library, { contextAttributes.preserveDrawingBuffer = true; #endif - ['experimental-webgl', 'webgl'].some(function(webglId) { - return ctx = canvas.getContext(webglId, contextAttributes); - }); + var errorInfo = '?'; + function onContextCreationError(event) { + errorInfo = event.statusMessage || errorInfo; + } + canvas.addEventListener('webglcontextcreationerror', onContextCreationError, false); + try { + ['experimental-webgl', 'webgl'].some(function(webglId) { + return ctx = canvas.getContext(webglId, contextAttributes); + }); + } finally { + canvas.removeEventListener('webglcontextcreationerror', onContextCreationError, false); + } } else { ctx = canvas.getContext('2d'); } if (!ctx) throw ':('; } catch (e) { - Module.print('Could not create canvas - ' + e); + Module.print('Could not create canvas: ' + [errorInfo, e]); return null; } if (useWebGL) { @@ -854,7 +863,7 @@ mergeInto(LibraryManager.library, { var styleSheet = document.styleSheets[0]; var rules = styleSheet.cssRules; for (var i = 0; i < rules.length; i++) { - if (rules[i].cssText.substr(0, 5) == 'canvas') { + if (rules[i].cssText.substr(0, 6) == 'canvas') { styleSheet.deleteRule(i); i--; } diff --git a/src/library_gl.js b/src/library_gl.js index afd36197..cc39b048 100644 --- a/src/library_gl.js +++ b/src/library_gl.js @@ -209,6 +209,105 @@ var LibraryGL = { ((height - 1) * alignedRowSize + plainRowSize); }, + get: function(name_, p, type) { + var ret = undefined; + switch(name_) { // Handle a few trivial GLES values + case 0x8DFA: // GL_SHADER_COMPILER + ret = 1; + break; + case 0x8DF8: // GL_SHADER_BINARY_FORMATS + if (type === 'Integer') { + // fall through, see gles2_conformance.cpp + } else { + GL.recordError(0x0500); // GL_INVALID_ENUM +#if GL_ASSERTIONS + Module.printErr('GL_INVALID_ENUM in glGet' + type + 'v(GL_SHADER_BINARY_FORMATS): Invalid parameter type!'); +#endif + return; + } + case 0x8DF9: // GL_NUM_SHADER_BINARY_FORMATS + ret = 0; + break; + case 0x86A2: // GL_NUM_COMPRESSED_TEXTURE_FORMATS + // WebGL doesn't have GL_NUM_COMPRESSED_TEXTURE_FORMATS (it's obsolete since GL_COMPRESSED_TEXTURE_FORMATS returns a JS array that can be queried for length), + // so implement it ourselves to allow C++ GLES2 code get the length. + var formats = Module.ctx.getParameter(0x86A3 /*GL_COMPRESSED_TEXTURE_FORMATS*/); + ret = formats.length; + break; + case 0x8B9A: // GL_IMPLEMENTATION_COLOR_READ_TYPE + ret = 0x1401; // GL_UNSIGNED_BYTE + break; + case 0x8B9B: // GL_IMPLEMENTATION_COLOR_READ_FORMAT + ret = 0x1908; // GL_RGBA + break; + } + + if (ret === undefined) { + var result = Module.ctx.getParameter(name_); + switch (typeof(result)) { + case "number": + ret = result; + break; + case "boolean": + ret = result ? 1 : 0; + break; + case "string": + GL.recordError(0x0500); // GL_INVALID_ENUM +#if GL_ASSERTIONS + Module.printErr('GL_INVALID_ENUM in glGet' + type + 'v(' + name_ + ') on a name which returns a string!'); +#endif + return; + case "object": + if (result === null) { + GL.recordError(0x0500); // GL_INVALID_ENUM +#if GL_ASSERTIONS + Module.printErr('GL_INVALID_ENUM in glGet' + type + 'v(' + name_ + ') and it returns null!'); +#endif + return; + } else if (result instanceof Float32Array || + result instanceof Uint32Array || + result instanceof Int32Array || + result instanceof Array) { + for (var i = 0; i < result.length; ++i) { + switch (type) { + case 'Integer': {{{ makeSetValue('p', 'i*4', 'result[i]', 'i32') }}}; break; + case 'Float': {{{ makeSetValue('p', 'i*4', 'result[i]', 'float') }}}; break; + case 'Boolean': {{{ makeSetValue('p', 'i', 'result[i] ? 1 : 0', 'i8') }}}; break; + default: throw 'internal glGet error, bad type: ' + type; + } + } + return; + } else if (result instanceof WebGLBuffer || + result instanceof WebGLProgram || + result instanceof WebGLFramebuffer || + result instanceof WebGLRenderbuffer || + result instanceof WebGLTexture) { + ret = result.name | 0; + } else { + GL.recordError(0x0500); // GL_INVALID_ENUM +#if GL_ASSERTIONS + Module.printErr('GL_INVALID_ENUM in glGet' + type + 'v: Unknown object returned from WebGL getParameter(' + name_ + ')!'); +#endif + return; + } + break; + default: + GL.recordError(0x0500); // GL_INVALID_ENUM +#if GL_ASSERTIONS + Module.printErr('GL_INVALID_ENUM in glGetIntegerv: Native code calling glGet' + type + 'v(' + name_ + ') and it returns ' + result + ' of type ' + typeof(result) + '!'); +#endif + return; + } + } + + switch (type) { + case 'Integer': {{{ makeSetValue('p', '0', 'ret', 'i32') }}}; break; + case 'Float': {{{ makeSetValue('p', '0', 'ret', 'float') }}}; break; + case 'Boolean': {{{ makeSetValue('p', '0', 'ret ? 1 : 0', 'i8') }}}; break; + default: throw 'internal glGet error, bad type: ' + type; + } + }, + getTexPixelData: function(type, format, width, height, pixels, internalFormat) { var sizePerPixel; switch (type) { @@ -288,6 +387,22 @@ var LibraryGL = { } }, +#if GL_FFP_ONLY + enabledClientAttribIndices: [], + enableVertexAttribArray: function enableVertexAttribArray(index) { + if (!GL.enabledClientAttribIndices[index]) { + GL.enabledClientAttribIndices[index] = true; + Module.ctx.enableVertexAttribArray(index); + } + }, + disableVertexAttribArray: function disableVertexAttribArray(index) { + if (GL.enabledClientAttribIndices[index]) { + GL.enabledClientAttribIndices[index] = false; + Module.ctx.disableVertexAttribArray(index); + } + }, +#endif + #if FULL_ES2 calcBufLength: function calcBufLength(size, type, stride, count) { if (stride > 0) { @@ -554,214 +669,17 @@ var LibraryGL = { glGetIntegerv__sig: 'vii', glGetIntegerv: function(name_, p) { - switch(name_) { // Handle a few trivial GLES values - case 0x8DFA: // GL_SHADER_COMPILER - {{{ makeSetValue('p', '0', '1', 'i32') }}}; - return; - case 0x8DF8: // GL_SHADER_BINARY_FORMATS - case 0x8DF9: // GL_NUM_SHADER_BINARY_FORMATS - {{{ makeSetValue('p', '0', '0', 'i32') }}}; - return; - case 0x86A2: // GL_NUM_COMPRESSED_TEXTURE_FORMATS - // WebGL doesn't have GL_NUM_COMPRESSED_TEXTURE_FORMATS (it's obsolete since GL_COMPRESSED_TEXTURE_FORMATS returns a JS array that can be queried for length), - // so implement it ourselves to allow C++ GLES2 code get the length. - var formats = Module.ctx.getParameter(0x86A3 /*GL_COMPRESSED_TEXTURE_FORMATS*/); - {{{ makeSetValue('p', '0', 'formats.length', 'i32') }}}; - return; - } - var result = Module.ctx.getParameter(name_); - switch (typeof(result)) { - case "number": - {{{ makeSetValue('p', '0', 'result', 'i32') }}}; - break; - case "boolean": - {{{ makeSetValue('p', '0', 'result ? 1 : 0', 'i8') }}}; - break; - case "string": - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetIntegerv: Native code calling glGetIntegerv(' + name_ + ') on a name which returns a string!'); -#endif - return; - case "object": - if (result === null) { - {{{ makeSetValue('p', '0', '0', 'i32') }}}; - } else if (result instanceof Float32Array || - result instanceof Uint32Array || - result instanceof Int32Array || - result instanceof Array) { - for (var i = 0; i < result.length; ++i) { - {{{ makeSetValue('p', 'i*4', 'result[i]', 'i32') }}}; - } - } else if (result instanceof WebGLBuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'i32') }}}; - } else if (result instanceof WebGLProgram) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'i32') }}}; - } else if (result instanceof WebGLFramebuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'i32') }}}; - } else if (result instanceof WebGLRenderbuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'i32') }}}; - } else if (result instanceof WebGLTexture) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'i32') }}}; - } else { - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetIntegerv: Unknown object returned from WebGL getParameter(' + name_ + ')!'); -#endif - return; - } - break; - default: - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetIntegerv: Native code calling glGetIntegerv(' + name_ + ') and it returns ' + result + ' of type ' + typeof(result) + '!'); -#endif - return; - } + return GL.get(name_, p, 'Integer'); }, glGetFloatv__sig: 'vii', glGetFloatv: function(name_, p) { - switch(name_) { - case 0x8DFA: // GL_SHADER_COMPILER - {{{ makeSetValue('p', '0', '1', 'float') }}}; - return; - case 0x8DF8: // GL_SHADER_BINARY_FORMATS - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetFloatv(GL_SHADER_BINARY_FORMATS): Invalid parameter type!'); -#endif - return; - case 0x8DF9: // GL_NUM_SHADER_BINARY_FORMATS - {{{ makeSetValue('p', '0', '0', 'float') }}}; - return; - case 0x86A2: // GL_NUM_COMPRESSED_TEXTURE_FORMATS - // WebGL doesn't have GL_NUM_COMPRESSED_TEXTURE_FORMATS (it's obsolete since GL_COMPRESSED_TEXTURE_FORMATS returns a JS array that can be queried for length), - // so implement it ourselves to allow C++ GLES2 code get the length. - var formats = Module.ctx.getParameter(0x86A3 /*GL_COMPRESSED_TEXTURE_FORMATS*/); - {{{ makeSetValue('p', '0', 'formats.length', 'float') }}}; - return; - } - - var result = Module.ctx.getParameter(name_); - switch (typeof(result)) { - case "number": - {{{ makeSetValue('p', '0', 'result', 'float') }}}; - break; - case "boolean": - {{{ makeSetValue('p', '0', 'result ? 1.0 : 0.0', 'float') }}}; - break; - case "string": - {{{ makeSetValue('p', '0', '0', 'float') }}}; - case "object": - if (result === null) { - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetFloatv: Native code calling glGetFloatv(' + name_ + ') and it returns null!'); -#endif - return; - } else if (result instanceof Float32Array || - result instanceof Uint32Array || - result instanceof Int32Array || - result instanceof Array) { - for (var i = 0; i < result.length; ++i) { - {{{ makeSetValue('p', 'i*4', 'result[i]', 'float') }}}; - } - } else if (result instanceof WebGLBuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'float') }}}; - } else if (result instanceof WebGLProgram) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'float') }}}; - } else if (result instanceof WebGLFramebuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'float') }}}; - } else if (result instanceof WebGLRenderbuffer) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'float') }}}; - } else if (result instanceof WebGLTexture) { - {{{ makeSetValue('p', '0', 'result.name | 0', 'float') }}}; - } else { - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetFloatv: Native code calling glGetFloatv(' + name_ + ') and it returns ' + result + ' of type ' + typeof(result) + '!'); -#endif - return; - } - break; - default: - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetFloatv: Native code calling glGetFloatv(' + name_ + ') and it returns ' + result + ' of type ' + typeof(result) + '!'); -#endif - return; - } + return GL.get(name_, p, 'Float'); }, glGetBooleanv__sig: 'vii', glGetBooleanv: function(name_, p) { - switch(name_) { - case 0x8DFA: // GL_SHADER_COMPILER - {{{ makeSetValue('p', '0', '1', 'i8') }}}; - return; - case 0x8DF8: // GL_SHADER_BINARY_FORMATS - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetBooleanv(GL_SHADER_BINARY_FORMATS): Invalid parameter type!'); -#endif - return; - case 0x8DF9: // GL_NUM_SHADER_BINARY_FORMATS - {{{ makeSetValue('p', '0', '0', 'i8') }}}; - return; - case 0x86A2: // GL_NUM_COMPRESSED_TEXTURE_FORMATS - // WebGL doesn't have GL_NUM_COMPRESSED_TEXTURE_FORMATS (it's obsolete since GL_COMPRESSED_TEXTURE_FORMATS returns a JS array that can be queried for length), - // so implement it ourselves to allow C++ GLES2 code get the length. - var hasCompressedFormats = Module.ctx.getParameter(0x86A3 /*GL_COMPRESSED_TEXTURE_FORMATS*/).length > 0 ? 1 : 0; - {{{ makeSetValue('p', '0', 'hasCompressedFormats', 'i8') }}}; - return; - } - - var result = Module.ctx.getParameter(name_); - switch (typeof(result)) { - case "number": - {{{ makeSetValue('p', '0', 'result != 0', 'i8') }}}; - break; - case "boolean": - {{{ makeSetValue('p', '0', 'result != 0', 'i8') }}}; - break; - case "string": - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetBooleanv: Native code calling glGetBooleanv(' + name_ + ') on a name which returns a string!'); -#endif - return; - case "object": - if (result === null) { - {{{ makeSetValue('p', '0', '0', 'i8') }}}; - } else if (result instanceof Float32Array || - result instanceof Uint32Array || - result instanceof Int32Array || - result instanceof Array) { - for (var i = 0; i < result.length; ++i) { - {{{ makeSetValue('p', 'i', 'result[i] != 0', 'i8') }}}; - } - } else if (result instanceof WebGLBuffer || - result instanceof WebGLProgram || - result instanceof WebGLFramebuffer || - result instanceof WebGLRenderbuffer || - result instanceof WebGLTexture) { - {{{ makeSetValue('p', '0', '1', 'i8') }}}; // non-zero ID is always 1! - } else { - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetBooleanv: Unknown object returned from WebGL getParameter(' + name_ + ')!'); -#endif - return; - } - break; - default: - GL.recordError(0x0500/*GL_INVALID_ENUM*/); -#if GL_ASSERTIONS - Module.printErr('GL_INVALID_ENUM in glGetBooleanv: Native code calling glGetBooleanv(' + name_ + ') and it returns ' + result + ' of type ' + typeof(result) + '!'); -#endif - return; - } + return GL.get(name_, p, 'Boolean'); }, glGenTextures__sig: 'vii', @@ -1808,7 +1726,7 @@ var LibraryGL = { // Add some emulation workarounds Module.printErr('WARNING: using emscripten GL emulation. This is a collection of limited workarounds, do not expect it to work.'); -#if GL_UNSAFE_OPTS == 0 +#if GL_UNSAFE_OPTS == 1 Module.printErr('WARNING: using emscripten GL emulation unsafe opts. If weirdness happens, try -s GL_UNSAFE_OPTS=0'); #endif @@ -2149,7 +2067,10 @@ var LibraryGL = { } } #endif - GL.currProgram = program; + if (GL.currProgram != program) { + GL.currentRenderer = null; // This changes the FFP emulation shader program, need to recompute that. + GL.currProgram = program; + } glUseProgram(program); } @@ -2689,32 +2610,85 @@ var LibraryGL = { GL_SRC_ALPHA ]; - this.traverseState = function CTexEnv_traverseState(keyView) { - keyView.next(this.mode); - keyView.next(this.colorCombiner); - keyView.next(this.alphaCombiner); - keyView.next(this.colorCombiner); - keyView.next(this.alphaScale); - keyView.next(this.envColor[0]); - keyView.next(this.envColor[1]); - keyView.next(this.envColor[2]); - keyView.next(this.envColor[3]); - - keyView.next(this.colorSrc[0]); - keyView.next(this.colorSrc[1]); - keyView.next(this.colorSrc[2]); - - keyView.next(this.alphaSrc[0]); - keyView.next(this.alphaSrc[1]); - keyView.next(this.alphaSrc[2]); - - keyView.next(this.colorOp[0]); - keyView.next(this.colorOp[1]); - keyView.next(this.colorOp[2]); - - keyView.next(this.alphaOp[0]); - keyView.next(this.alphaOp[1]); - keyView.next(this.alphaOp[2]); + // Map GLenums to small values to efficiently pack the enums to bits for tighter access. + this.traverseKey = { + // mode + 0x1E01 /* GL_REPLACE */: 0, + 0x2100 /* GL_MODULATE */: 1, + 0x0104 /* GL_ADD */: 2, + 0x0BE2 /* GL_BLEND */: 3, + 0x2101 /* GL_DECAL */: 4, + 0x8570 /* GL_COMBINE */: 5, + + // additional color and alpha combiners + 0x84E7 /* GL_SUBTRACT */: 3, + 0x8575 /* GL_INTERPOLATE */: 4, + + // color and alpha src + 0x1702 /* GL_TEXTURE */: 0, + 0x8576 /* GL_CONSTANT */: 1, + 0x8577 /* GL_PRIMARY_COLOR */: 2, + 0x8578 /* GL_PREVIOUS */: 3, + + // color and alpha op + 0x0300 /* GL_SRC_COLOR */: 0, + 0x0301 /* GL_ONE_MINUS_SRC_COLOR */: 1, + 0x0302 /* GL_SRC_ALPHA */: 2, + 0x0300 /* GL_ONE_MINUS_SRC_ALPHA */: 3 + }; + + // The tuple (key0,key1,key2) uniquely identifies the state of the variables in CTexEnv. + // -1 on key0 denotes 'the whole cached key is dirty' + this.key0 = -1; + this.key1 = 0; + this.key2 = 0; + + this.computeKey0 = function() { + var k = this.traverseKey; + var key = k[this.mode] * 1638400; // 6 distinct values. + key += k[this.colorCombiner] * 327680; // 5 distinct values. + key += k[this.alphaCombiner] * 65536; // 5 distinct values. + // The above three fields have 6*5*5=150 distinct values -> 8 bits. + key += (this.colorScale-1) * 16384; // 10 bits used. + key += (this.alphaScale-1) * 4096; // 12 bits used. + key += k[this.colorSrc[0]] * 1024; // 14 + key += k[this.colorSrc[1]] * 256; // 16 + key += k[this.colorSrc[2]] * 64; // 18 + key += k[this.alphaSrc[0]] * 16; // 20 + key += k[this.alphaSrc[1]] * 4; // 22 + key += k[this.alphaSrc[2]]; // 24 bits used total. + return key; + } + this.computeKey1 = function() { + var k = this.traverseKey; + key = k[this.colorOp[0]] * 4096; + key += k[this.colorOp[1]] * 1024; + key += k[this.colorOp[2]] * 256; + key += k[this.alphaOp[0]] * 16; + key += k[this.alphaOp[1]] * 4; + key += k[this.alphaOp[2]]; + return key; + } + // TODO: remove this. The color should not be part of the key! + this.computeKey2 = function() { + return this.envColor[0] * 16777216 + this.envColor[1] * 65536 + this.envColor[2] * 256 + 1 + this.envColor[3]; + } + this.recomputeKey = function() { + this.key0 = this.computeKey0(); + this.key1 = this.computeKey1(); + this.key2 = this.computeKey2(); + } + this.invalidateKey = function() { + this.key0 = -1; // The key of this texture unit must be recomputed when rendering the next time. + GL.immediate.currentRenderer = null; // The currently used renderer must be re-evaluated at next render. + } + this.traverseState = function(keyView) { + if (this.key0 == -1) { + this.recomputeKey(); + } + keyView.next(this.key0); + keyView.next(this.key1); + keyView.next(this.key2); }; } @@ -3076,16 +3050,28 @@ var LibraryGL = { var cur = getCurTexUnit(); switch (cap) { case GL_TEXTURE_1D: - cur.enabled_tex1D = true; + if (!cur.enabled_tex1D) { + GL.immediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again. + cur.enabled_tex1D = true; + } break; case GL_TEXTURE_2D: - cur.enabled_tex2D = true; + if (!cur.enabled_tex2D) { + GL.immediate.currentRenderer = null; + cur.enabled_tex2D = true; + } break; case GL_TEXTURE_3D: - cur.enabled_tex3D = true; + if (!cur.enabled_tex3D) { + GL.immediate.currentRenderer = null; + cur.enabled_tex3D = true; + } break; case GL_TEXTURE_CUBE_MAP: - cur.enabled_texCube = true; + if (!cur.enabled_texCube) { + GL.immediate.currentRenderer = null; + cur.enabled_texCube = true; + } break; } }, @@ -3094,16 +3080,28 @@ var LibraryGL = { var cur = getCurTexUnit(); switch (cap) { case GL_TEXTURE_1D: - cur.enabled_tex1D = false; + if (cur.enabled_tex1D) { + GL.immediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again. + cur.enabled_tex1D = false; + } break; case GL_TEXTURE_2D: - cur.enabled_tex2D = false; + if (cur.enabled_tex2D) { + GL.immediate.currentRenderer = null; + cur.enabled_tex2D = false; + } break; case GL_TEXTURE_3D: - cur.enabled_tex3D = false; + if (cur.enabled_tex3D) { + GL.immediate.currentRenderer = null; + cur.enabled_tex3D = false; + } break; case GL_TEXTURE_CUBE_MAP: - cur.enabled_texCube = false; + if (cur.enabled_texCube) { + GL.immediate.currentRenderer = null; + cur.enabled_texCube = false; + } break; } }, @@ -3115,10 +3113,16 @@ var LibraryGL = { var env = getCurTexUnit().env; switch (pname) { case GL_RGB_SCALE: - env.colorScale = param; + if (env.colorScale != param) { + env.invalidateKey(); // We changed FFP emulation renderer state. + env.colorScale = param; + } break; case GL_ALPHA_SCALE: - env.alphaScale = param; + if (env.alphaScale != param) { + env.invalidateKey(); + env.alphaScale = param; + } break; default: @@ -3133,61 +3137,112 @@ var LibraryGL = { var env = getCurTexUnit().env; switch (pname) { case GL_TEXTURE_ENV_MODE: - env.mode = param; + if (env.mode != param) { + env.invalidateKey(); // We changed FFP emulation renderer state. + env.mode = param; + } break; case GL_COMBINE_RGB: - env.colorCombiner = param; + if (env.colorCombiner != param) { + env.invalidateKey(); + env.colorCombiner = param; + } break; case GL_COMBINE_ALPHA: - env.alphaCombiner = param; + if (env.alphaCombiner != param) { + env.invalidateKey(); + env.alphaCombiner = param; + } break; case GL_SRC0_RGB: - env.colorSrc[0] = param; + if (env.colorSrc[0] != param) { + env.invalidateKey(); + env.colorSrc[0] = param; + } break; case GL_SRC1_RGB: - env.colorSrc[1] = param; + if (env.colorSrc[1] != param) { + env.invalidateKey(); + env.colorSrc[1] = param; + } break; case GL_SRC2_RGB: - env.colorSrc[2] = param; + if (env.colorSrc[2] != param) { + env.invalidateKey(); + env.colorSrc[2] = param; + } break; case GL_SRC0_ALPHA: - env.alphaSrc[0] = param; + if (env.alphaSrc[0] != param) { + env.invalidateKey(); + env.alphaSrc[0] = param; + } break; case GL_SRC1_ALPHA: - env.alphaSrc[1] = param; + if (env.alphaSrc[1] != param) { + env.invalidateKey(); + env.alphaSrc[1] = param; + } break; case GL_SRC2_ALPHA: - env.alphaSrc[2] = param; + if (env.alphaSrc[2] != param) { + env.invalidateKey(); + env.alphaSrc[2] = param; + } break; case GL_OPERAND0_RGB: - env.colorOp[0] = param; + if (env.colorOp[0] != param) { + env.invalidateKey(); + env.colorOp[0] = param; + } break; case GL_OPERAND1_RGB: - env.colorOp[1] = param; + if (env.colorOp[1] != param) { + env.invalidateKey(); + env.colorOp[1] = param; + } break; case GL_OPERAND2_RGB: - env.colorOp[2] = param; + if (env.colorOp[2] != param) { + env.invalidateKey(); + env.colorOp[2] = param; + } break; case GL_OPERAND0_ALPHA: - env.alphaOp[0] = param; + if (env.alphaOp[0] != param) { + env.invalidateKey(); + env.alphaOp[0] = param; + } break; case GL_OPERAND1_ALPHA: - env.alphaOp[1] = param; + if (env.alphaOp[1] != param) { + env.invalidateKey(); + env.alphaOp[1] = param; + } break; case GL_OPERAND2_ALPHA: - env.alphaOp[2] = param; + if (env.alphaOp[2] != param) { + env.invalidateKey(); + env.alphaOp[2] = param; + } break; case GL_RGB_SCALE: - env.colorScale = param; + if (env.colorScale != param) { + env.invalidateKey(); + env.colorScale = param; + } break; case GL_ALPHA_SCALE: - env.alphaScale = param; + if (env.alphaScale != param) { + env.invalidateKey(); + env.alphaScale = param; + } break; default: @@ -3203,7 +3258,10 @@ var LibraryGL = { case GL_TEXTURE_ENV_COLOR: { for (var i = 0; i < 4; i++) { var param = {{{ makeGetValue('params', 'i*4', 'float') }}}; - env.envColor[i] = param; + if (env.envColor[i] != param) { + env.invalidateKey(); // We changed FFP emulation renderer state. + env.envColor[i] = param; + } } break } @@ -3243,26 +3301,21 @@ var LibraryGL = { NORMAL: 1, COLOR: 2, TEXTURE0: 3, - TEXTURE1: 4, - TEXTURE2: 5, - TEXTURE3: 6, - TEXTURE4: 7, - TEXTURE5: 8, - TEXTURE6: 9, - NUM_ATTRIBUTES: 10, // Overwritten in init(). - MAX_TEXTURES: 7, // Overwritten in init(). + NUM_ATTRIBUTES: -1, // Initialized in GL emulation init(). + MAX_TEXTURES: -1, // Initialized in GL emulation init(). totalEnabledClientAttributes: 0, enabledClientAttributes: [0, 0], clientAttributes: [], // raw data, including possible unneeded ones liveClientAttributes: [], // the ones actually alive in the current computation, sorted + currentRenderer: null, // Caches the currently active FFP emulation renderer, so that it does not have to be re-looked up unless relevant state changes. modifiedClientAttributes: false, clientActiveTexture: 0, clientColor: null, usedTexUnitList: [], fixedFunctionProgram: null, - setClientAttribute: function(name, size, type, stride, pointer) { + setClientAttribute: function setClientAttribute(name, size, type, stride, pointer) { var attrib = this.clientAttributes[name]; if (!attrib) { for (var i = 0; i <= name; i++) { // keep flat @@ -3289,7 +3342,7 @@ var LibraryGL = { }, // Renderers - addRendererComponent: function(name, size, type) { + addRendererComponent: function addRendererComponent(name, size, type) { if (!this.rendererComponents[name]) { this.rendererComponents[name] = 1; #if ASSERTIONS @@ -3305,13 +3358,18 @@ var LibraryGL = { } }, - disableBeginEndClientAttributes: function() { + disableBeginEndClientAttributes: function disableBeginEndClientAttributes() { for (var i = 0; i < this.NUM_ATTRIBUTES; i++) { if (this.rendererComponents[i]) this.enabledClientAttributes[i] = false; } }, - getRenderer: function() { + getRenderer: function getRenderer() { + // If no FFP state has changed that would have forced to re-evaluate which FFP emulation shader to use, + // we have the currently used renderer in cache, and can immediately return that. + if (this.currentRenderer) { + return this.currentRenderer; + } // return a renderer object given the liveClientAttributes // we maintain a cache of renderers, optimized to not generate garbage var attributes = GL.immediate.liveClientAttributes; @@ -3320,10 +3378,11 @@ var LibraryGL = { var keyView = cacheMap.getStaticKeyView().reset(); // By attrib state: + var enabledAttributesKey = 0; for (var i = 0; i < attributes.length; i++) { - var attribute = attributes[i]; - keyView.next(attribute.name).next(attribute.size).next(attribute.type); + enabledAttributesKey |= 1 << attributes[i].name; } + keyView.next(enabledAttributesKey); // By fog state: var fogParam = 0; @@ -3349,18 +3408,23 @@ var LibraryGL = { } // If we don't already have it, create it. - if (!keyView.get()) { + var renderer = keyView.get(); + if (!renderer) { #if GL_DEBUG Module.printErr('generating renderer for ' + JSON.stringify(attributes)); #endif - keyView.set(this.createRenderer()); + renderer = this.createRenderer(); + this.currentRenderer = renderer; + keyView.set(renderer); + return renderer; } - return keyView.get(); + this.currentRenderer = renderer; // Cache the currently used renderer, so later lookups without state changes can get this fast. + return renderer; }, - createRenderer: function(renderer) { + createRenderer: function createRenderer(renderer) { var useCurrProgram = !!GL.currProgram; - var hasTextures = false, textureSizes = [], textureTypes = []; + var hasTextures = false; for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { var texAttribName = GL.immediate.TEXTURE0 + i; if (!GL.immediate.enabledClientAttributes[texAttribName]) @@ -3374,24 +3438,11 @@ var LibraryGL = { } #endif - textureSizes[i] = GL.immediate.clientAttributes[texAttribName].size; - textureTypes[i] = GL.immediate.clientAttributes[texAttribName].type; hasTextures = true; } - var positionSize = GL.immediate.clientAttributes[GL.immediate.VERTEX].size; - var positionType = GL.immediate.clientAttributes[GL.immediate.VERTEX].type; - var colorSize = 0, colorType; - if (GL.immediate.enabledClientAttributes[GL.immediate.COLOR]) { - colorSize = GL.immediate.clientAttributes[GL.immediate.COLOR].size; - colorType = GL.immediate.clientAttributes[GL.immediate.COLOR].type; - } - var normalSize = 0, normalType; - if (GL.immediate.enabledClientAttributes[GL.immediate.NORMAL]) { - normalSize = GL.immediate.clientAttributes[GL.immediate.NORMAL].size; - normalType = GL.immediate.clientAttributes[GL.immediate.NORMAL].type; - } + var ret = { - init: function() { + init: function init() { // For fixed-function shader generation. var uTexUnitPrefix = 'u_texUnit'; var aTexCoordPrefix = 'a_texCoord'; @@ -3524,10 +3575,25 @@ var LibraryGL = { this.program = Module.ctx.createProgram(); Module.ctx.attachShader(this.program, this.vertexShader); Module.ctx.attachShader(this.program, this.fragmentShader); - Module.ctx.bindAttribLocation(this.program, 0, 'a_position'); + + // As optimization, bind all attributes to prespecified locations, so that the FFP emulation + // code can submit attributes to any generated FFP shader without having to examine each shader in turn. + // These prespecified locations are only assumed if GL_FFP_ONLY is specified, since user could also create their + // own shaders that didn't have attributes in the same locations. + Module.ctx.bindAttribLocation(this.program, GL.immediate.VERTEX, 'a_position'); + Module.ctx.bindAttribLocation(this.program, GL.immediate.COLOR, 'a_color'); + Module.ctx.bindAttribLocation(this.program, GL.immediate.NORMAL, 'a_normal'); + for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { + Module.ctx.bindAttribLocation(this.program, GL.immediate.TEXTURE0 + i, 'a_texCoord'+i); + Module.ctx.bindAttribLocation(this.program, GL.immediate.TEXTURE0 + i, aTexCoordPrefix+i); + } Module.ctx.linkProgram(this.program); } + // Stores a map that remembers which matrix uniforms are up-to-date in this FFP renderer, so they don't need to be resubmitted + // each time we render with this program. + this.textureMatrixVersion = {}; + this.positionLocation = Module.ctx.getAttribLocation(this.program, 'a_position'); this.texCoordLocations = []; @@ -3570,7 +3636,9 @@ var LibraryGL = { this.projectionLocation = Module.ctx.getUniformLocation(this.program, 'u_projection'); this.hasTextures = hasTextures; - this.hasNormal = normalSize > 0 && this.normalLocation >= 0; + this.hasNormal = GL.immediate.enabledClientAttributes[GL.immediate.NORMAL] && + GL.immediate.clientAttributes[GL.immediate.NORMAL].size > 0 && + this.normalLocation >= 0; this.hasColor = (this.colorLocation === 0) || this.colorLocation > 0; this.floatType = Module.ctx.FLOAT; // minor optimization @@ -3583,7 +3651,7 @@ var LibraryGL = { this.fogScaleLocation || this.fogDensityLocation); }, - prepare: function() { + prepare: function prepare() { // Calculate the array buffer var arrayBuffer; if (!GL.currArrayBuffer) { @@ -3598,10 +3666,10 @@ var LibraryGL = { arrayBuffer = GL.currArrayBuffer; } +#if GL_UNSAFE_OPTS // If the array buffer is unchanged and the renderer as well, then we can avoid all the work here // XXX We use some heuristics here, and this may not work in all cases. Try disabling GL_UNSAFE_OPTS if you // have odd glitches -#if GL_UNSAFE_OPTS var lastRenderer = GL.immediate.lastRenderer; var canSkip = this == lastRenderer && arrayBuffer == GL.immediate.lastArrayBuffer && @@ -3636,62 +3704,105 @@ var LibraryGL = { GL.immediate.fixedFunctionProgram = this.program; } - if (this.modelViewLocation) Module.ctx.uniformMatrix4fv(this.modelViewLocation, false, GL.immediate.matrix['m']); - if (this.projectionLocation) Module.ctx.uniformMatrix4fv(this.projectionLocation, false, GL.immediate.matrix['p']); + if (this.modelViewLocation && this.modelViewMatrixVersion != GL.immediate.matrixVersion['m']) { + this.modelViewMatrixVersion = GL.immediate.matrixVersion['m']; + Module.ctx.uniformMatrix4fv(this.modelViewLocation, false, GL.immediate.matrix['m']); + } + if (this.projectionLocation && this.projectionMatrixVersion != GL.immediate.matrixVersion['p']) { + this.projectionMatrixVersion = GL.immediate.matrixVersion['p']; + Module.ctx.uniformMatrix4fv(this.projectionLocation, false, GL.immediate.matrix['p']); + } var clientAttributes = GL.immediate.clientAttributes; + var posAttr = clientAttributes[GL.immediate.VERTEX]; #if GL_ASSERTIONS - GL.validateVertexAttribPointer(positionSize, positionType, GL.immediate.stride, clientAttributes[GL.immediate.VERTEX].offset); + GL.validateVertexAttribPointer(posAttr.size, posAttr.type, GL.immediate.stride, clientAttributes[GL.immediate.VERTEX].offset); #endif - Module.ctx.vertexAttribPointer(this.positionLocation, positionSize, positionType, false, - GL.immediate.stride, clientAttributes[GL.immediate.VERTEX].offset); + +#if GL_FFP_ONLY + if (!GL.currArrayBuffer) { + Module.ctx.vertexAttribPointer(GL.immediate.VERTEX, posAttr.size, posAttr.type, false, GL.immediate.stride, posAttr.offset); + GL.enableVertexAttribArray(GL.immediate.VERTEX); + if (this.hasNormal) { + var normalAttr = clientAttributes[GL.immediate.NORMAL]; + Module.ctx.vertexAttribPointer(GL.immediate.NORMAL, normalAttr.size, normalAttr.type, true, GL.immediate.stride, normalAttr.offset); + GL.enableVertexAttribArray(GL.immediate.NORMAL); + } + } +#else + Module.ctx.vertexAttribPointer(this.positionLocation, posAttr.size, posAttr.type, false, GL.immediate.stride, posAttr.offset); Module.ctx.enableVertexAttribArray(this.positionLocation); + if (this.hasNormal) { + var normalAttr = clientAttributes[GL.immediate.NORMAL]; +#if GL_ASSERTIONS + GL.validateVertexAttribPointer(normalAttr.size, normalAttr.type, GL.immediate.stride, normalAttr.offset); +#endif + Module.ctx.vertexAttribPointer(this.normalLocation, normalAttr.size, normalAttr.type, true, GL.immediate.stride, normalAttr.offset); + Module.ctx.enableVertexAttribArray(this.normalLocation); + } +#endif if (this.hasTextures) { - //for (var i = 0; i < this.usedTexUnitList.length; i++) { - // var texUnitID = this.usedTexUnitList[i]; for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { - var texUnitID = i; - var attribLoc = this.texCoordLocations[texUnitID]; +#if GL_FFP_ONLY + if (!GL.currArrayBuffer) { + var attribLoc = GL.immediate.TEXTURE0+i; + var texAttr = clientAttributes[attribLoc]; + if (texAttr.size) { + Module.ctx.vertexAttribPointer(attribLoc, texAttr.size, texAttr.type, false, GL.immediate.stride, texAttr.offset); + GL.enableVertexAttribArray(attribLoc); + } else { + // These two might be dangerous, but let's try them. + Module.ctx.vertexAttrib4f(attribLoc, 0, 0, 0, 1); + GL.disableVertexAttribArray(attribLoc); + } + } +#else + var attribLoc = this.texCoordLocations[i]; if (attribLoc === undefined || attribLoc < 0) continue; + var texAttr = clientAttributes[GL.immediate.TEXTURE0+i]; - if (texUnitID < textureSizes.length && textureSizes[texUnitID]) { + if (texAttr.size) { #if GL_ASSERTIONS - GL.validateVertexAttribPointer(textureSizes[texUnitID], textureTypes[texUnitID], GL.immediate.stride, GL.immediate.clientAttributes[GL.immediate.TEXTURE0 + texUnitID].offset); + GL.validateVertexAttribPointer(texAttr.size, texAttr.type, GL.immediate.stride, texAttr.offset); #endif - Module.ctx.vertexAttribPointer(attribLoc, textureSizes[texUnitID], textureTypes[texUnitID], false, - GL.immediate.stride, GL.immediate.clientAttributes[GL.immediate.TEXTURE0 + texUnitID].offset); + Module.ctx.vertexAttribPointer(attribLoc, texAttr.size, texAttr.type, false, GL.immediate.stride, texAttr.offset); Module.ctx.enableVertexAttribArray(attribLoc); } else { // These two might be dangerous, but let's try them. Module.ctx.vertexAttrib4f(attribLoc, 0, 0, 0, 1); Module.ctx.disableVertexAttribArray(attribLoc); } - } - for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { - if (this.textureMatrixLocations[i]) { // XXX might we need this even without the condition we are currently in? - Module.ctx.uniformMatrix4fv(this.textureMatrixLocations[i], false, GL.immediate.matrix['t' + i]); +#endif + var t = 't'+i; + if (this.textureMatrixLocations[i] && this.textureMatrixVersion[t] != GL.immediate.matrixVersion[t]) { // XXX might we need this even without the condition we are currently in? + this.textureMatrixVersion[t] = GL.immediate.matrixVersion[t]; + Module.ctx.uniformMatrix4fv(this.textureMatrixLocations[i], false, GL.immediate.matrix[t]); } } } - if (colorSize) { + if (GL.immediate.enabledClientAttributes[GL.immediate.COLOR]) { + var colorAttr = clientAttributes[GL.immediate.COLOR]; #if GL_ASSERTIONS - GL.validateVertexAttribPointer(colorSize, colorType, GL.immediate.stride, clientAttributes[GL.immediate.COLOR].offset); + GL.validateVertexAttribPointer(colorAttr.size, colorAttr.type, GL.immediate.stride, colorAttr.offset); #endif - Module.ctx.vertexAttribPointer(this.colorLocation, colorSize, colorType, true, - GL.immediate.stride, clientAttributes[GL.immediate.COLOR].offset); +#if GL_FFP_ONLY + if (!GL.currArrayBuffer) { + Module.ctx.vertexAttribPointer(GL.immediate.COLOR, colorAttr.size, colorAttr.type, true, GL.immediate.stride, colorAttr.offset); + GL.enableVertexAttribArray(GL.immediate.COLOR); + } +#else + Module.ctx.vertexAttribPointer(this.colorLocation, colorAttr.size, colorAttr.type, true, GL.immediate.stride, colorAttr.offset); Module.ctx.enableVertexAttribArray(this.colorLocation); +#endif } else if (this.hasColor) { +#if GL_FFP_ONLY + GL.disableVertexAttribArray(GL.immediate.COLOR); + Module.ctx.vertexAttrib4fv(GL.immediate.COLOR, GL.immediate.clientColor); +#else Module.ctx.disableVertexAttribArray(this.colorLocation); Module.ctx.vertexAttrib4fv(this.colorLocation, GL.immediate.clientColor); - } - if (this.hasNormal) { -#if GL_ASSERTIONS - GL.validateVertexAttribPointer(normalSize, normalType, GL.immediate.stride, clientAttributes[GL.immediate.NORMAL].offset); #endif - Module.ctx.vertexAttribPointer(this.normalLocation, normalSize, normalType, true, - GL.immediate.stride, clientAttributes[GL.immediate.NORMAL].offset); - Module.ctx.enableVertexAttribArray(this.normalLocation); } if (this.hasFog) { if (this.fogColorLocation) Module.ctx.uniform4fv(this.fogColorLocation, GLEmulation.fogColor); @@ -3701,11 +3812,12 @@ var LibraryGL = { } }, - cleanup: function() { + cleanup: function cleanup() { +#if !GL_FFP_ONLY Module.ctx.disableVertexAttribArray(this.positionLocation); if (this.hasTextures) { - for (var i = 0; i < textureSizes.length; i++) { - if (textureSizes[i] && this.texCoordLocations[i] >= 0) { + for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { + if (GL.immediate.enabledClientAttributes[GL.immediate.TEXTURE0+i] && this.texCoordLocations[i] >= 0) { Module.ctx.disableVertexAttribArray(this.texCoordLocations[i]); } } @@ -3729,6 +3841,7 @@ var LibraryGL = { GL.immediate.lastProgram = null; #endif GL.immediate.matricesModified = true; +#endif } }; ret.init(); @@ -3858,11 +3971,15 @@ var LibraryGL = { this.TexEnvJIT.init(Module.ctx); - GL.immediate.MAX_TEXTURES = Module.ctx.getParameter(Module.ctx.MAX_TEXTURE_IMAGE_UNITS); - GL.immediate.NUM_ATTRIBUTES = GL.immediate.TEXTURE0 + GL.immediate.MAX_TEXTURES; + // User can override the maximum number of texture units that we emulate. Using fewer texture units increases runtime performance + // slightly, so it is advantageous to choose as small value as needed. + GL.immediate.MAX_TEXTURES = Module['GL_MAX_TEXTURE_IMAGE_UNITS'] || Module.ctx.getParameter(Module.ctx.MAX_TEXTURE_IMAGE_UNITS); + GL.immediate.NUM_ATTRIBUTES = 3 /*pos+normal+color attributes*/ + GL.immediate.MAX_TEXTURES; GL.immediate.clientAttributes = []; + GLEmulation.enabledClientAttribIndices = []; for (var i = 0; i < GL.immediate.NUM_ATTRIBUTES; i++) { GL.immediate.clientAttributes.push({}); + GLEmulation.enabledClientAttribIndices.push(false); } this.matrixStack['m'] = []; @@ -3872,13 +3989,18 @@ var LibraryGL = { } // Initialize matrix library - + // When user sets a matrix, increment a 'version number' on the new data, and when rendering, submit + // the matrices to the shader program only if they have an old version of the data. + GL.immediate.matrixVersion = {}; GL.immediate.matrix['m'] = GL.immediate.matrix.lib.mat4.create(); + GL.immediate.matrixVersion['m'] = 0; GL.immediate.matrix.lib.mat4.identity(GL.immediate.matrix['m']); GL.immediate.matrix['p'] = GL.immediate.matrix.lib.mat4.create(); + GL.immediate.matrixVersion['p'] = 0; GL.immediate.matrix.lib.mat4.identity(GL.immediate.matrix['p']); for (var i = 0; i < GL.immediate.MAX_TEXTURES; i++) { GL.immediate.matrix['t' + i] = GL.immediate.matrix.lib.mat4.create(); + GL.immediate.matrixVersion['t' + i] = 0; } // Renderer cache @@ -3899,7 +4021,7 @@ var LibraryGL = { // Modifies liveClientAttributes, stride, vertexPointer, vertexCounter // count: number of elements we will draw // beginEnd: whether we are drawing the results of a begin/end block - prepareClientAttributes: function(count, beginEnd) { + prepareClientAttributes: function prepareClientAttributes(count, beginEnd) { // If no client attributes were modified since we were last called, do nothing. Note that this // does not work for glBegin/End, where we generate renderer components dynamically and then // disable them ourselves, but it does help with glDrawElements/Arrays. @@ -3997,7 +4119,7 @@ var LibraryGL = { } }, - flush: function(numProvidedIndexes, startIndex, ptr) { + flush: function flush(numProvidedIndexes, startIndex, ptr) { #if ASSERTIONS assert(numProvidedIndexes >= 0 || !numProvidedIndexes); #endif @@ -4070,7 +4192,7 @@ var LibraryGL = { Module.ctx.bindBuffer(Module.ctx.ELEMENT_ARRAY_BUFFER, GL.buffers[GL.currElementArrayBuffer] || null); } -#if GL_UNSAFE_OPTS == 0 +#if GL_UNSAFE_OPTS == 0 && !GL_FFP_ONLY renderer.cleanup(); #endif } @@ -4237,7 +4359,7 @@ var LibraryGL = { glColor4ubv__deps: ['glColor4ub'], glColor4ubv: function(p) { _glColor4ub({{{ makeGetValue('p', '0', 'i8') }}}, {{{ makeGetValue('p', '1', 'i8') }}}, {{{ makeGetValue('p', '2', 'i8') }}}, {{{ makeGetValue('p', '3', 'i8') }}}); - }, + }, glFogf: function(pname, param) { // partial support, TODO switch(pname) { @@ -4318,10 +4440,12 @@ var LibraryGL = { if (disable && GL.immediate.enabledClientAttributes[attrib]) { GL.immediate.enabledClientAttributes[attrib] = false; GL.immediate.totalEnabledClientAttributes--; + this.currentRenderer = null; // Will need to change current renderer, since the set of active vertex pointers changed. if (GLEmulation.currentVao) delete GLEmulation.currentVao.enabledClientStates[cap]; } else if (!disable && !GL.immediate.enabledClientAttributes[attrib]) { GL.immediate.enabledClientAttributes[attrib] = true; GL.immediate.totalEnabledClientAttributes++; + this.currentRenderer = null; // Will need to change current renderer, since the set of active vertex pointers changed. if (GLEmulation.currentVao) GLEmulation.currentVao.enabledClientStates[cap] = 1; } GL.immediate.modifiedClientAttributes = true; @@ -4333,15 +4457,40 @@ var LibraryGL = { glVertexPointer__deps: ['$GLEmulation'], // if any pointers are used, glVertexPointer must be, and if it is, then we need emulation glVertexPointer: function(size, type, stride, pointer) { GL.immediate.setClientAttribute(GL.immediate.VERTEX, size, type, stride, pointer); +#if GL_FFP_ONLY + if (GL.currArrayBuffer) { + Module.ctx.vertexAttribPointer(GL.immediate.VERTEX, size, type, false, stride, pointer); + GL.enableVertexAttribArray(GL.immediate.VERTEX); + } +#endif }, glTexCoordPointer: function(size, type, stride, pointer) { GL.immediate.setClientAttribute(GL.immediate.TEXTURE0 + GL.immediate.clientActiveTexture, size, type, stride, pointer); +#if GL_FFP_ONLY + if (GL.currArrayBuffer) { + var loc = GL.immediate.TEXTURE0 + GL.immediate.clientActiveTexture; + Module.ctx.vertexAttribPointer(loc, size, type, false, stride, pointer); + GL.enableVertexAttribArray(loc); + } +#endif }, glNormalPointer: function(type, stride, pointer) { GL.immediate.setClientAttribute(GL.immediate.NORMAL, 3, type, stride, pointer); +#if GL_FFP_ONLY + if (GL.currArrayBuffer) { + Module.ctx.vertexAttribPointer(GL.immediate.NORMAL, size, type, true, stride, pointer); + GL.enableVertexAttribArray(GL.immediate.NORMAL); + } +#endif }, glColorPointer: function(size, type, stride, pointer) { GL.immediate.setClientAttribute(GL.immediate.COLOR, size, type, stride, pointer); +#if GL_FFP_ONLY + if (GL.currArrayBuffer) { + Module.ctx.vertexAttribPointer(GL.immediate.COLOR, size, type, true, stride, pointer); + GL.enableVertexAttribArray(GL.immediate.COLOR); + } +#endif }, glClientActiveTexture__sig: 'vi', @@ -4424,23 +4573,27 @@ var LibraryGL = { glPushMatrix: function() { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrixStack[GL.immediate.currentMatrix].push( Array.prototype.slice.call(GL.immediate.matrix[GL.immediate.currentMatrix])); }, glPopMatrix: function() { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix[GL.immediate.currentMatrix] = GL.immediate.matrixStack[GL.immediate.currentMatrix].pop(); }, glLoadIdentity__deps: ['$GL', '$GLImmediateSetup'], glLoadIdentity: function() { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.identity(GL.immediate.matrix[GL.immediate.currentMatrix]); }, glLoadMatrixd: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F64', 'matrix', 'matrix+' + (16*8)) }}}, GL.immediate.matrix[GL.immediate.currentMatrix]); }, @@ -4449,35 +4602,41 @@ var LibraryGL = { if (GL.debug) Module.printErr('glLoadMatrixf receiving: ' + Array.prototype.slice.call(HEAPF32.subarray(matrix >> 2, (matrix >> 2) + 16))); #endif GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F32', 'matrix', 'matrix+' + (16*4)) }}}, GL.immediate.matrix[GL.immediate.currentMatrix]); }, glLoadTransposeMatrixd: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F64', 'matrix', 'matrix+' + (16*8)) }}}, GL.immediate.matrix[GL.immediate.currentMatrix]); GL.immediate.matrix.lib.mat4.transpose(GL.immediate.matrix[GL.immediate.currentMatrix]); }, glLoadTransposeMatrixf: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F32', 'matrix', 'matrix+' + (16*4)) }}}, GL.immediate.matrix[GL.immediate.currentMatrix]); GL.immediate.matrix.lib.mat4.transpose(GL.immediate.matrix[GL.immediate.currentMatrix]); }, glMultMatrixd: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.multiply(GL.immediate.matrix[GL.immediate.currentMatrix], {{{ makeHEAPView('F64', 'matrix', 'matrix+' + (16*8)) }}}); }, glMultMatrixf: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.multiply(GL.immediate.matrix[GL.immediate.currentMatrix], {{{ makeHEAPView('F32', 'matrix', 'matrix+' + (16*4)) }}}); }, glMultTransposeMatrixd: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; var colMajor = GL.immediate.matrix.lib.mat4.create(); GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F64', 'matrix', 'matrix+' + (16*8)) }}}, colMajor); GL.immediate.matrix.lib.mat4.transpose(colMajor); @@ -4486,6 +4645,7 @@ var LibraryGL = { glMultTransposeMatrixf: function(matrix) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; var colMajor = GL.immediate.matrix.lib.mat4.create(); GL.immediate.matrix.lib.mat4.set({{{ makeHEAPView('F32', 'matrix', 'matrix+' + (16*4)) }}}, colMajor); GL.immediate.matrix.lib.mat4.transpose(colMajor); @@ -4494,6 +4654,7 @@ var LibraryGL = { glFrustum: function(left, right, bottom, top_, nearVal, farVal) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.multiply(GL.immediate.matrix[GL.immediate.currentMatrix], GL.immediate.matrix.lib.mat4.frustum(left, right, bottom, top_, nearVal, farVal)); }, @@ -4501,6 +4662,7 @@ var LibraryGL = { glOrtho: function(left, right, bottom, top_, nearVal, farVal) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.multiply(GL.immediate.matrix[GL.immediate.currentMatrix], GL.immediate.matrix.lib.mat4.ortho(left, right, bottom, top_, nearVal, farVal)); }, @@ -4508,18 +4670,21 @@ var LibraryGL = { glScaled: function(x, y, z) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.scale(GL.immediate.matrix[GL.immediate.currentMatrix], [x, y, z]); }, glScalef: 'glScaled', glTranslated: function(x, y, z) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.translate(GL.immediate.matrix[GL.immediate.currentMatrix], [x, y, z]); }, glTranslatef: 'glTranslated', glRotated: function(angle, x, y, z) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.rotate(GL.immediate.matrix[GL.immediate.currentMatrix], angle*Math.PI/180, [x, y, z]); }, glRotatef: 'glRotated', @@ -4602,6 +4767,7 @@ var LibraryGL = { gluPerspective: function(fov, aspect, near, far) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix[GL.immediate.currentMatrix] = GL.immediate.matrix.lib.mat4.perspective(fov, aspect, near, far, GL.immediate.matrix[GL.immediate.currentMatrix]); @@ -4609,6 +4775,7 @@ var LibraryGL = { gluLookAt: function(ex, ey, ez, cx, cy, cz, ux, uy, uz) { GL.immediate.matricesModified = true; + GL.immediate.matrixVersion[GL.immediate.currentMatrix] = (GL.immediate.matrixVersion[GL.immediate.currentMatrix] + 1)|0; GL.immediate.matrix.lib.mat4.lookAt(GL.immediate.matrix[GL.immediate.currentMatrix], [ex, ey, ez], [cx, cy, cz], [ux, uy, uz]); }, @@ -4817,14 +4984,14 @@ var LibraryGL = { glClearColor__sig: 'viiii', glIsEnabled__sig: 'ii', glFrontFace__sig: 'vi', - glSampleCoverage__sig: 'vi', + glSampleCoverage__sig: 'vii', }; // Simple pass-through functions. Starred ones have return values. [X] ones have X in the C name but not in the JS name [[0, 'finish flush'], - [1, 'clearDepth clearDepth[f] depthFunc enable disable frontFace cullFace clear lineWidth clearStencil depthMask stencilMask checkFramebufferStatus* generateMipmap activeTexture blendEquation sampleCoverage isEnabled*'], - [2, 'blendFunc blendEquationSeparate depthRange depthRange[f] stencilMaskSeparate hint polygonOffset vertexAttrib1f'], + [1, 'clearDepth clearDepth[f] depthFunc enable disable frontFace cullFace clear lineWidth clearStencil depthMask stencilMask checkFramebufferStatus* generateMipmap activeTexture blendEquation isEnabled*'], + [2, 'blendFunc blendEquationSeparate depthRange depthRange[f] stencilMaskSeparate hint polygonOffset vertexAttrib1f sampleCoverage'], [3, 'texParameteri texParameterf vertexAttrib2f stencilFunc stencilOp'], [4, 'viewport clearColor scissor vertexAttrib3f colorMask renderbufferStorage blendFuncSeparate blendColor stencilFuncSeparate stencilOpSeparate'], [5, 'vertexAttrib4f'], diff --git a/src/library_sdl.js b/src/library_sdl.js index eb8eea97..40e5e3ab 100644 --- a/src/library_sdl.js +++ b/src/library_sdl.js @@ -715,7 +715,7 @@ var LibrarySDL = { // Joystick helper methods and state - joystickEventState: 0, + joystickEventState: 1, // SDL_ENABLE lastJoystickState: {}, // Map from SDL_Joystick* to their last known state. Required to determine if a change has occurred. // Maps Joystick names to pointers. Allows us to avoid reallocating memory for // joystick names each time this function is called. @@ -1247,6 +1247,11 @@ var LibrarySDL = { return 0; }, + SDL_LowerBlit__deps: ['SDL_UpperBlit'], + SDL_LowerBlit: function(src, srcrect, dst, dstrect) { + return _SDL_UpperBlit(src, srcrect, dst, dstrect); + }, + SDL_FillRect: function(surf, rect, color) { var surfData = SDL.surfaces[surf]; assert(!surfData.locked); // but we could unlock and re-lock if we must.. @@ -1910,23 +1915,19 @@ var LibrarySDL = { var filename = ''; var audio; var bytes; - + if (rwops.filename !== undefined) { filename = PATH.resolve(rwops.filename); var raw = Module["preloadedAudios"][filename]; if (!raw) { if (raw === null) Module.printErr('Trying to reuse preloaded audio, but freePreloadedMediaOnUse is set!'); Runtime.warnOnce('Cannot find preloaded audio ' + filename); - + // see if we can read the file-contents from the in-memory FS - var fileObject = FS.findObject(filename); - - if (fileObject === null) Module.printErr('Couldn\'t find file for: ' + filename); - - // We found the file. Load the contents - if (fileObject && !fileObject.isFolder && fileObject.read) { - bytes = fileObject.contents; - } else { + try { + bytes = FS.readFile(filename); + } catch (e) { + Module.printErr('Couldn\'t find file for: ' + filename); return 0; } } @@ -1941,16 +1942,16 @@ var LibrarySDL = { else { return 0; } - + // Here, we didn't find a preloaded audio but we either were passed a filepath for // which we loaded bytes, or we were passed some bytes if (audio === undefined && bytes) { - var blob = new Blob([new Uint8Array(bytes)], {type: rwops.mimetype}); + var blob = new Blob([bytes], {type: rwops.mimetype}); var url = URL.createObjectURL(blob); audio = new Audio(); audio.src = url; } - + var id = SDL.audios.length; // Keep the loaded audio in the audio arrays, ready for playback SDL.audios.push({ diff --git a/src/library_sockfs.js b/src/library_sockfs.js index bc3aa997..2028d841 100644 --- a/src/library_sockfs.js +++ b/src/library_sockfs.js @@ -1,6 +1,6 @@ mergeInto(LibraryManager.library, { $SOCKFS__postset: '__ATINIT__.push({ func: function() { SOCKFS.root = FS.mount(SOCKFS, {}, null); } });', - $SOCKFS__deps: ['$FS'], + $SOCKFS__deps: ['$FS', 'mkport'], $SOCKFS: { mount: function(mount) { return FS.createNode(null, '/', {{{ cDefine('S_IFDIR') }}} | 0777, 0); diff --git a/src/modules.js b/src/modules.js index 5d48ede2..e80115c4 100644 --- a/src/modules.js +++ b/src/modules.js @@ -483,6 +483,11 @@ var PassManager = { print('\n//FORWARDED_DATA:' + JSON.stringify({ Functions: { tables: Functions.tables } })); + } else if (phase == 'glue') { + print('\n//FORWARDED_DATA:' + JSON.stringify({ + Functions: Functions, + EXPORTED_FUNCTIONS: EXPORTED_FUNCTIONS + })); } }, load: function(json) { @@ -496,6 +501,7 @@ var PassManager = { for (var i in data.Functions) { Functions[i] = data.Functions[i]; } + EXPORTED_FUNCTIONS = data.EXPORTED_FUNCTIONS; /* print('\n//LOADED_DATA:' + phase + ':' + JSON.stringify({ Types: Types, diff --git a/src/parseTools.js b/src/parseTools.js index 08cf9b60..ff981264 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -362,7 +362,7 @@ function getVectorNativeType(type) { function getSIMDName(type) { switch (type) { - case 'i32': return 'uint'; + case 'i32': return 'int'; case 'float': return 'float'; default: throw 'getSIMDName ' + type; } @@ -603,10 +603,11 @@ function parseLLVMSegment(segment) { type = segment[0].text; if (type[type.length-1] === '>' && segment[1].text[0] === '<') { // vector literal + var nativeType = getVectorNativeType(type); return { intertype: 'vector', idents: splitTokenList(segment[1].tokens).map(function(pair) { - return pair[1].text; + return parseNumerical(pair[1].text, nativeType); }), type: type }; @@ -1453,7 +1454,7 @@ function makeSetValues(ptr, pos, value, type, num, align) { // If we don't know how to handle this at compile-time, or handling it is best done in a large amount of code, call memset // TODO: optimize the case of numeric num but non-numeric value if (!isNumber(num) || !isNumber(value) || (parseInt(num)/align >= UNROLL_LOOP_MAX)) { - return '_memset(' + asmCoercion(getFastValue(ptr, '+', pos), 'i32') + ', ' + asmCoercion(value, 'i32') + ', ' + asmCoercion(num, 'i32') + ')'; + return '_memset(' + asmCoercion(getFastValue(ptr, '+', pos), 'i32') + ', ' + asmCoercion(value, 'i32') + ', ' + asmCoercion(num, 'i32') + ')|0'; } num = parseInt(num); value = parseInt(value); @@ -2371,29 +2372,28 @@ function processMathop(item) { // vector/SIMD operation Types.usesSIMD = true; switch (op) { - case 'fadd': return 'SIMD.add(' + idents[0] + ',' + idents[1] + ')'; - case 'fsub': return 'SIMD.sub(' + idents[0] + ',' + idents[1] + ')'; - case 'fmul': return 'SIMD.mul(' + idents[0] + ',' + idents[1] + ')'; - case 'fdiv': return 'SIMD.div(' + idents[0] + ',' + idents[1] + ')'; - case 'add' : return 'SIMD.addu32(' + idents[0] + ',' + idents[1] + ')'; - case 'sub' : return 'SIMD.subu32(' + idents[0] + ',' + idents[1] + ')'; - case 'mul' : return 'SIMD.mulu32(' + idents[0] + ',' + idents[1] + ')'; - case 'udiv': return 'SIMD.divu32(' + idents[0] + ',' + idents[1] + ')'; + case 'fadd': return 'SIMD.float32x4.add(' + idents[0] + ',' + idents[1] + ')'; + case 'fsub': return 'SIMD.float32x4.sub(' + idents[0] + ',' + idents[1] + ')'; + case 'fmul': return 'SIMD.float32x4.mul(' + idents[0] + ',' + idents[1] + ')'; + case 'fdiv': return 'SIMD.float32x4.div(' + idents[0] + ',' + idents[1] + ')'; + case 'add' : return 'SIMD.int32x4.add(' + idents[0] + ',' + idents[1] + ')'; + case 'sub' : return 'SIMD.int32x4.sub(' + idents[0] + ',' + idents[1] + ')'; + case 'mul' : return 'SIMD.int32x4.mul(' + idents[0] + ',' + idents[1] + ')'; case 'bitcast': { var inType = item.params[0].type; var outType = item.type; if (inType === '<4 x float>') { assert(outType === '<4 x i32>'); - return 'SIMD.float32x4BitsToUint32x4(' + idents[0] + ')'; + return 'SIMD.float32x4.bitsToInt32x4(' + idents[0] + ')'; } else { assert(inType === '<4 x i32>'); assert(outType === '<4 x float>'); - return 'SIMD.uint32x4BitsToFloat32x4(' + idents[0] + ')'; + return 'SIMD.int32x4.bitsToFloat32x4(' + idents[0] + ')'; } } - case 'and': return 'SIMD.and(' + idents[0] + ',' + idents[1] + ')'; - case 'or': return 'SIMD.or(' + idents[0] + ',' + idents[1] + ')'; - case 'xor': return 'SIMD.xor(' + idents[0] + ',' + idents[1] + ')'; + case 'and': return 'SIMD.int32x4.and(' + idents[0] + ',' + idents[1] + ')'; + case 'or': return 'SIMD.int32x4.or(' + idents[0] + ',' + idents[1] + ')'; + case 'xor': return 'SIMD.int32x4.xor(' + idents[0] + ',' + idents[1] + ')'; default: throw 'vector op todo: ' + dump(item); } } @@ -2697,7 +2697,7 @@ var simdLane = ['x', 'y', 'z', 'w']; function ensureVector(ident, base) { Types.usesSIMD = true; - return ident == 0 ? base + '32x4.zero()' : ident; + return ident == 0 ? base + '32x4.splat(0)' : ident; } function ensureValidFFIType(type) { diff --git a/src/preamble.js b/src/preamble.js index ff9200fc..710b7c52 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -585,16 +585,16 @@ function UTF16ToString(ptr) { } Module['UTF16ToString'] = UTF16ToString; -// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr', +// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr', // null-terminated and encoded in UTF16LE form. The copy will require at most (str.length*2+1)*2 bytes of space in the HEAP. function stringToUTF16(str, outPtr) { for(var i = 0; i < str.length; ++i) { // charCodeAt returns a UTF-16 encoded code unit, so it can be directly written to the HEAP. var codeUnit = str.charCodeAt(i); // possibly a lead surrogate - {{{ makeSetValue('outPtr', 'i*2', 'codeUnit', 'i16') }}} + {{{ makeSetValue('outPtr', 'i*2', 'codeUnit', 'i16') }}}; } // Null-terminate the pointer to the HEAP. - {{{ makeSetValue('outPtr', 'str.length*2', 0, 'i16') }}} + {{{ makeSetValue('outPtr', 'str.length*2', 0, 'i16') }}}; } Module['stringToUTF16'] = stringToUTF16; @@ -620,7 +620,7 @@ function UTF32ToString(ptr) { } Module['UTF32ToString'] = UTF32ToString; -// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr', +// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr', // null-terminated and encoded in UTF32LE form. The copy will require at most (str.length+1)*4 bytes of space in the HEAP, // but can use less, since str.length does not return the number of characters in the string, but the number of UTF-16 code units in the string. function stringToUTF32(str, outPtr) { @@ -632,11 +632,11 @@ function stringToUTF32(str, outPtr) { var trailSurrogate = str.charCodeAt(++iCodeUnit); codeUnit = 0x10000 + ((codeUnit & 0x3FF) << 10) | (trailSurrogate & 0x3FF); } - {{{ makeSetValue('outPtr', 'iChar*4', 'codeUnit', 'i32') }}} + {{{ makeSetValue('outPtr', 'iChar*4', 'codeUnit', 'i32') }}}; ++iChar; } // Null-terminate the pointer to the HEAP. - {{{ makeSetValue('outPtr', 'iChar*4', 0, 'i32') }}} + {{{ makeSetValue('outPtr', 'iChar*4', 0, 'i32') }}}; } Module['stringToUTF32'] = stringToUTF32; @@ -1043,7 +1043,7 @@ function writeStringToMemory(string, buffer, dontAddNull) { var i = 0; while (i < array.length) { var chr = array[i]; - {{{ makeSetValue('buffer', 'i', 'chr', 'i8') }}} + {{{ makeSetValue('buffer', 'i', 'chr', 'i8') }}}; i = i + 1; } } @@ -1061,9 +1061,9 @@ function writeAsciiToMemory(str, buffer, dontAddNull) { #if ASSERTIONS assert(str.charCodeAt(i) === str.charCodeAt(i)&0xff); #endif - {{{ makeSetValue('buffer', 'i', 'str.charCodeAt(i)', 'i8') }}} + {{{ makeSetValue('buffer', 'i', 'str.charCodeAt(i)', 'i8') }}}; } - if (!dontAddNull) {{{ makeSetValue('buffer', 'str.length', 0, 'i8') }}} + if (!dontAddNull) {{{ makeSetValue('buffer', 'str.length', 0, 'i8') }}}; } Module['writeAsciiToMemory'] = writeAsciiToMemory; diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp index 0b4284bc..d2a48f63 100644 --- a/src/relooper/Relooper.cpp +++ b/src/relooper/Relooper.cpp @@ -6,7 +6,12 @@ #include <list> #include <stack> +#if EMSCRIPTEN #include "ministring.h" +#else +#include <string> +typedef std::string ministring; +#endif template <class T, class U> bool contains(const T& container, const U& contained) { return container.find(contained) != container.end(); @@ -66,11 +71,7 @@ static int AsmJS = 0; // Indenter -#if EMSCRIPTEN int Indenter::CurrIndent = 1; -#else -int Indenter::CurrIndent = 0; -#endif // Branch diff --git a/src/runtime.js b/src/runtime.js index dedaf5ea..8ba5d08d 100644 --- a/src/runtime.js +++ b/src/runtime.js @@ -185,8 +185,11 @@ var Runtime = { // type can be a native type or a struct (or null, for structs we only look at size here) getAlignSize: function(type, size, vararg) { // we align i64s and doubles on 64-bit boundaries, unlike x86 +#if TARGET_LE32 == 1 + if (vararg) return 8; +#endif #if TARGET_LE32 - if (type == 'i64' || type == 'double' || vararg) return 8; + if (!vararg && (type == 'i64' || type == 'double')) return 8; if (!type) return Math.min(size, 8); // align structures internally to 64 bits #endif return Math.min(size || (type ? Runtime.getNativeFieldSize(type) : 0), Runtime.QUANTUM_SIZE); diff --git a/src/settings.js b/src/settings.js index bc665973..753e2367 100644 --- a/src/settings.js +++ b/src/settings.js @@ -23,7 +23,8 @@ var QUANTUM_SIZE = 4; // This is the size of an individual field in a structure. // Changing this from the default of 4 is deprecated. var TARGET_X86 = 0; // For i386-pc-linux-gnu -var TARGET_LE32 = 1; // For le32-unknown-nacl +var TARGET_LE32 = 1; // For le32-unknown-nacl. 1 is normal, 2 is for the fastcomp llvm + // backend using pnacl abi simplification var CORRECT_SIGNS = 1; // Whether we make sure to convert unsigned values to signed values. // Decreases performance with additional runtime checks. Might not be @@ -223,6 +224,10 @@ var GL_UNSAFE_OPTS = 1; // Enables some potentially-unsafe optimizations in GL e var FULL_ES2 = 0; // Forces support for all GLES2 features, not just the WebGL-friendly subset. var LEGACY_GL_EMULATION = 0; // Includes code to emulate various desktop GL features. Incomplete but useful // in some cases, see https://github.com/kripken/emscripten/wiki/OpenGL-support +var GL_FFP_ONLY = 0; // If you specified LEGACY_GL_EMULATION = 1 and only use fixed function pipeline in your code, + // you can also set this to 1 to signal the GL emulation layer that it can perform extra + // optimizations by knowing that the user code does not use shaders at all. If + // LEGACY_GL_EMULATION = 0, this setting has no effect. var STB_IMAGE = 0; // Enables building of stb-image, a tiny public-domain library for decoding images, allowing // decoding of images without using the browser's built-in decoders. The benefit is that this diff --git a/src/shell.html b/src/shell.html index 53a4fffb..efb9e91d 100644 --- a/src/shell.html +++ b/src/shell.html @@ -50,7 +50,7 @@ //text = text.replace(/>/g, ">"); //text = text.replace('\n', '<br>', 'g'); element.value += text + "\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })(), printErr: function(text) { diff --git a/src/shell.js b/src/shell.js index b68e16d9..b41fbb51 100644 --- a/src/shell.js +++ b/src/shell.js @@ -91,6 +91,8 @@ else if (ENVIRONMENT_IS_SHELL) { } this['{{{ EXPORT_NAME }}}'] = Module; + + eval("if (typeof gc === 'function' && gc.toString().indexOf('[native code]') > 0) var gc = undefined"); // wipe out the SpiderMonkey shell 'gc' function, which can confuse closure (uses it as a minified name, and it is then initted to a non-falsey value unexpectedly) } else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { Module['read'] = function read(url) { diff --git a/src/simd.js b/src/simd.js index bbb12d0a..c7f5ff48 100644 --- a/src/simd.js +++ b/src/simd.js @@ -20,8 +20,10 @@ https://github.com/johnmccutchan/ecmascript_simd/blob/master/src/ecmascript_simd.js */ +"use strict"; + /** - * Construct a new instance of a float32x4 number. + * Construct a new instance of float32x4 number. * @param {double} value used for x lane. * @param {double} value used for y lane. * @param {double} value used for z lane. @@ -40,7 +42,7 @@ function float32x4(x, y, z, w) { } /** - * Construct a new instance of a float32x4 number with 0.0 in all lanes. + * Construct a new instance of float32x4 number with 0.0 in all lanes. * @constructor */ float32x4.zero = function() { @@ -48,7 +50,7 @@ float32x4.zero = function() { } /** - * Construct a new instance of a float32x4 number with the same value + * Construct a new instance of float32x4 number with the same value * in all lanes. * @param {double} value used for all lanes. * @constructor @@ -87,18 +89,18 @@ Object.defineProperty(float32x4.prototype, 'signMask', { }); /** - * Construct a new instance of a uint32x4 number. + * Construct a new instance of int32x4 number. * @param {integer} 32-bit unsigned value used for x lane. * @param {integer} 32-bit unsigned value used for y lane. * @param {integer} 32-bit unsigned value used for z lane. * @param {integer} 32-bit unsigned value used for w lane. * @constructor */ -function uint32x4(x, y, z, w) { - if (!(this instanceof uint32x4)) { - return new uint32x4(x, y, z, w); +function int32x4(x, y, z, w) { + if (!(this instanceof int32x4)) { + return new int32x4(x, y, z, w); } - this.storage_ = new Uint32Array(4); + this.storage_ = new Int32Array(4); this.storage_[0] = x; this.storage_[1] = y; this.storage_[2] = z; @@ -106,7 +108,7 @@ function uint32x4(x, y, z, w) { } /** - * Construct a new instance of a uint32x4 number with 0xFFFFFFFF or 0x0 in each + * Construct a new instance of int32x4 number with 0xFFFFFFFF or 0x0 in each * lane, depending on the truth value in x, y, z, and w. * @param {boolean} flag used for x lane. * @param {boolean} flag used for y lane. @@ -114,59 +116,59 @@ function uint32x4(x, y, z, w) { * @param {boolean} flag used for w lane. * @constructor */ -uint32x4.bool = function(x, y, z, w) { - return uint32x4(x ? 0xFFFFFFFF : 0x0, - y ? 0xFFFFFFFF : 0x0, - z ? 0xFFFFFFFF : 0x0, - w ? 0xFFFFFFFF : 0x0); +int32x4.bool = function(x, y, z, w) { + return int32x4(x ? -1 : 0x0, + y ? -1 : 0x0, + z ? -1 : 0x0, + w ? -1 : 0x0); } /** - * Construct a new instance of a uint32x4 number with the same value + * Construct a new instance of int32x4 number with the same value * in all lanes. * @param {integer} value used for all lanes. * @constructor */ -uint32x4.splat = function(s) { - return uint32x4(s, s, s, s); +int32x4.splat = function(s) { + return int32x4(s, s, s, s); } -Object.defineProperty(uint32x4.prototype, 'x', { +Object.defineProperty(int32x4.prototype, 'x', { get: function() { return this.storage_[0]; } }); -Object.defineProperty(uint32x4.prototype, 'y', { +Object.defineProperty(int32x4.prototype, 'y', { get: function() { return this.storage_[1]; } }); -Object.defineProperty(uint32x4.prototype, 'z', { +Object.defineProperty(int32x4.prototype, 'z', { get: function() { return this.storage_[2]; } }); -Object.defineProperty(uint32x4.prototype, 'w', +Object.defineProperty(int32x4.prototype, 'w', { get: function() { return this.storage_[3]; } }); -Object.defineProperty(uint32x4.prototype, 'flagX', { +Object.defineProperty(int32x4.prototype, 'flagX', { get: function() { return this.storage_[0] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagY', { +Object.defineProperty(int32x4.prototype, 'flagY', { get: function() { return this.storage_[1] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagZ', { +Object.defineProperty(int32x4.prototype, 'flagZ', { get: function() { return this.storage_[2] != 0x0; } }); -Object.defineProperty(uint32x4.prototype, 'flagW', +Object.defineProperty(int32x4.prototype, 'flagW', { get: function() { return this.storage_[3] != 0x0; } }); /** * Extract the sign bit from each lane return them in the first 4 bits. */ -Object.defineProperty(uint32x4.prototype, 'signMask', { +Object.defineProperty(int32x4.prototype, 'signMask', { get: function() { var mx = (this.storage_[0] & 0x80000000) >>> 31; var my = (this.storage_[1] & 0x80000000) >>> 31; @@ -287,414 +289,580 @@ Float32x4Array.prototype.setAt = function(i, v) { this.storage_[i*4+3] = v.w; } + +function Int32x4Array(a, b, c) { + + function isNumber(o) { + return typeof o == "number" || (typeof o == "object" && o.constructor === Number); + } + + function isTypedArray(o) { + return (o instanceof Int8Array) || + (o instanceof Uint8Array) || + (o instanceof Uint8ClampedArray) || + (o instanceof Int16Array) || + (o instanceof Uint16Array) || + (o instanceof Int32Array) || + (o instanceof Uint32Array) || + (o instanceof Float32Array) || + (o instanceof Float64Array) || + (o instanceof Int32x4Array) || + (o instanceof Float32x4Array); + } + + function isArrayBuffer(o) { + return (o instanceof ArrayBuffer); + } + + if (isNumber(a)) { + this.storage_ = new Int32Array(a*4); + this.length_ = a; + this.byteOffset_ = 0; + return; + } else if (isTypedArray(a)) { + if (!(a instanceof Int32x4Array)) { + throw "Copying typed array of non-Int32x4Array is unimplemented."; + } + this.storage_ = new Int32Array(a.length * 4); + this.length_ = a.length; + this.byteOffset_ = 0; + // Copy floats. + for (var i = 0; i < a.length*4; i++) { + this.storage_[i] = a.storage_[i]; + } + } else if (isArrayBuffer(a)) { + if ((b != undefined) && (b % Int32x4Array.BYTES_PER_ELEMENT) != 0) { + throw "byteOffset must be a multiple of 16."; + } + if (c != undefined) { + c *= 4; + this.storage_ = new Int32Array(a, b, c); + } + else { + // Note: new Int32Array(a, b) is NOT equivalent to new Float32Array(a, b, undefined) + this.storage_ = new Int32Array(a, b); + } + this.length_ = this.storage_.length / 4; + this.byteOffset_ = b != undefined ? b : 0; + } else { + throw "Unknown type of first argument."; + } +} + +Object.defineProperty(Int32x4Array.prototype, 'length', + { get: function() { return this.length_; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'byteLength', + { get: function() { return this.length_ * Int32x4Array.BYTES_PER_ELEMENT; } +}); + +Object.defineProperty(Int32x4Array, 'BYTES_PER_ELEMENT', + { get: function() { return 16; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'BYTES_PER_ELEMENT', + { get: function() { return 16; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'byteOffset', + { get: function() { return this.byteOffset_; } +}); + +Object.defineProperty(Int32x4Array.prototype, 'buffer', + { get: function() { return this.storage_.buffer; } +}); + +Int32x4Array.prototype.getAt = function(i) { + if (i < 0) { + throw "Index must be >= 0."; + } + if (i >= this.length) { + throw "Index out of bounds."; + } + var x = this.storage_[i*4+0]; + var y = this.storage_[i*4+1]; + var z = this.storage_[i*4+2]; + var w = this.storage_[i*4+3]; + return float32x4(x, y, z, w); +} + +Int32x4Array.prototype.setAt = function(i, v) { + if (i < 0) { + throw "Index must be >= 0."; + } + if (i >= this.length) { + throw "Index out of bounds."; + } + if (!(v instanceof int32x4)) { + throw "Value is not a int32x4."; + } + this.storage_[i*4+0] = v.x; + this.storage_[i*4+1] = v.y; + this.storage_[i*4+2] = v.z; + this.storage_[i*4+3] = v.w; +} + var SIMD = (function () { return { - /** - * @return {float32x4} New instance of float32x4 with absolute values of - * t. - */ - abs: function(t) { - return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z), - Math.abs(t.w)); - }, - /** - * @return {float32x4} New instance of float32x4 with negated values of - * t. - */ - neg: function(t) { - return new float32x4(-t.x, -t.y, -t.z, -t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a + b. - */ - add: function(a, b) { - return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a - b. - */ - sub: function(a, b) { - return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a * b. - */ - mul: function(a, b) { - return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with a / b. - */ - div: function(a, b) { - return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); - }, - /** - * @return {float32x4} New instance of float32x4 with t's values clamped - * between lowerLimit and upperLimit. - */ - clamp: function(t, lowerLimit, upperLimit) { - var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x; - var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y; - var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z; - var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w; - cx = cx > upperLimit.x ? upperLimit.x : cx; - cy = cy > upperLimit.y ? upperLimit.y : cy; - cz = cz > upperLimit.z ? upperLimit.z : cz; - cw = cw > upperLimit.w ? upperLimit.w : cw; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with the minimum value of - * t and other. - */ - min: function(t, other) { - var cx = t.x > other.x ? other.x : t.x; - var cy = t.y > other.y ? other.y : t.y; - var cz = t.z > other.z ? other.z : t.z; - var cw = t.w > other.w ? other.w : t.w; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with the maximum value of - * t and other. - */ - max: function(t, other) { - var cx = t.x < other.x ? other.x : t.x; - var cy = t.y < other.y ? other.y : t.y; - var cz = t.z < other.z ? other.z : t.z; - var cw = t.w < other.w ? other.w : t.w; - return new float32x4(cx, cy, cz, cw); - }, - /** - * @return {float32x4} New instance of float32x4 with reciprocal value of - * t. - */ - reciprocal: function(t) { - return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with square root of the - * reciprocal value of t. - */ - reciprocalSqrt: function(t) { - return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y), - Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w)); - }, - /** - * @return {float32x4} New instance of float32x4 with values of t - * scaled by s. - */ - scale: function(t, s) { - return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w); - }, - /** - * @return {float32x4} New instance of float32x4 with square root of - * values of t. - */ - sqrt: function(t) { - return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y), - Math.sqrt(t.z), Math.sqrt(t.w)); - }, - /** - * @param {float32x4} t An instance of float32x4 to be shuffled. - * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. - * @return {float32x4} New instance of float32x4 with lanes shuffled. - */ - shuffle: function(t, mask) { - var _x = (mask) & 0x3; - var _y = (mask >> 2) & 0x3; - var _z = (mask >> 4) & 0x3; - var _w = (mask >> 6) & 0x3; - return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], - t.storage_[_w]); + float32x4: { + /** + * @return {float32x4} New instance of float32x4 with absolute values of + * t. + */ + abs: function(t) { + return new float32x4(Math.abs(t.x), Math.abs(t.y), Math.abs(t.z), + Math.abs(t.w)); + }, + /** + * @return {float32x4} New instance of float32x4 with negated values of + * t. + */ + neg: function(t) { + return new float32x4(-t.x, -t.y, -t.z, -t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a + b. + */ + add: function(a, b) { + return new float32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a - b. + */ + sub: function(a, b) { + return new float32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a * b. + */ + mul: function(a, b) { + return new float32x4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with a / b. + */ + div: function(a, b) { + return new float32x4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); + }, + /** + * @return {float32x4} New instance of float32x4 with t's values clamped + * between lowerLimit and upperLimit. + */ + clamp: function(t, lowerLimit, upperLimit) { + var cx = t.x < lowerLimit.x ? lowerLimit.x : t.x; + var cy = t.y < lowerLimit.y ? lowerLimit.y : t.y; + var cz = t.z < lowerLimit.z ? lowerLimit.z : t.z; + var cw = t.w < lowerLimit.w ? lowerLimit.w : t.w; + cx = cx > upperLimit.x ? upperLimit.x : cx; + cy = cy > upperLimit.y ? upperLimit.y : cy; + cz = cz > upperLimit.z ? upperLimit.z : cz; + cw = cw > upperLimit.w ? upperLimit.w : cw; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with the minimum value of + * t and other. + */ + min: function(t, other) { + var cx = t.x > other.x ? other.x : t.x; + var cy = t.y > other.y ? other.y : t.y; + var cz = t.z > other.z ? other.z : t.z; + var cw = t.w > other.w ? other.w : t.w; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with the maximum value of + * t and other. + */ + max: function(t, other) { + var cx = t.x < other.x ? other.x : t.x; + var cy = t.y < other.y ? other.y : t.y; + var cz = t.z < other.z ? other.z : t.z; + var cw = t.w < other.w ? other.w : t.w; + return new float32x4(cx, cy, cz, cw); + }, + /** + * @return {float32x4} New instance of float32x4 with reciprocal value of + * t. + */ + reciprocal: function(t) { + return new float32x4(1.0 / t.x, 1.0 / t.y, 1.0 / t.z, 1.0 / t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with square root of the + * reciprocal value of t. + */ + reciprocalSqrt: function(t) { + return new float32x4(Math.sqrt(1.0 / t.x), Math.sqrt(1.0 / t.y), + Math.sqrt(1.0 / t.z), Math.sqrt(1.0 / t.w)); + }, + /** + * @return {float32x4} New instance of float32x4 with values of t + * scaled by s. + */ + scale: function(t, s) { + return new float32x4(s * t.x, s * t.y, s * t.z, s * t.w); + }, + /** + * @return {float32x4} New instance of float32x4 with square root of + * values of t. + */ + sqrt: function(t) { + return new float32x4(Math.sqrt(t.x), Math.sqrt(t.y), + Math.sqrt(t.z), Math.sqrt(t.w)); + }, + /** + * @param {float32x4} t An instance of float32x4 to be shuffled. + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {float32x4} New instance of float32x4 with lanes shuffled. + */ + shuffle: function(t, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new float32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], + t.storage_[_w]); + }, + /** + * @param {float32x4} t1 An instance of float32x4 to be shuffled. XY lanes in result + * @param {float32x4} t2 An instance of float32x4 to be shuffled. ZW lanes in result + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {float32x4} New instance of float32x4 with lanes shuffled. + */ + shuffleMix: function(t1, t2, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new float32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z], + t2.storage_[_w]); + }, + /** + * @param {double} value used for x lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * x replaced with {x}. + */ + withX: function(t, x) { + return new float32x4(x, t.y, t.z, t.w); + }, + /** + * @param {double} value used for y lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * y replaced with {y}. + */ + withY: function(t, y) { + return new float32x4(t.x, y, t.z, t.w); + }, + /** + * @param {double} value used for z lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * z replaced with {z}. + */ + withZ: function(t, z) { + return new float32x4(t.x, t.y, z, t.w); + }, + /** + * @param {double} value used for w lane. + * @return {float32x4} New instance of float32x4 with the values in t and + * w replaced with {w}. + */ + withW: function(t, w) { + return new float32x4(t.x, t.y, t.z, w); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t < other. + */ + lessThan: function(t, other) { + var cx = t.x < other.x; + var cy = t.y < other.y; + var cz = t.z < other.z; + var cw = t.w < other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t <= other. + */ + lessThanOrEqual: function(t, other) { + var cx = t.x <= other.x; + var cy = t.y <= other.y; + var cz = t.z <= other.z; + var cw = t.w <= other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t == other. + */ + equal: function(t, other) { + var cx = t.x == other.x; + var cy = t.y == other.y; + var cz = t.z == other.z; + var cw = t.w == other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t != other. + */ + notEqual: function(t, other) { + var cx = t.x != other.x; + var cy = t.y != other.y; + var cz = t.z != other.z; + var cw = t.w != other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t >= other. + */ + greaterThanOrEqual: function(t, other) { + var cx = t.x >= other.x; + var cy = t.y >= other.y; + var cz = t.z >= other.z; + var cw = t.w >= other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @param {float32x4} other An instance of float32x4. + * @return {int32x4} 0xFFFFFFFF or 0x0 in each lane depending on + * the result of t > other. + */ + greaterThan: function(t, other) { + var cx = t.x > other.x; + var cy = t.y > other.y; + var cz = t.z > other.z; + var cw = t.w > other.w; + return int32x4.bool(cx, cy, cz, cw); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @return {int32x4} a bit-wise copy of t as a int32x4. + */ + bitsToInt32x4: function(t) { + var alias = new Int32Array(t.storage_.buffer); + return new int32x4(alias[0], alias[1], alias[2], alias[3]); + }, + /** + * @param {float32x4} t An instance of float32x4. + * @return {int32x4} with a integer to float conversion of t. + */ + toInt32x4: function(t) { + var a = new int32x4(t.storage_[0], t.storage_[1], t.storage_[2], + t.storage_[3]); + return a; + } }, - /** - * @param {double} value used for x lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * x replaced with {x}. - */ - withX: function(t, x) { - return new float32x4(x, t.y, t.z, t.w); - }, - /** - * @param {double} value used for y lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * y replaced with {y}. - */ - withY: function(t, y) { - return new float32x4(t.x, y, t.z, t.w); - }, - /** - * @param {double} value used for z lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * z replaced with {z}. - */ - withZ: function(t, z) { - return new float32x4(t.x, t.y, z, t.w); - }, - /** - * @param {double} value used for w lane. - * @return {float32x4} New instance of float32x4 with the values in t and - * w replaced with {w}. - */ - withW: function(t, w) { - return new float32x4(t.x, t.y, t.z, w); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t < other. - */ - lessThan: function(t, other) { - var cx = t.x < other.x; - var cy = t.y < other.y; - var cz = t.z < other.z; - var cw = t.w < other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t <= other. - */ - lessThanOrEqual: function(t, other) { - var cx = t.x <= other.x; - var cy = t.y <= other.y; - var cz = t.z <= other.z; - var cw = t.w <= other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t == other. - */ - equal: function(t, other) { - var cx = t.x == other.x; - var cy = t.y == other.y; - var cz = t.z == other.z; - var cw = t.w == other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t != other. - */ - notEqual: function(t, other) { - var cx = t.x != other.x; - var cy = t.y != other.y; - var cz = t.z != other.z; - var cw = t.w != other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t >= other. - */ - greaterThanOrEqual: function(t, other) { - var cx = t.x >= other.x; - var cy = t.y >= other.y; - var cz = t.z >= other.z; - var cw = t.w >= other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @param {float32x4} other An instance of a float32x4. - * @return {uint32x4} 0xFFFFFFFF or 0x0 in each lane depending on - * the result of t > other. - */ - greaterThan: function(t, other) { - var cx = t.x > other.x; - var cy = t.y > other.y; - var cz = t.z > other.z; - var cw = t.w > other.w; - return uint32x4.bool(cx, cy, cz, cw); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a & b. - */ - and: function(a, b) { - return new uint32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a | b. - */ - or: function(a, b) { - return new uint32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w); - }, - /** - * @param {uint32x4} a An instance of a uint32x4. - * @param {uint32x4} b An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a ^ b. - */ - xor: function(a, b) { - return new uint32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of ~a - */ - negu32: function(t) { - return new uint32x4(~t.x, ~t.y, ~t.z, ~t.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a + b. - */ - addu32: function(a, b) { - return new uint32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a - b. - */ - subu32: function(a, b) { - return new uint32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); - }, - /** - * @param {uint32x4} a An instance of uint32x4. - * @param {uint32x4} b An instance of uint32x4. - * @return {uint32x4} New instance of uint32x4 with values of a * b. - */ - mulu32: function(a, b) { - return new uint32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y), - Math.imul(a.z, b.z), Math.imul(a.w, b.w)); - }, - /** - * @param {float32x4} - */ - select: function(t, trueValue, falseValue) { - var tv = SIMD.float32x4BitsToUint32x4(trueValue); - var fv = SIMD.float32x4BitsToUint32x4(falseValue); - var tr = SIMD.and(t, tv); - var fr = SIMD.and(SIMD.negu32(t), fv); - return SIMD.uint32x4BitsToFloat32x4(SIMD.or(tr, fr)); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for x lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * x lane replaced with {x}. - */ - withXu32: function(t, x) { - return new uint32x4(x, t.y, t.z, t.w); - }, - /** - * param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for y lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * y lane replaced with {y}. - */ - withYu32: function(t, y) { - return new uint32x4(t.x, y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {integer} 32-bit value used for z lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * z lane replaced with {z}. - */ - withZu32: function(t, z) { - return new uint32x4(t.x, t.y, z, t.w); - }, - /** - * @param {integer} 32-bit value used for w lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * w lane replaced with {w}. - */ - withWu32: function(t, w) { - return new uint32x4(t.x, t.y, t.z, w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} x flag used for x lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * x lane replaced with {x}. - */ - withFlagX: function(t, flagX) { - var x = flagX ? 0xFFFFFFFF : 0x0; - return new uint32x4(x, t.y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} y flag used for y lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * y lane replaced with {y}. - */ - withFlagY: function(t, flagY) { - var y = flagY ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, y, t.z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} z flag used for z lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * z lane replaced with {z}. - */ - withFlagZ: function(t, flagZ) { - var z = flagZ ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, t.y, z, t.w); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @param {boolean} w flag used for w lane. - * @return {uint32x4} New instance of uint32x4 with the values in t and - * w lane replaced with {w}. - */ - withFlagW: function(t, flagW) { - var w = flagW ? 0xFFFFFFFF : 0x0; - return new uint32x4(t.x, t.y, t.z, w); - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @return {uint32x4} a bit-wise copy of t as a uint32x4. - */ - float32x4BitsToUint32x4: function(t) { - var alias = new Uint32Array(t.storage_.buffer); - return new uint32x4(alias[0], alias[1], alias[2], alias[3]); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {float32x4} a bit-wise copy of t as a float32x4. - */ - uint32x4BitsToFloat32x4: function(t) { - var alias = new Float32Array(t.storage_.buffer); - return new float32x4(alias[0], alias[1], alias[2], alias[3]); - }, - /** - * @param {uint32x4} t An instance of a uint32x4. - * @return {float32x4} with a float to integer conversion copy of t. - */ - uint32x4ToFloat32x4: function(t) { - var a = float32x4.zero(); - a.storage_[0] = t.storage_[0]; - a.storage_[1] = t.storage_[1]; - a.storage_[2] = t.storage_[2]; - a.storage_[3] = t.storage_[3]; - return a; - }, - /** - * @param {float32x4} t An instance of a float32x4. - * @return {uint32x4} with a integer to float conversion of t. - */ - float32x4ToUint32x4: function(t) { - var a = new uint32x4(t.storage_[0], t.storage_[1], t.storage_[2], - t.storage_[3]); - return a; + int32x4: { + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a & b. + */ + and: function(a, b) { + return new int32x4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a | b. + */ + or: function(a, b) { + return new int32x4(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a ^ b. + */ + xor: function(a, b) { + return new int32x4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of ~t + */ + not: function(t) { + return new int32x4(~t.x, ~t.y, ~t.z, ~t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of -t + */ + neg: function(t) { + return new int32x4(-t.x, -t.y, -t.z, -t.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a + b. + */ + add: function(a, b) { + return new int32x4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a - b. + */ + sub: function(a, b) { + return new int32x4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + }, + /** + * @param {int32x4} a An instance of int32x4. + * @param {int32x4} b An instance of int32x4. + * @return {int32x4} New instance of int32x4 with values of a * b. + */ + mul: function(a, b) { + return new int32x4(Math.imul(a.x, b.x), Math.imul(a.y, b.y), + Math.imul(a.z, b.z), Math.imul(a.w, b.w)); + }, + /** + * @param {int32x4} t An instance of float32x4 to be shuffled. + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {int32x4} New instance of float32x4 with lanes shuffled. + */ + shuffle: function(t, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new int32x4(t.storage_[_x], t.storage_[_y], t.storage_[_z], + t.storage_[_w]); + }, + /** + * @param {int32x4} t1 An instance of float32x4 to be shuffled. XY lanes in result + * @param {int32x4} t2 An instance of float32x4 to be shuffled. ZW lanes in result + * @param {integer} mask One of the 256 shuffle masks, for example, SIMD.XXXX. + * @return {int32x4} New instance of float32x4 with lanes shuffled. + */ + shuffleMix: function(t1, t2, mask) { + var _x = (mask) & 0x3; + var _y = (mask >> 2) & 0x3; + var _z = (mask >> 4) & 0x3; + var _w = (mask >> 6) & 0x3; + return new int32x4(t1.storage_[_x], t1.storage_[_y], t2.storage_[_z], + t2.storage_[_w]); + }, + /** + * @param {float32x4} + */ + select: function(t, trueValue, falseValue) { + var tv = SIMD.float32x4.bitsToInt32x4(trueValue); + var fv = SIMD.float32x4.bitsToInt32x4(falseValue); + var tr = SIMD.int32x4.and(t, tv); + var fr = SIMD.int32x4.and(SIMD.int32x4.not(t), fv); + return SIMD.int32x4.bitsToFloat32x4(SIMD.int32x4.or(tr, fr)); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for x lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * x lane replaced with {x}. + */ + withX: function(t, x) { + return new int32x4(x, t.y, t.z, t.w); + }, + /** + * param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for y lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * y lane replaced with {y}. + */ + withY: function(t, y) { + return new int32x4(t.x, y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {integer} 32-bit value used for z lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * z lane replaced with {z}. + */ + withZ: function(t, z) { + return new int32x4(t.x, t.y, z, t.w); + }, + /** + * @param {integer} 32-bit value used for w lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * w lane replaced with {w}. + */ + withW: function(t, w) { + return new int32x4(t.x, t.y, t.z, w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} x flag used for x lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * x lane replaced with {x}. + */ + withFlagX: function(t, flagX) { + var x = flagX ? 0xFFFFFFFF : 0x0; + return new int32x4(x, t.y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} y flag used for y lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * y lane replaced with {y}. + */ + withFlagY: function(t, flagY) { + var y = flagY ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, y, t.z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} z flag used for z lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * z lane replaced with {z}. + */ + withFlagZ: function(t, flagZ) { + var z = flagZ ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, t.y, z, t.w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @param {boolean} w flag used for w lane. + * @return {int32x4} New instance of int32x4 with the values in t and + * w lane replaced with {w}. + */ + withFlagW: function(t, flagW) { + var w = flagW ? 0xFFFFFFFF : 0x0; + return new int32x4(t.x, t.y, t.z, w); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {float32x4} a bit-wise copy of t as a float32x4. + */ + bitsToFloat32x4: function(t) { + var alias = new Float32Array(t.storage_.buffer); + return new float32x4(alias[0], alias[1], alias[2], alias[3]); + }, + /** + * @param {int32x4} t An instance of int32x4. + * @return {float32x4} with a float to integer conversion copy of t. + */ + toFloat32x4: function(t) { + var a = float32x4.zero(); + a.storage_[0] = t.storage_[0]; + a.storage_[1] = t.storage_[1]; + a.storage_[2] = t.storage_[2]; + a.storage_[3] = t.storage_[3]; + return a; + } } } })(); @@ -955,4 +1123,3 @@ Object.defineProperty(SIMD, 'WWWX', { get: function() { return 0x3F; } }); Object.defineProperty(SIMD, 'WWWY', { get: function() { return 0x7F; } }); Object.defineProperty(SIMD, 'WWWZ', { get: function() { return 0xBF; } }); Object.defineProperty(SIMD, 'WWWW', { get: function() { return 0xFF; } }); - diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h new file mode 100644 index 00000000..31265db8 --- /dev/null +++ b/system/include/emscripten/emmintrin.h @@ -0,0 +1,87 @@ +#include <xmmintrin.h> + +typedef int32x4 __m128i; + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set_epi32(int z, int y, int x, int w) +{ + return (__m128i){ w, x, y, z }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_set1_epi32(int w) +{ + return (__m128i){ w, w, w, w }; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_setzero_si128() +{ + return (__m128i){ 0, 0, 0, 0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_si128(__m128i *p, __m128i a) +{ + *p = a; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_and_si128(__m128i a, __m128i b) +{ + return a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_andnot_si128(__m128i a, __m128i b) +{ + return ~a & b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_or_si128(__m128i a, __m128i b) +{ + return a | b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_xor_si128(__m128i a, __m128i b) +{ + return a ^ b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_add_epi32(__m128i a, __m128i b) +{ + return a + b; +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_sub_epi32(__m128i a, __m128i b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_castsi128_ps(__m128i a) +{ + return emscripten_int32x4_bitsToFloat32x4(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cvtepi32_ps(__m128i a) +{ + return emscripten_int32x4_toFloat32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_castps_si128(__m128 a) +{ + return emscripten_float32x4_bitsToInt32x4(a); +} + +static __inline__ __m128i __attribute__((__always_inline__)) +_mm_cvtps_epi32(__m128 a) +{ + return emscripten_float32x4_toInt32x4(a); +}
\ No newline at end of file diff --git a/system/include/emscripten/vector.h b/system/include/emscripten/vector.h index 938f2369..cf26a5d6 100644 --- a/system/include/emscripten/vector.h +++ b/system/include/emscripten/vector.h @@ -2,7 +2,7 @@ // Support for the JS SIMD API proposal, https://github.com/johnmccutchan/ecmascript_simd typedef float float32x4 __attribute__((__vector_size__(16))); -typedef unsigned int uint32x4 __attribute__((__vector_size__(16))); +typedef unsigned int int32x4 __attribute__((__vector_size__(16))); #ifdef __cplusplus extern "C" { @@ -10,6 +10,24 @@ extern "C" { unsigned int emscripten_float32x4_signmask(float32x4 x); +float32x4 emscripten_float32x4_min(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_max(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_sqrt(float32x4 a); +float32x4 emscripten_float32x4_lessThan(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_lessThanOrEqual(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_equal(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_greaterThanOrEqual(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_greaterThan(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_and(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_andNot(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_or(float32x4 a, float32x4 b); +float32x4 emscripten_float32x4_xor(float32x4 a, float32x4 b); + +float32x4 emscripten_int32x4_bitsToFloat32x4(int32x4 a); +float32x4 emscripten_int32x4_toFloat32x4(int32x4 a); +int32x4 emscripten_float32x4_bitsToInt32x4(float32x4 a); +int32x4 emscripten_float32x4_toInt32x4(float32x4 a); + #ifdef __cplusplus } #endif diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h new file mode 100644 index 00000000..1b9108fa --- /dev/null +++ b/system/include/emscripten/xmmintrin.h @@ -0,0 +1,131 @@ +#include <vector.h> + +typedef float32x4 __m128; + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set_ps(float z, float y, float x, float w) +{ + return (__m128){ w, x, y, z }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_set1_ps(float w) +{ + return (__m128){ w, w, w, w }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_setzero_ps(void) +{ + return (__m128){ 0.0, 0.0, 0.0, 0.0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_ps(float *p, __m128 a) +{ + *(__m128 *)p = a; +} + +static __inline__ int __attribute__((__always_inline__)) +_mm_movemask_ps(__m128 a) +{ + return emscripten_float32x4_signmask(a); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_add_ps(__m128 a, __m128 b) +{ + return a + b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sub_ps(__m128 a, __m128 b) +{ + return a - b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_mul_ps(__m128 a, __m128 b) +{ + return a * b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_div_ps(__m128 a, __m128 b) +{ + return a / b; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_min_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_min(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_max_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_max(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_sqrt_ps(__m128 a) +{ + return emscripten_float32x4_sqrt(a); +} + +/* TODO: shuffles */ + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmplt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmple_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_lessThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpeq_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_equal(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpge_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThanOrEqual(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_cmpgt_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_greaterThan(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_and_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_and(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_andnot_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_andNot(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_or_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_or(a, b); +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_xor_ps(__m128 a, __m128 b) +{ + return emscripten_float32x4_xor(a, b); +} diff --git a/tests/embind/shell.html b/tests/embind/shell.html index f0ee10f8..7a3b0a07 100644 --- a/tests/embind/shell.html +++ b/tests/embind/shell.html @@ -48,7 +48,7 @@ //text = text.replace(/>/g, ">"); //text = text.replace('\n', '<br>', 'g'); element.value += text + "\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })(), printErr: function(text) { diff --git a/tests/hello_world_worker.cpp b/tests/hello_world_worker.cpp index 5ea26d91..5b673df8 100644 --- a/tests/hello_world_worker.cpp +++ b/tests/hello_world_worker.cpp @@ -1,9 +1,17 @@ +#include <string.h> #include <stdio.h> #include <emscripten.h> int main() { printf("you should not see this text when in a worker!\n"); // this should not crash, but also should not show up anywhere if you are in a worker - emscripten_run_script("if (typeof postMessage !== 'undefined') { postMessage('hello from worker!') }"); + FILE *f = fopen("file.dat", "r"); + char buffer[100]; + memset(buffer, 0, 100); + buffer[0] = 0; + fread(buffer, 10, 1, f); + char buffer2[100]; + sprintf(buffer2, "if (typeof postMessage !== 'undefined') { postMessage('hello from worker, and |%s|') }", buffer); + emscripten_run_script(buffer2); } diff --git a/tests/pthread/specific.c b/tests/pthread/specific.c index 914884e7..631baf8c 100644 --- a/tests/pthread/specific.c +++ b/tests/pthread/specific.c @@ -33,6 +33,14 @@ int main(void) printf("pthread_getspecific = %d\n", *data2); assert(*data2 == 123); + rv = pthread_setspecific(key, NULL); + printf("valid pthread_setspecific for value NULL = %d\n", rv); + assert(rv == 0); + + data2 = pthread_getspecific(key); + assert(data2 == NULL); + printf("pthread_getspecific = %p\n", data2); + rv = pthread_key_create(&key, &destr_function); data2 = pthread_getspecific(key); printf("pthread_getspecific after key recreate = %p\n", data2); diff --git a/tests/pthread/specific.c.txt b/tests/pthread/specific.c.txt index ad122b3d..ce7bef3d 100644 --- a/tests/pthread/specific.c.txt +++ b/tests/pthread/specific.c.txt @@ -1,6 +1,8 @@ pthread_key_create = 0 pthread_setspecific = 0 pthread_getspecific = 123 +valid pthread_setspecific for value NULL = 0 +pthread_getspecific = (nil) pthread_getspecific after key recreate = (nil) pthread_key_delete = 0 pthread_key_delete repeated = 22 diff --git a/tests/sdl_canvas_size.html b/tests/sdl_canvas_size.html index 50495049..37e3818f 100644 --- a/tests/sdl_canvas_size.html +++ b/tests/sdl_canvas_size.html @@ -51,7 +51,7 @@ //text = text.replace(/>/g, ">"); //text = text.replace('\n', '<br>', 'g'); element.value += text + "\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })(), printErr: function(text) { diff --git a/tests/sdl_joystick.c b/tests/sdl_joystick.c index 50802c31..bdb621ab 100644 --- a/tests/sdl_joystick.c +++ b/tests/sdl_joystick.c @@ -72,6 +72,10 @@ void main_2(void* arg) { assert(SDL_JoystickNumAxes(pad1) == 4); assert(SDL_JoystickNumButtons(pad1) == 16); + // By default, SDL will automatically process events. Test this behavior, and then disable it. + assert(SDL_JoystickEventState(SDL_QUERY) == SDL_ENABLE); + SDL_JoystickEventState(SDL_DISABLE); + assert(SDL_JoystickEventState(SDL_QUERY) == SDL_DISABLE); // Button events. emscripten_run_script("window.simulateGamepadButtonDown(0, 1)"); // We didn't tell SDL to automatically update this joystick's state. diff --git a/tests/test_browser.py b/tests/test_browser.py index 128820b3..d0618af8 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -183,7 +183,7 @@ If manually bisecting: //text = text.replace(/>/g, ">"); //text = text.replace('\\n', '<br>', 'g'); element.value += text + "\\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })(), printErr: function(text) { @@ -274,7 +274,7 @@ If manually bisecting: //text = text.replace(/>/g, ">"); //text = text.replace('\\n', '<br>', 'g'); element.value += text + "\\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })(), printErr: function(text) { @@ -898,7 +898,7 @@ keydown(100);keyup(100); // trigger the end return function(text) { text = Array.prototype.slice.call(arguments).join(' '); element.value += text + "\\n"; - element.scrollTop = 99999; // focus on bottom + element.scrollTop = element.scrollHeight; // focus on bottom }; })() }; @@ -1218,10 +1218,7 @@ keydown(100);keyup(100); // trigger the end def test_worker(self): # Test running in a web worker - output = Popen([PYTHON, EMCC, path_from_root('tests', 'hello_world_worker.cpp'), '-o', 'worker.js'], stdout=PIPE, stderr=PIPE).communicate() - assert len(output[0]) == 0, output[0] - assert os.path.exists('worker.js'), output - self.assertContained('you should not see this text when in a worker!', run_js('worker.js')) # code should run standalone + open('file.dat', 'w').write('data for worker') html_file = open('main.html', 'w') html_file.write(''' <html> @@ -1240,7 +1237,15 @@ keydown(100);keyup(100); // trigger the end </html> ''') html_file.close() - self.run_browser('main.html', 'You should see that the worker was called, and said "hello from worker!"', '/report_result?hello%20from%20worker!') + + # no file data + for file_data in [0, 1]: + print 'file data', file_data + output = Popen([PYTHON, EMCC, path_from_root('tests', 'hello_world_worker.cpp'), '-o', 'worker.js'] + (['--preload-file', 'file.dat'] if file_data else []) , stdout=PIPE, stderr=PIPE).communicate() + assert len(output[0]) == 0, output[0] + assert os.path.exists('worker.js'), output + if not file_data: self.assertContained('you should not see this text when in a worker!', run_js('worker.js')) # code should run standalone + self.run_browser('main.html', '', '/report_result?hello%20from%20worker,%20and%20|' + ('data%20for%20w' if file_data else '') + '|') def test_chunked_synchronous_xhr(self): main = 'chunked_sync_xhr.html' diff --git a/tests/test_core.py b/tests/test_core.py index 1803c926..69abfc0e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -3865,6 +3865,25 @@ Exiting setjmp function, level: 0, prev_jmp: -1 process.communicate() assert process.returncode is 0, 'float.h should agree with our system' + def test_llvm_used(self): + src = r''' + #include <stdio.h> + #include <emscripten.h> + + extern "C" { + EMSCRIPTEN_KEEPALIVE void foobar(int x) { + printf("Worked! %d\n", x); + } + } + + int main() { + emscripten_run_script("Module['_foobar'](10)"); + return 0; + }''' + + Building.LLVM_OPTS = 3 + self.do_run(src, 'Worked! 10\n') + def test_emscripten_api(self): #if Settings.MICRO_OPTS or Settings.RELOOP or Building.LLVM_OPTS: return self.skip('FIXME') @@ -8730,6 +8749,8 @@ void*:16 self.do_run(path_from_root('tests', 'cubescript'), '*\nTemp is 33\n9\n5\nhello, everyone\n*', main_file='command.cpp') + if os.environ.get('EMCC_FAST_COMPILER') == '1': return self.skip('skipping extra parts in fastcomp') + assert 'asm2g' in test_modes if self.run_name == 'asm2g': results = {} @@ -8792,20 +8813,20 @@ int main(int argc, char **argv) { printf("zeros %d, %d, %d, %d\n", (int)c[0], (int)c[1], (int)c[2], (int)c[3]); } { - uint32x4 *a = (uint32x4*)&data[0]; - uint32x4 *b = (uint32x4*)&data[4]; - uint32x4 c, d, e, f; + int32x4 *a = (int32x4*)&data[0]; + int32x4 *b = (int32x4*)&data[4]; + int32x4 c, d, e, f; c = *a; d = *b; - printf("4uints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]); + printf("4ints! %d, %d, %d, %d %d, %d, %d, %d\n", c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3]); e = c+d; f = c-d; - printf("5uints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); + printf("5ints! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); e = c&d; f = c|d; e = ~c&d; f = c^d; - printf("5uintops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); + printf("5intops! %d, %d, %d, %d %d, %d, %d, %d\n", e[0], e[1], e[2], e[3], f[0], f[1], f[2], f[3]); } { float32x4 c, d, e, f; @@ -8823,9 +8844,9 @@ int main(int argc, char **argv) { 2floats! 48, 68, 92, 120 42, 56, 72, 90 3floats! 48, 68, 92, 120 2016, 3808, 6624, 10800 zeros 0, 0, 0, 0 -4uints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736 -5uints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056 -5uintops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096 +4ints! 1086324736, 1094713344, 1101004800, 1106247680 1109917696, 1113587712, 1116733440, 1119092736 +5ints! -2098724864, -2086666240, -2077229056, -2069626880 -23592960, -18874368, -15728640, -12845056 +5intops! 36175872, 35651584, 34603008, 33816576 48758784, 52428800, 53477376, 54788096 6floats! -9, 0, 4, 9 -2, -12, 14, 10 ''') @@ -8876,6 +8897,484 @@ zeros 0, 0, 0, 0 16.000000 ''') + def test_simd3(self): + if Settings.USE_TYPED_ARRAYS != 2: return self.skip('needs ta2') + if Settings.ASM_JS: Settings.ASM_JS = 2 # does not validate + src = r''' + #include <iostream> + #include <emmintrin.h> + #include <assert.h> + #include <stdint.h> + #include <bitset> + + using namespace std; + + void testSetPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + _mm_store_ps(ar, v); + assert(ar[0] == 4.0); + assert(ar[1] == 3.0); + assert(ar[2] == 2.0); + assert(ar[3] == 1.0); + } + + void testSet1Ps() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_set1_ps(5.5); + _mm_store_ps(ar, v); + assert(ar[0] == 5.5); + assert(ar[1] == 5.5); + assert(ar[2] == 5.5); + assert(ar[3] == 5.5); + } + + void testSetZeroPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v = _mm_setzero_ps(); + _mm_store_ps(ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testSetEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_set_epi32(5, 7, 126, 381); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 381); + assert(ar[1] == 126); + assert(ar[2] == 7); + assert(ar[3] == 5); + v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x12345678); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xaaaaaaaa); + assert(ar[3] == 0x55555555); + } + + void testSet1Epi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_set1_epi32(-5); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == -5); + assert(ar[1] == -5); + assert(ar[2] == -5); + assert(ar[3] == -5); + } + + void testSetZeroSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v = _mm_setzero_si128(); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testBitCasts() { + int32_t __attribute__((__aligned__(16))) ar1[4]; + float __attribute__((__aligned__(16))) ar2[4]; + __m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000); + __m128 v2 = _mm_castsi128_ps(v1); + _mm_store_ps(ar2, v2); + assert(ar2[0] == 4.0); + assert(ar2[1] == 3.0); + assert(ar2[2] == 2.0); + assert(ar2[3] == 1.0); + v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0); + v1 = _mm_castps_si128(v2); + _mm_store_si128((__m128i *)ar1, v1); + assert(ar1[0] == 0x41000000); + assert(ar1[1] == 0x40e00000); + assert(ar1[2] == 0x40c00000); + assert(ar1[3] == 0x40a00000); + float w = 0; + float z = -278.3; + float y = 5.2; + float x = -987654321; + v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x)); + _mm_store_ps(ar2, _mm_castsi128_ps(v1)); + assert(ar2[0] == x); + assert(ar2[1] == y); + assert(ar2[2] == z); + assert(ar2[3] == w); + /* + std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned long*>(&(ar2[0]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned long*>(&(ar2[1]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned long*>(&(ar2[2]))); + std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned long*>(&(ar2[3]))); + std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned long*>(&x)); + std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned long*>(&y)); + std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned long*>(&z)); + std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned long*>(&w)); + assert(bits1x == bits2x); + assert(bits1y == bits2y); + assert(bits1z == bits2z); + assert(bits1w == bits2w); + */ + v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa)); + _mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2)); + assert(ar1[0] == 0xaaaaaaaa); + assert(ar1[1] == 0x5555cccc); + assert(ar1[2] == 0); + assert(ar1[3] == 0xffffffff); + } + + void testConversions() { + int32_t __attribute__((__aligned__(16))) ar1[4]; + float __attribute__((__aligned__(16))) ar2[4]; + __m128i v1 = _mm_set_epi32(0, -3, -517, 256); + __m128 v2 = _mm_cvtepi32_ps(v1); + _mm_store_ps(ar2, v2); + assert(ar2[0] == 256.0); + assert(ar2[1] == -517.0); + assert(ar2[2] == -3.0); + assert(ar2[3] == 0); + v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0); + v1 = _mm_cvtps_epi32(v2); + _mm_store_si128((__m128i *)ar1, v1); + assert(ar1[0] == -8); + assert(ar1[1] == 7); + assert(ar1[2] == 6); + assert(ar1[3] == 5); + } + + void testMoveMaskPs() { + __m128 v = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + int mask = _mm_movemask_ps(v); + assert(mask == 13); + } + + void testAddPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_add_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 41.0); + assert(ar[1] == 32.0); + assert(ar[2] == 23.0); + assert(ar[3] == 14.0); + } + + void testSubPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_sub_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == -39.0); + assert(ar[1] == -28.0); + assert(ar[2] == -17.0); + assert(ar[3] == -6.0); + } + + void testMulPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0); + __m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0); + __m128 v = _mm_mul_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 40.0); + assert(ar[1] == 60.0); + assert(ar[2] == 60.0); + assert(ar[3] == 40.0); + } + + void testDivPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0); + __m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5); + __m128 v = _mm_div_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 2.0); + assert(ar[1] == 8.0); + assert(ar[2] == 3.0); + assert(ar[3] == 2.0); + } + + void testMinPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5); + __m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0); + __m128 v = _mm_min_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 0.0); + assert(ar[1] == 30.0); + assert(ar[2] == 1.0); + assert(ar[3] == -20.0); + } + + void testMaxPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5); + __m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0); + __m128 v = _mm_max_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 1.0); + assert(ar[1] == 55.0); + assert(ar[2] == 10.0); + assert(ar[3] == 2.5); + } + + void testSqrtPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0); + __m128 v = _mm_sqrt_ps(v1); + _mm_store_ps(ar, v); + assert(ar[0] == 1.0); + assert(ar[1] == 2.0); + assert(ar[2] == 3.0); + assert(ar[3] == 4.0); + } + + void testCmpLtPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmplt_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0xffffffff); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0xffffffff); + assert(_mm_movemask_ps(v) == 9); + } + + void testCmpLePs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmple_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0xffffffff); + assert(ar[1] == 0); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + assert(_mm_movemask_ps(v) == 13); + } + + void testCmpEqPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpeq_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 4); + } + + void testCmpGePs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpge_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 6); + } + + void testCmpGtPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001); + __m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1); + __m128 v = _mm_cmpgt_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0); + assert(ar[3] == 0); + assert(_mm_movemask_ps(v) == 2); + } + + void testAndPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(425, -501, -32, 68); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + __m128 v = _mm_and_ps(v1, v2); + _mm_store_ps(ar, v); + assert(ar[0] == 68); + assert(ar[1] == 0); + assert(ar[2] == -501); + assert(ar[3] == 425); + int32_t __attribute__((__aligned__(16))) ar2[4]; + v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa)); + v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + v = _mm_and_ps(v1, v2); + _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v)); + assert(ar2[0] == 0); + assert(ar2[1] == 0); + assert(ar2[2] == 0); + assert(ar2[3] == 0); + } + + void testAndNotPs() { + float __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_set_ps(425, -501, -32, 68); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff)); + __m128 v = _mm_andnot_ps(v2, v1); + _mm_store_ps(ar, v); + assert(ar[0] == 0); + assert(ar[1] == -32); + assert(ar[2] == 0); + assert(ar[3] == 0); + int32_t __attribute__((__aligned__(16))) ar2[4]; + v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa)); + v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + v = _mm_andnot_ps(v1, v2); + _mm_store_si128((__m128i *)ar2, _mm_castps_si128(v)); + assert(ar2[0] == 0x55555555); + assert(ar2[1] == 0x55555555); + assert(ar2[2] == 0x55555555); + assert(ar2[3] == 0x55555555); + } + + void testOrPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0)); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + __m128 v = _mm_or_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testXorPs() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128 v1 = _mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0)); + __m128 v2 = _mm_castsi128_ps(_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555)); + __m128 v = _mm_xor_ps(v1, v2); + _mm_store_si128((__m128i *)ar, _mm_castps_si128(v)); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xaaaaaaaa); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testAndSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_and_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0); + assert(ar[1] == 0); + assert(ar[2] == 0); + assert(ar[3] == 0); + } + + void testAndNotSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_andnot_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0x55555555); + assert(ar[2] == 0x55555555); + assert(ar[3] == 0x55555555); + } + + void testOrSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_or_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xffffffff); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testXorSi128() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0); + __m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555); + __m128i v = _mm_xor_si128(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 0x55555555); + assert(ar[1] == 0xaaaaaaaa); + assert(ar[2] == 0xffffffff); + assert(ar[3] == 0xffffffff); + } + + void testAddEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(4, 3, 2, 1); + __m128i v2 = _mm_set_epi32(10, 20, 30, 40); + __m128i v = _mm_add_epi32(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == 41); + assert(ar[1] == 32); + assert(ar[2] == 23); + assert(ar[3] == 14); + } + + void testSubEpi32() { + int32_t __attribute__((__aligned__(16))) ar[4]; + __m128i v1 = _mm_set_epi32(4, 3, 2, 1); + __m128i v2 = _mm_set_epi32(10, 20, 30, 40); + __m128i v = _mm_sub_epi32(v1, v2); + _mm_store_si128((__m128i *)ar, v); + assert(ar[0] == -39); + assert(ar[1] == -28); + assert(ar[2] == -17); + assert(ar[3] == -6); + } + + int main(int argc, char ** argv) { + testSetPs(); + testSet1Ps(); + testSetZeroPs(); + testSetEpi32(); + testSet1Epi32(); + testSetZeroSi128(); + testBitCasts(); + testConversions(); + testMoveMaskPs(); + testAddPs(); + testSubPs(); + testMulPs(); + testDivPs(); + testMaxPs(); + testMinPs(); + testSqrtPs(); + testCmpLtPs(); + testCmpLePs(); + testCmpEqPs(); + testCmpGePs(); + testCmpGtPs(); + testAndPs(); + testAndNotPs(); + testOrPs(); + testXorPs(); + testAndSi128(); + testAndNotSi128(); + testOrSi128(); + testXorSi128(); + testAddEpi32(); + testSubEpi32(); + printf("DONE"); + return 0; + } + ''' + + self.do_run(src, 'DONE') + + def test_gcc_unmangler(self): Settings.NAMED_GLOBALS = 1 # test coverage for this diff --git a/tests/test_other.py b/tests/test_other.py index 0dd0bd12..5100db72 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -371,6 +371,11 @@ f.close() process.communicate() assert process.returncode is 0, 'User should be able to specify custom -std= on the command line!' + def test_cap_suffixes(self): + shutil.copyfile(path_from_root('tests', 'hello_world.cpp'), 'test.CPP') + Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'test.CPP')]).communicate() + self.assertContained('hello, world!', run_js(os.path.join(self.get_dir(), 'a.out.js'))) + def test_catch_undef(self): open(os.path.join(self.get_dir(), 'test.cpp'), 'w').write(r''' #include <vector> @@ -686,6 +691,38 @@ f.close() } ''', ['hello through side\n']) + # js library call + open('lib.js', 'w').write(r''' + mergeInto(LibraryManager.library, { + test_lib_func: function(x) { + return x + 17.2; + } + }); + ''') + test('js-lib', 'extern "C" { extern double test_lib_func(int input); }', r''' + #include <stdio.h> + #include "header.h" + extern double sidey(); + int main2() { return 11; } + int main() { + int input = sidey(); + double temp = test_lib_func(input); + printf("other says %.2f\n", temp); + printf("more: %.5f, %d\n", temp, input); + return 0; + } + ''', r''' + #include <stdio.h> + #include "header.h" + extern int main2(); + double sidey() { + int temp = main2(); + printf("main2 sed: %d\n", temp); + printf("main2 sed: %u, %c\n", temp, temp/2); + return test_lib_func(temp); + } + ''', 'other says 45.2', ['--js-library', 'lib.js']) + # libc usage in one modules. must force libc inclusion in the main module if that isn't the one using mallinfo() try: os.environ['EMCC_FORCE_STDLIBS'] = 'libc' @@ -2074,3 +2111,28 @@ int main() Popen([PYTHON, EMCC, path_from_root('tests', 'linpack.c'), '-O2', '-DSP', '--llvm-opts', '''['-O3', '-vectorize', '-vectorize-loops', '-bb-vectorize-vector-bits=128', '-force-vector-width=4']''']).communicate() self.assertContained('Unrolled Single Precision', run_js('a.out.js')) + def test_dependency_file(self): + # Issue 1732: -MMD (and friends) create dependency files that need to be + # copied from the temporary directory. + + open(os.path.join(self.get_dir(), 'test.cpp'), 'w').write(r''' + #include "test.hpp" + + void my_function() + { + } + ''') + open(os.path.join(self.get_dir(), 'test.hpp'), 'w').write(r''' + void my_function(); + ''') + + Popen([PYTHON, EMCC, '-MMD', '-c', os.path.join(self.get_dir(), 'test.cpp'), '-o', + os.path.join(self.get_dir(), 'test.o')]).communicate() + + assert os.path.exists(os.path.join(self.get_dir(), 'test.d')), 'No dependency file generated' + deps = open(os.path.join(self.get_dir(), 'test.d')).read() + # Look for ': ' instead of just ':' to not confuse C:\path\ notation with make "target: deps" rule. Not perfect, but good enough for this test. + head, tail = deps.split(': ', 2) + assert 'test.o' in head, 'Invalid dependency target' + assert 'test.cpp' in tail and 'test.hpp' in tail, 'Invalid dependencies generated' + diff --git a/tools/cache.py b/tools/cache.py index c316a1fd..6f2443e8 100644 --- a/tools/cache.py +++ b/tools/cache.py @@ -13,8 +13,7 @@ class Cache: self.debug = debug def ensure(self): - if not os.path.exists(self.dirname): - os.makedirs(self.dirname) + shared.safe_ensure_dirs(self.dirname) def erase(self): tempfiles.try_delete(self.dirname) @@ -48,11 +47,7 @@ class JCache: def ensure(self): self.cache.ensure() - if not os.path.exists(self.dirname): - try: - os.makedirs(self.dirname) - except (IOError, OSError): - pass + shared.safe_ensure_dirs(self.dirname) def get_shortkey(self, keys): if type(keys) not in [list, tuple]: @@ -196,3 +191,6 @@ def chunkify(funcs, chunk_size, chunking_file, DEBUG=False): # if previous_mapping.get(ident) != new_mapping.get(ident): # print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident) return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names + +import shared + diff --git a/tools/file_packager.py b/tools/file_packager.py index 3ba5b23f..7d9344cd 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -453,7 +453,13 @@ if has_preloaded: package_uuid = uuid.uuid4(); remote_package_name = os.path.basename(Compression.compressed_name(data_target) if Compression.on else data_target) code += r''' - var PACKAGE_PATH = window['encodeURIComponent'](window.location.pathname.toString().substring(0, window.location.pathname.toString().lastIndexOf('/')) + '/'); + var PACKAGE_PATH; + if (typeof window === 'object') { + PACKAGE_PATH = window['encodeURIComponent'](window.location.pathname.toString().substring(0, window.location.pathname.toString().lastIndexOf('/')) + '/'); + } else { + // worker + PACKAGE_PATH = encodeURIComponent(location.pathname.toString().substring(0, location.pathname.toString().lastIndexOf('/')) + '/'); + } var PACKAGE_NAME = '%s'; var REMOTE_PACKAGE_NAME = '%s'; var PACKAGE_UUID = '%s'; diff --git a/tools/jsrun.py b/tools/jsrun.py index 7acfc978..f74a1492 100644 --- a/tools/jsrun.py +++ b/tools/jsrun.py @@ -3,7 +3,7 @@ from subprocess import Popen, PIPE, STDOUT TRACK_PROCESS_SPAWNS = True if (os.getenv('EM_BUILD_VERBOSE') and int(os.getenv('EM_BUILD_VERBOSE')) >= 3) else False -def timeout_run(proc, timeout, note='unnamed process', full_output=False): +def timeout_run(proc, timeout=None, note='unnamed process', full_output=False): start = time.time() if timeout is not None: while time.time() - start < timeout and proc.poll() is None: diff --git a/tools/shared.py b/tools/shared.py index 6330b8a6..3eb72a1e 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1,4 +1,4 @@ -import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, re +import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, re, errno from subprocess import Popen, PIPE, STDOUT from tempfile import mkstemp from distutils.spawn import find_executable @@ -456,6 +456,18 @@ FILE_PACKAGER = path_from_root('tools', 'file_packager.py') # Temp dir. Create a random one, unless EMCC_DEBUG is set, in which case use TEMP_DIR/emscripten_temp +def safe_ensure_dirs(dirname): + try: + os.makedirs(dirname) + except os.error, e: + # Ignore error for already existing dirname + if e.errno != errno.EEXIST: + raise e + # FIXME: Notice that this will result in a false positive, + # should the dirname be a file! There seems to no way to + # handle this atomically in Python 2.x. + # There is an additional option for Python 3.x, though. + class Configuration: def __init__(self, environ=os.environ): self.DEBUG = environ.get('EMCC_DEBUG') @@ -480,10 +492,9 @@ class Configuration: if self.DEBUG: try: self.EMSCRIPTEN_TEMP_DIR = self.CANONICAL_TEMP_DIR - if not os.path.exists(self.EMSCRIPTEN_TEMP_DIR): - os.makedirs(self.EMSCRIPTEN_TEMP_DIR) + safe_ensure_dirs(self.EMSCRIPTEN_TEMP_DIR) except Exception, e: - logging.debug(e + 'Could not create canonical temp dir. Check definition of TEMP_DIR in ~/.emscripten') + logging.error(str(e) + 'Could not create canonical temp dir. Check definition of TEMP_DIR in ~/.emscripten') def get_temp_files(self): return tempfiles.TempFiles( @@ -1017,8 +1028,7 @@ class Building: try: temp_dir = os.path.join(EMSCRIPTEN_TEMP_DIR, 'ar_output_' + str(os.getpid()) + '_' + str(len(temp_dirs))) temp_dirs.append(temp_dir) - if not os.path.exists(temp_dir): - os.makedirs(temp_dir) + safe_ensure_dirs(temp_dir) os.chdir(temp_dir) contents = filter(lambda x: len(x) > 0, Popen([LLVM_AR, 't', f], stdout=PIPE).communicate()[0].split('\n')) #print >> sys.stderr, ' considering archive', f, ':', contents @@ -1027,9 +1037,9 @@ class Building: else: for content in contents: # ar will silently fail if the directory for the file does not exist, so make all the necessary directories dirname = os.path.dirname(content) - if dirname and not os.path.exists(dirname): - os.makedirs(dirname) - Popen([LLVM_AR, 'x', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory + if dirname: + safe_ensure_dirs(dirname) + Popen([LLVM_AR, 'xo', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory contents = map(lambda content: os.path.join(temp_dir, content), contents) contents = filter(os.path.exists, map(os.path.abspath, contents)) added_contents = set() @@ -1411,6 +1421,10 @@ class Building: @staticmethod def ensure_relooper(relooper): if os.path.exists(relooper): return + if os.environ.get('EMCC_FAST_COMPILER'): + logging.debug('not building relooper to js, using it in c++ backend') + return + Cache.ensure() curr = os.getcwd() try: @@ -1509,7 +1523,7 @@ class JS: @staticmethod def to_nice_ident(ident): # limited version of the JS function toNiceIdent - return ident.replace('%', '$').replace('@', '_') + return ident.replace('%', '$').replace('@', '_').replace('.', '_') @staticmethod def make_initializer(sig, settings=None): |