diff options
author | Jukka Jylänki <jujjyl@gmail.com> | 2013-11-02 01:12:26 +0200 |
---|---|---|
committer | Jukka Jylänki <jujjyl@gmail.com> | 2013-11-02 01:46:31 +0200 |
commit | cb3fe2bb6f6d348d3da7ea8c71af909748ff8d71 (patch) | |
tree | 93db047235ba65e3aa82c726abf7ec650fb10b71 | |
parent | 97f20af483140174e96385f142df43d7251cfccb (diff) |
Add command line parameter --no-heap-copy that optimizes for small memory footprint and fread() performance over the default behavior that copied VFS to HEAP, that is designed for mmap() performance.
Adjust MEMFS node contentMode enum to reflect whether content is off the main HEAP or not. Note that this enum is not much used, so this has little effect. Add browser tests to check that fread() and mmap() work with and without --no-heap-copy.
-rwxr-xr-x | emcc | 6 | ||||
-rw-r--r-- | src/library_memfs.js | 6 | ||||
-rw-r--r-- | tests/mmap_file.c | 27 | ||||
-rw-r--r-- | tests/test_browser.py | 11 | ||||
-rw-r--r-- | tests/test_core.py | 27 | ||||
-rw-r--r-- | tools/file_packager.py | 23 |
6 files changed, 71 insertions, 29 deletions
@@ -777,6 +777,7 @@ try: save_bc = False memory_init_file = False use_preload_cache = False + no_heap_copy = False proxy_to_worker = False if use_cxx: @@ -897,6 +898,9 @@ try: elif newargs[i].startswith('--use-preload-cache'): use_preload_cache = True newargs[i] = '' + elif newargs[i].startswith('--no-heap-copy'): + no_heap_copy = True + newargs[i] = '' elif newargs[i] == '--ignore-dynamic-linking': ignore_dynamic_linking = True newargs[i] = '' @@ -1667,6 +1671,8 @@ try: file_args += ['--compress', Compression.encoder, Compression.decoder, Compression.js_name] if use_preload_cache: file_args.append('--use-preload-cache') + if no_heap_copy: + file_args.append('--no-heap-copy') file_code = execute([shared.PYTHON, shared.FILE_PACKAGER, unsuffixed(target) + '.data'] + file_args, stdout=PIPE)[0] pre_js = file_code + pre_js diff --git a/src/library_memfs.js b/src/library_memfs.js index 9f528108..d3148d8b 100644 --- a/src/library_memfs.js +++ b/src/library_memfs.js @@ -225,9 +225,9 @@ mergeInto(LibraryManager.library, { #if ASSERTIONS assert(buffer.length); #endif - if (canOwn && buffer.buffer === HEAP8.buffer && offset === 0) { - node.contents = buffer; // this is a subarray of the heap, and we can own it - node.contentMode = MEMFS.CONTENT_OWNING; + if (canOwn && offset === 0) { + node.contents = buffer; // this could be a subarray of Emscripten HEAP, or allocated from some other source. + node.contentMode = (buffer.buffer === HEAP8.buffer) ? MEMFS.CONTENT_OWNING : MEMFS.CONTENT_FIXED; } else { node.contents = new Uint8Array(buffer.subarray(offset, offset+length)); node.contentMode = MEMFS.CONTENT_FIXED; diff --git a/tests/mmap_file.c b/tests/mmap_file.c new file mode 100644 index 00000000..6eed95e0 --- /dev/null +++ b/tests/mmap_file.c @@ -0,0 +1,27 @@ +#include <stdio.h> +#include <sys/mman.h> +#include <emscripten.h> +#include <string.h> +#include <assert.h> + +int main() { + printf("*\n"); + FILE *f = fopen("data.dat", "r"); + char *m; + m = (char*)mmap(NULL, 9000, PROT_READ, MAP_PRIVATE, fileno(f), 0); + for (int i = 0; i < 20; i++) putchar(m[i]); + assert(!strncmp(m, "data from the file .", 20)); + munmap(m, 9000); + printf("\n"); + m = (char*)mmap(NULL, 9000, PROT_READ, MAP_PRIVATE, fileno(f), 5); + for (int i = 0; i < 20; i++) putchar(m[i]); + assert(!strncmp(m, "from the file ......", 20)); + munmap(m, 9000); + printf("\n*\n"); + +#ifdef REPORT_RESULT + int result = 1; + REPORT_RESULT(); +#endif + return 0; +} diff --git a/tests/test_browser.py b/tests/test_browser.py index 2ff9106b..d52f109f 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -338,7 +338,7 @@ If manually bisecting: ("somefile.txt@/directory/file.txt", "/directory/file.txt"), ("somefile.txt@/directory/file.txt", "directory/file.txt"), (absolute_src_path + "@/directory/file.txt", "directory/file.txt")] - + for test in test_cases: (srcpath, dstpath) = test print 'Testing', srcpath, dstpath @@ -346,6 +346,11 @@ If manually bisecting: Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp'), '--preload-file', srcpath, '-o', 'page.html']).communicate() self.run_browser('page.html', 'You should see |load me right before|.', '/report_result?1') + # Test that '--no-heap-copy' works. + make_main('somefile.txt') + Popen([PYTHON, EMCC, os.path.join(self.get_dir(), 'main.cpp'), '--preload-file', 'somefile.txt', '--no-heap-copy', '-o', 'page.html']).communicate() + self.run_browser('page.html', 'You should see |load me right before|.', '/report_result?1') + # By absolute path make_main('somefile.txt') # absolute becomes relative @@ -1566,3 +1571,7 @@ keydown(100);keyup(100); // trigger the end Popen([PYTHON, EMCC, path_from_root('tests', 'browser_module.cpp'), '-o', 'module.js', '-O2', '-s', 'SIDE_MODULE=1', '-s', 'DLOPEN_SUPPORT=1', '-s', 'EXPORTED_FUNCTIONS=["_one", "_two"]']).communicate() self.btest('browser_main.cpp', args=['-O2', '-s', 'MAIN_MODULE=1', '-s', 'DLOPEN_SUPPORT=1'], expected='8') + def test_mmap_file(self): + open(self.in_dir('data.dat'), 'w').write('data from the file ' + ('.' * 9000)) + for extra_args in [[], ['--no-heap-copy']]: + self.btest(path_from_root('tests', 'mmap_file.c'), expected='1', args=['--preload-file', 'data.dat'] + extra_args) diff --git a/tests/test_core.py b/tests/test_core.py index b421659c..89dd725f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8594,30 +8594,13 @@ void*:16 def test_mmap_file(self): if self.emcc_args is None: return self.skip('requires emcc') - self.emcc_args += ['--embed-file', 'data.dat'] + for extra_args in [[], ['--no-heap-copy']]: + self.emcc_args += ['--embed-file', 'data.dat'] + extra_args - open(self.in_dir('data.dat'), 'w').write('data from the file ' + ('.' * 9000)) + open(self.in_dir('data.dat'), 'w').write('data from the file ' + ('.' * 9000)) - src = r''' - #include <stdio.h> - #include <sys/mman.h> - - int main() { - printf("*\n"); - FILE *f = fopen("data.dat", "r"); - char *m; - m = (char*)mmap(NULL, 9000, PROT_READ, MAP_PRIVATE, fileno(f), 0); - for (int i = 0; i < 20; i++) putchar(m[i]); - munmap(m, 9000); - printf("\n"); - m = (char*)mmap(NULL, 9000, PROT_READ, MAP_PRIVATE, fileno(f), 5); - for (int i = 0; i < 20; i++) putchar(m[i]); - munmap(m, 9000); - printf("\n*\n"); - return 0; - } - ''' - self.do_run(src, '*\ndata from the file .\nfrom the file ......\n*\n') + src = open(path_from_root('tests', 'mmap_file.c')).read() + self.do_run(src, '*\ndata from the file .\nfrom the file ......\n*\n') def test_cubescript(self): if self.emcc_args is None: return self.skip('requires emcc') diff --git a/tools/file_packager.py b/tools/file_packager.py index 1d0ec447..3ba5b23f 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -11,7 +11,7 @@ data downloads. Usage: - file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] + file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] --crunch=X Will compress dxt files to crn with quality level X. The crunch commandline tool must be present and CRUNCH should be defined in ~/.emscripten that points to it. JS crunch decompressing code will @@ -27,6 +27,10 @@ Usage: --use-preload-cache Stores package in IndexedDB so that subsequent loads don't need to do XHR. Checks package version. + --no-heap-copy If specified, the preloaded filesystem is not copied inside the Emscripten HEAP, but kept in a separate typed array outside it. + The default, if this is not specified, is to embed the VFS inside the HEAP, so that mmap()ing files in it is a no-op. + Passing this flag optimizes for fread() usage, omitting it optimizes for mmap() usage. + Notes: * The file packager generates unix-style file paths. So if you are on windows and a file is accessed at @@ -43,7 +47,7 @@ from shared import Compression, execute, suffix, unsuffixed from subprocess import Popen, PIPE, STDOUT if len(sys.argv) == 1: - print '''Usage: file_packager.py TARGET [--preload A...] [--embed B...] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] + print '''Usage: file_packager.py TARGET [--preload A...] [--embed B...] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] See the source for more details.''' sys.exit(0) @@ -70,7 +74,12 @@ crunch = 0 plugins = [] jsoutput = None force = True +# If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally cache VFS XHR so that subsequent +# page loads can read the data from the offline cache instead. use_preload_cache = False +# If set to True, the blob received from XHR is moved to the Emscripten HEAP, optimizing for mmap() performance. +# If set to False, the XHR blob is kept intact, and fread()s etc. are performed directly to that data. This optimizes for minimal memory usage and fread() performance. +no_heap_copy = True for arg in sys.argv[1:]: if arg == '--preload': @@ -91,6 +100,8 @@ for arg in sys.argv[1:]: force = False elif arg == '--use-preload-cache': use_preload_cache = True + elif arg == '--no-heap-copy': + no_heap_copy = False elif arg.startswith('--js-output'): jsoutput = arg.split('=')[1] if '=' in arg else None elif arg.startswith('--crunch'): @@ -414,12 +425,18 @@ for file_ in data_files: if has_preloaded: # Get the big archive and split it up - use_data = ''' + if no_heap_copy: + use_data = ''' // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though. var ptr = Module['_malloc'](byteArray.length); Module['HEAPU8'].set(byteArray, ptr); DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length); ''' + else: + use_data = ''' + // Reuse the bytearray from the XHR as the source for file reads. + DataRequest.prototype.byteArray = byteArray; +''' for file_ in data_files: if file_['mode'] == 'preload': use_data += ' DataRequest.prototype.requests["%s"].onload();\n' % (file_['dstpath']) |