diff options
author | Jukka Jylänki <jujjyl@gmail.com> | 2014-06-05 02:00:08 +0300 |
---|---|---|
committer | Jukka Jylänki <jujjyl@gmail.com> | 2014-06-05 05:54:04 +0300 |
commit | 92cab32f5ffcc00779b521588bec62f597c98aa7 (patch) | |
tree | a7c248ecf7a34dac50f326e62f8c398ec07ab2c6 | |
parent | 3431eab292e0b9e2359b183fa2c954eb7e5cf7e7 (diff) |
Use typed arrays instead of normal JS arrays to back files in the MEMFS filesystem for improved performance, especially when backing to IDBFS.
-rw-r--r-- | src/library_fs.js | 5 |
-rw-r--r-- | src/library_memfs.js | 161 |
-rw-r--r-- | src/settings.js | 4 |
-rw-r--r-- | tests/test_core.py | 22 |
4 files changed, 136 insertions, 56 deletions
diff --git a/src/library_fs.js b/src/library_fs.js index 5f7f1dea..1fff6348 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1480,6 +1480,7 @@ mergeInto(LibraryManager.library, { // WARNING: Can't read binary files in V8's d8 or tracemonkey's js, as // read() will try to parse UTF8. obj.contents = intArrayFromString(Module['read'](obj.url), true); + obj.usedBytes = obj.contents.length; } catch (e) { success = false; } @@ -1601,6 +1602,10 @@ mergeInto(LibraryManager.library, { node.contents = null; node.url = properties.url; } + // Add a function that defers querying the file size until it is asked the first time. + Object.defineProperty(node, "usedBytes", { + get: function() { return this.contents.length; } + }); // override each stream op with one that tries to force load the lazy file first var stream_ops = {}; var keys = Object.keys(node.stream_ops); diff --git a/src/library_memfs.js b/src/library_memfs.js index 95c3ae65..4b56ebbb 100644 --- a/src/library_memfs.js +++ b/src/library_memfs.js @@ -2,11 +2,6 @@ mergeInto(LibraryManager.library, { $MEMFS__deps: ['$FS'], $MEMFS: { ops_table: null, - - // content modes - CONTENT_OWNING: 1, // contains a subarray into the heap, and we own it, without copying (note: someone else needs to free() it, if that is necessary) - CONTENT_FLEXIBLE: 2, // has been modified or never set to anything, and is a flexible js array that can grow/shrink - CONTENT_FIXED: 3, // contains some fixed-size content written into it, in a typed array mount: function(mount) { return MEMFS.createNode(null, '/', {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, 0); }, @@ -71,8 +66,11 @@ mergeInto(LibraryManager.library, { } else if (FS.isFile(node.mode)) { node.node_ops = MEMFS.ops_table.file.node; node.stream_ops = MEMFS.ops_table.file.stream; - node.contents = []; - node.contentMode = MEMFS.CONTENT_FLEXIBLE; + node.usedBytes = 0; // The actual number of bytes used in the typed array, as opposed to contents.buffer.byteLength which gives the 
whole capacity. + // When the byte data of the file is populated, this will point to either a typed array, or a normal JS array. Typed arrays are preferred + // for performance, and used by default. However, typed arrays are not resizable like normal JS arrays are, so there is a small disk size + // penalty involved for appending file writes that continuously grow a file similar to std::vector capacity vs used -scheme. + node.contents = null; } else if (FS.isLink(node.mode)) { node.node_ops = MEMFS.ops_table.link.node; node.stream_ops = MEMFS.ops_table.link.stream; @@ -87,13 +85,80 @@ mergeInto(LibraryManager.library, { } return node; }, - ensureFlexible: function(node) { - if (node.contentMode !== MEMFS.CONTENT_FLEXIBLE) { - var contents = node.contents; - node.contents = Array.prototype.slice.call(contents); - node.contentMode = MEMFS.CONTENT_FLEXIBLE; + + // Given a file node, returns its file data converted to a regular JS array. You should treat this as read-only. + getFileDataAsRegularArray: function(node) { +#if USE_TYPED_ARRAYS == 2 + if (node.contents && node.contents.subarray) { + var arr = []; + for(var i = 0; i < node.usedBytes; ++i) arr.push(node.contents[i]); + return arr; // Returns a copy of the original data. + } +#endif + return node.contents; // No-op, the file contents are already in a JS array. Return as-is. + }, + + // Allocates a new backing store for the given node so that it can fit at least newSize amount of bytes. + // May allocate more, to provide automatic geometric increase and amortized linear performance appending writes. + // Never shrinks the storage. + expandFileStorage: function(node, newCapacity) { +#if USE_TYPED_ARRAYS == 2 + +#if !MEMFS_APPEND_TO_TYPED_ARRAYS + // If we are asked to expand the size of a file that already exists, revert to using a standard JS array to store the file + // instead of a typed array. 
This makes resizing the array more flexible because we can just .push() elements at the back to + // increase the size. + if (node.contents && node.contents.subarray && newCapacity > node.contents.buffer.byteLength) { + node.contents = MEMFS.getFileDataAsRegularArray(node); + node.usedBytes = node.contents.length; + } +#endif + + if (!node.contents || node.contents.subarray) { // Keep using a typed array if creating a new storage, or if old one was a typed array as well. + var prevCapacity = node.contents ? node.contents.buffer.byteLength : 0; + if (prevCapacity >= newCapacity) return; // No need to expand, the storage was already large enough. + // Don't expand strictly to the given requested limit if it's only a very small increase, but instead geometrically grow capacity. + // For small filesizes (<1MB), perform size*2 geometric increase, but for large sizes, do a much more conservative size*1.125 increase to + // avoid overshooting the allocation cap by a very large margin. + var CAPACITY_DOUBLING_MAX = 1024 * 1024; + newCapacity = Math.max(newCapacity, (prevCapacity * (prevCapacity < CAPACITY_DOUBLING_MAX ? 2.0 : 1.125)) | 0); + if (prevCapacity != 0) newCapacity = Math.max(newCapacity, 256); // At minimum allocate 256b for each file when expanding. + var oldContents = node.contents; + node.contents = new Uint8Array(new ArrayBuffer(newCapacity)); // Allocate new storage. + if (node.usedBytes > 0) node.contents.set(oldContents.subarray(0, node.usedBytes), 0); // Copy old data over to the new storage. + return; } +#endif + // Not using a typed array to back the file storage. Use a standard JS array instead. + if (!node.contents && newCapacity > 0) node.contents = []; + while (node.contents.length < newCapacity) node.contents.push(0); }, + + // Performs an exact resize of the backing file storage to the given size, if the size is not exactly this, the storage is fully reallocated. 
+ resizeFileStorage: function(node, newSize) { + if (node.usedBytes == newSize) return; + if (newSize == 0) { + node.contents = null; // Fully decommit when requesting a resize to zero. + node.usedBytes = 0; + return; + } + +#if USE_TYPED_ARRAYS == 2 + if (!node.contents || node.contents.subarray) { // Resize a typed array if that is being used as the backing store. + var oldContents = node.contents; + node.contents = new Uint8Array(new ArrayBuffer(newSize)); // Allocate new storage. + node.contents.set(oldContents.subarray(0, Math.min(newSize, node.usedBytes))); // Copy old data over to the new storage. + node.usedBytes = newSize; + return; + } +#endif + // Backing with a JS array. + if (!node.contents) node.contents = []; + if (node.contents.length > newSize) node.contents.length = newSize; + else while (node.contents.length < newSize) node.contents.push(0); + node.usedBytes = newSize; + }, + node_ops: { getattr: function(node) { var attr = {}; @@ -108,7 +173,7 @@ mergeInto(LibraryManager.library, { if (FS.isDir(node.mode)) { attr.size = 4096; } else if (FS.isFile(node.mode)) { - attr.size = node.contents.length; + attr.size = node.usedBytes; } else if (FS.isLink(node.mode)) { attr.size = node.link.length; } else { @@ -131,10 +196,7 @@ mergeInto(LibraryManager.library, { node.timestamp = attr.timestamp; } if (attr.size !== undefined) { - MEMFS.ensureFlexible(node); - var contents = node.contents; - if (attr.size < contents.length) contents.length = attr.size; - else while (attr.size > contents.length) contents.push(0); + MEMFS.resizeFileStorage(node, attr.size); } }, lookup: function(parent, name) { @@ -198,9 +260,8 @@ mergeInto(LibraryManager.library, { stream_ops: { read: function(stream, buffer, offset, length, position) { var contents = stream.node.contents; - if (position >= contents.length) - return 0; - var size = Math.min(contents.length - position, length); + if (position >= stream.node.usedBytes) return 0; + var size = Math.min(stream.node.usedBytes - 
position, length); assert(size >= 0); #if USE_TYPED_ARRAYS == 2 if (size > 8 && contents.subarray) { // non-trivial, and typed array @@ -208,47 +269,53 @@ mergeInto(LibraryManager.library, { } else #endif { - for (var i = 0; i < size; i++) { - buffer[offset + i] = contents[position + i]; - } + for (var i = 0; i < size; i++) buffer[offset + i] = contents[position + i]; } return size; }, + + // Writes the byte range (buffer[offset], buffer[offset+length]) to offset 'position' into the file pointed by 'stream' write: function(stream, buffer, offset, length, position, canOwn) { + if (!length) return 0; var node = stream.node; node.timestamp = Date.now(); - var contents = node.contents; + #if USE_TYPED_ARRAYS == 2 - if (length && contents.length === 0 && position === 0 && buffer.subarray) { - // just replace it with the new data -#if ASSERTIONS - assert(buffer.length); -#endif - if (canOwn && offset === 0) { - node.contents = buffer; // this could be a subarray of Emscripten HEAP, or allocated from some other source. - node.contentMode = (buffer.buffer === HEAP8.buffer) ? MEMFS.CONTENT_OWNING : MEMFS.CONTENT_FIXED; - } else { - node.contents = new Uint8Array(buffer.subarray(offset, offset+length)); - node.contentMode = MEMFS.CONTENT_FIXED; + if (buffer.subarray && (!node.contents || node.contents.subarray)) { // This write is from a typed array to a typed array? + if (canOwn) { // Can we just reuse the buffer we are given? + node.contents = buffer.subarray(offset, offset + length); + node.usedBytes = length; + return length; + } else if (node.usedBytes === 0) { // If this first write to an empty file, do a fast set since we don't need to care about old data. + node.contents = new Uint8Array(buffer.subarray(offset, offset + length)); + node.usedBytes = length; + return length; + } else if (position + length <= node.usedBytes) { // Writing to an already allocated and used subrange of the file? 
+ node.contents.set(buffer.subarray(offset, offset + length), position); + return length; } - return length; } #endif - MEMFS.ensureFlexible(node); - var contents = node.contents; - while (contents.length < position) contents.push(0); - for (var i = 0; i < length; i++) { - contents[position + i] = buffer[offset + i]; - } + // Appending to an existing file and we need to reallocate, or source data did not come as a typed array. + MEMFS.expandFileStorage(node, position+length); +#if USE_TYPED_ARRAYS == 2 + if (node.contents.subarray && buffer.subarray) node.contents.set(buffer.subarray(offset, offset + length), position); // Use typed array write if available. + else +#endif + for (var i = 0; i < length; i++) { + node.contents[position + i] = buffer[offset + i]; // Or fall back to manual write if not. + } + node.usedBytes = Math.max(node.usedBytes, position+length); return length; }, + llseek: function(stream, offset, whence) { var position = offset; if (whence === 1) { // SEEK_CUR. position += stream.position; } else if (whence === 2) { // SEEK_END. if (FS.isFile(stream.node.mode)) { - position += stream.node.contents.length; + position += stream.node.usedBytes; } } if (position < 0) { @@ -259,10 +326,8 @@ mergeInto(LibraryManager.library, { return position; }, allocate: function(stream, offset, length) { - MEMFS.ensureFlexible(stream.node); - var contents = stream.node.contents; - var limit = offset + length; - while (limit > contents.length) contents.push(0); + MEMFS.expandFileStorage(stream.node, offset + length); + stream.node.usedBytes = Math.max(stream.node.usedBytes, offset + length); }, mmap: function(stream, buffer, offset, length, position, prot, flags) { if (!FS.isFile(stream.node.mode)) { @@ -280,7 +345,7 @@ mergeInto(LibraryManager.library, { ptr = contents.byteOffset; } else { // Try to avoid unnecessary slices. 
- if (position > 0 || position + length < contents.length) { + if (position > 0 || position + length < stream.node.usedBytes) { if (contents.subarray) { contents = contents.subarray(position, position + length); } else { diff --git a/src/settings.js b/src/settings.js index bdb149e3..7d9d1b57 100644 --- a/src/settings.js +++ b/src/settings.js @@ -323,6 +323,10 @@ var FS_LOG = 0; // Log all FS operations. This is especially helpful when you'r // so that you can create a virtual file system with all of the required files. var CASE_INSENSITIVE_FS = 0; // If set to nonzero, the provided virtual filesystem if treated case-insensitive, like // Windows and OSX do. If set to 0, the VFS is case-sensitive, like on Linux. +var MEMFS_APPEND_TO_TYPED_ARRAYS = 0; // If set to nonzero, MEMFS will always utilize typed arrays as the backing store + // for writing to files. The default behavior is to use typed arrays for files + // when the file size doesn't change (appending writes), and for files that do + // change size, use normal JS arrays instead. 
var USE_BSS = 1; // https://en.wikipedia.org/wiki/.bss // When enabled, 0-initialized globals are sorted to the end of the globals list, diff --git a/tests/test_core.py b/tests/test_core.py index bcb03830..f34ba03e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4030,11 +4030,14 @@ def process(filename): src = open(path_from_root('tests', 'files.cpp'), 'r').read() mem_file = 'src.cpp.o.js.mem' - try_delete(mem_file) - self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\n'), - post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h']) - if self.emcc_args and '--memory-init-file' in self.emcc_args: - assert os.path.exists(mem_file) + orig_args = self.emcc_args + for modes in [[], ['-s', 'MEMFS_APPEND_TO_TYPED_ARRAYS=1']]: + self.emcc_args = orig_args + modes + try_delete(mem_file) + self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\n'), + post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h']) + if self.emcc_args and '--memory-init-file' in self.emcc_args: + assert os.path.exists(mem_file) def test_files_m(self): # Test for Module.stdin etc. 
@@ -4275,7 +4278,10 @@ def process(filename): if self.emcc_args is None: return self.skip('requires libcxx') test_path = path_from_root('tests', 'core', 'test_wprintf') src, output = (test_path + s for s in ('.c', '.out')) - self.do_run_from_file(src, output) + orig_args = self.emcc_args + for modes in [[], ['-s', 'MEMFS_APPEND_TO_TYPED_ARRAYS=1']]: + self.emcc_args = orig_args + modes + self.do_run_from_file(src, output) def test_direct_string_constant_usage(self): if self.emcc_args is None: return self.skip('requires libcxx') @@ -5108,7 +5114,7 @@ def process(filename): \'\'\' FS.createDataFile('/', 'paper.pdf', eval(Module.read('paper.pdf.js')), true, false); Module.callMain(Module.arguments); - Module.print("Data: " + JSON.stringify(FS.root.contents['filename-1.ppm'].contents.map(function(x) { return unSign(x, 8) }))); + Module.print("Data: " + JSON.stringify(MEMFS.getFileDataAsRegularArray(FS.root.contents['filename-1.ppm']).map(function(x) { return unSign(x, 8) }))); \'\'\' ) src.close() @@ -5158,7 +5164,7 @@ def process(filename): )) ).replace( '// {{POST_RUN_ADDITIONS}}', - "Module.print('Data: ' + JSON.stringify(FS.analyzePath('image.raw').object.contents));" + "Module.print('Data: ' + JSON.stringify(MEMFS.getFileDataAsRegularArray(FS.analyzePath('image.raw').object)));" ) open(filename, 'w').write(src) ''' |