aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jukka Jylänki <jujjyl@gmail.com> 2014-06-05 02:00:08 +0300
committer Jukka Jylänki <jujjyl@gmail.com> 2014-06-05 05:54:04 +0300
commit 92cab32f5ffcc00779b521588bec62f597c98aa7 (patch)
tree a7c248ecf7a34dac50f326e62f8c398ec07ab2c6
parent 3431eab292e0b9e2359b183fa2c954eb7e5cf7e7 (diff)
Use typed arrays instead of normal JS arrays to back files in the MEMFS filesystem for improved performance, especially when backing to IDBFS.
-rw-r--r-- src/library_fs.js 5
-rw-r--r-- src/library_memfs.js 161
-rw-r--r-- src/settings.js 4
-rw-r--r-- tests/test_core.py 22
4 files changed, 136 insertions, 56 deletions
diff --git a/src/library_fs.js b/src/library_fs.js
index 5f7f1dea..1fff6348 100644
--- a/src/library_fs.js
+++ b/src/library_fs.js
@@ -1480,6 +1480,7 @@ mergeInto(LibraryManager.library, {
// WARNING: Can't read binary files in V8's d8 or tracemonkey's js, as
// read() will try to parse UTF8.
obj.contents = intArrayFromString(Module['read'](obj.url), true);
+ obj.usedBytes = obj.contents.length;
} catch (e) {
success = false;
}
@@ -1601,6 +1602,10 @@ mergeInto(LibraryManager.library, {
node.contents = null;
node.url = properties.url;
}
+ // Add a function that defers querying the file size until it is asked the first time.
+ Object.defineProperty(node, "usedBytes", {
+ get: function() { return this.contents.length; }
+ });
// override each stream op with one that tries to force load the lazy file first
var stream_ops = {};
var keys = Object.keys(node.stream_ops);
diff --git a/src/library_memfs.js b/src/library_memfs.js
index 95c3ae65..4b56ebbb 100644
--- a/src/library_memfs.js
+++ b/src/library_memfs.js
@@ -2,11 +2,6 @@ mergeInto(LibraryManager.library, {
$MEMFS__deps: ['$FS'],
$MEMFS: {
ops_table: null,
-
- // content modes
- CONTENT_OWNING: 1, // contains a subarray into the heap, and we own it, without copying (note: someone else needs to free() it, if that is necessary)
- CONTENT_FLEXIBLE: 2, // has been modified or never set to anything, and is a flexible js array that can grow/shrink
- CONTENT_FIXED: 3, // contains some fixed-size content written into it, in a typed array
mount: function(mount) {
return MEMFS.createNode(null, '/', {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, 0);
},
@@ -71,8 +66,11 @@ mergeInto(LibraryManager.library, {
} else if (FS.isFile(node.mode)) {
node.node_ops = MEMFS.ops_table.file.node;
node.stream_ops = MEMFS.ops_table.file.stream;
- node.contents = [];
- node.contentMode = MEMFS.CONTENT_FLEXIBLE;
+ node.usedBytes = 0; // The actual number of bytes used in the typed array, as opposed to contents.buffer.byteLength which gives the whole capacity.
+ // When the byte data of the file is populated, this will point to either a typed array, or a normal JS array. Typed arrays are preferred
+ // for performance, and used by default. However, typed arrays are not resizable like normal JS arrays are, so there is a small memory
+ // overhead for files that are grown by appending writes: capacity is over-allocated relative to the used size, as with std::vector.
+ node.contents = null;
} else if (FS.isLink(node.mode)) {
node.node_ops = MEMFS.ops_table.link.node;
node.stream_ops = MEMFS.ops_table.link.stream;
@@ -87,13 +85,80 @@ mergeInto(LibraryManager.library, {
}
return node;
},
- ensureFlexible: function(node) {
- if (node.contentMode !== MEMFS.CONTENT_FLEXIBLE) {
- var contents = node.contents;
- node.contents = Array.prototype.slice.call(contents);
- node.contentMode = MEMFS.CONTENT_FLEXIBLE;
+
+    // Returns the file data of the given node as a regular JS array; treat the result as read-only. Typed array data is copied (only the first usedBytes bytes, not the spare capacity), a JS array is returned as-is, and a file with no contents yet yields [].
+    getFileDataAsRegularArray: function(node) {
+      if (!node.contents) return []; // Empty file: no storage has been allocated yet, so hand back an empty array instead of null.
+#if USE_TYPED_ARRAYS == 2
+      if (node.contents.subarray) {
+        // Copy only the used prefix of the typed array; anything past usedBytes is unused capacity.
+        return Array.prototype.slice.call(node.contents.subarray(0, node.usedBytes));
+      }
+#endif
+      return node.contents; // The file contents are already in a JS array. Return as-is, without copying.
+    },
+
+    // Grows the backing store of the given file node so that it can hold at least newCapacity bytes. Only the capacity changes:
+    // node.usedBytes is left for the caller to update. May allocate more than requested, to provide automatic geometric increase
+    // and amortized linear performance for appending writes. Never shrinks the storage.
+    expandFileStorage: function(node, newCapacity) {
+#if USE_TYPED_ARRAYS == 2
+
+#if !MEMFS_APPEND_TO_TYPED_ARRAYS
+    // If we are asked to expand the size of a file that already exists, revert to using a standard JS array to store the file
+    // instead of a typed array, because we can then just .push() elements at the back to increase the size. The capacity is the
+    // length of the view itself, not of its underlying buffer: an adopted view may cover only a part of a larger buffer.
+      if (node.contents && node.contents.subarray && newCapacity > node.contents.length) {
+        node.contents = MEMFS.getFileDataAsRegularArray(node);
+        node.usedBytes = node.contents.length;
+      }
+#endif
+
+      if (!node.contents || node.contents.subarray) { // Keep using a typed array if creating a new storage, or if old one was a typed array as well.
+        var prevCapacity = node.contents ? node.contents.length : 0; // Length of the view; its buffer may be larger and owned by someone else.
+        if (prevCapacity >= newCapacity) return; // No need to expand, the storage was already large enough.
+        // Don't expand strictly to the given requested limit if it's only a very small increase, but instead geometrically grow capacity.
+        // For small filesizes (<1MB), perform size*2 geometric increase, but for large sizes, do a much more conservative size*1.125 increase to
+        // avoid overshooting the allocation cap by a very large margin.
+        var CAPACITY_DOUBLING_MAX = 1024 * 1024;
+        newCapacity = Math.max(newCapacity, (prevCapacity * (prevCapacity < CAPACITY_DOUBLING_MAX ? 2.0 : 1.125)) | 0);
+        if (prevCapacity != 0) newCapacity = Math.max(newCapacity, 256); // At minimum allocate 256b for each file when expanding.
+        var oldContents = node.contents;
+        node.contents = new Uint8Array(new ArrayBuffer(newCapacity)); // Allocate new storage.
+        if (node.usedBytes > 0) node.contents.set(oldContents.subarray(0, node.usedBytes), 0); // Copy old data over to the new storage.
+        return;
       }
+#endif
+      // Not using a typed array to back the file storage. Use a standard JS array instead, zero-padded up to the requested capacity.
+      if (!node.contents) node.contents = []; // Also covers a zero-sized request on an empty file, which must not dereference null below.
+      while (node.contents.length < newCapacity) node.contents.push(0);
     },
+
+    // Resizes the backing storage of the given file node so that the file is exactly newSize bytes long (node.usedBytes becomes newSize). Does nothing if the size already matches, releases the storage entirely when newSize is 0, and zero-fills any newly added bytes.
+    resizeFileStorage: function(node, newSize) {
+      if (node.usedBytes == newSize) return;
+      if (newSize == 0) {
+        node.contents = null; // Fully decommit when requesting a resize to zero.
+        node.usedBytes = 0;
+        return;
+      }
+
+#if USE_TYPED_ARRAYS == 2
+      if (!node.contents || node.contents.subarray) { // Resize a typed array if that is being used as the backing store.
+        var oldContents = node.contents;
+        node.contents = new Uint8Array(new ArrayBuffer(newSize)); // Allocate new, zero-initialized storage.
+        if (oldContents) node.contents.set(oldContents.subarray(0, Math.min(newSize, node.usedBytes))); // Copy old data over, if the file had any storage at all.
+        node.usedBytes = newSize;
+        return;
+      }
+#endif
+      // Backing with a JS array: truncate in place, or pad with zeros up to the new size.
+      if (!node.contents) node.contents = [];
+      if (node.contents.length > newSize) node.contents.length = newSize;
+      else while (node.contents.length < newSize) node.contents.push(0);
+      node.usedBytes = newSize;
+    },
+
node_ops: {
getattr: function(node) {
var attr = {};
@@ -108,7 +173,7 @@ mergeInto(LibraryManager.library, {
if (FS.isDir(node.mode)) {
attr.size = 4096;
} else if (FS.isFile(node.mode)) {
- attr.size = node.contents.length;
+ attr.size = node.usedBytes;
} else if (FS.isLink(node.mode)) {
attr.size = node.link.length;
} else {
@@ -131,10 +196,7 @@ mergeInto(LibraryManager.library, {
node.timestamp = attr.timestamp;
}
if (attr.size !== undefined) {
- MEMFS.ensureFlexible(node);
- var contents = node.contents;
- if (attr.size < contents.length) contents.length = attr.size;
- else while (attr.size > contents.length) contents.push(0);
+ MEMFS.resizeFileStorage(node, attr.size);
}
},
lookup: function(parent, name) {
@@ -198,9 +260,8 @@ mergeInto(LibraryManager.library, {
stream_ops: {
read: function(stream, buffer, offset, length, position) {
var contents = stream.node.contents;
- if (position >= contents.length)
- return 0;
- var size = Math.min(contents.length - position, length);
+ if (position >= stream.node.usedBytes) return 0;
+ var size = Math.min(stream.node.usedBytes - position, length);
assert(size >= 0);
#if USE_TYPED_ARRAYS == 2
if (size > 8 && contents.subarray) { // non-trivial, and typed array
@@ -208,47 +269,53 @@ mergeInto(LibraryManager.library, {
} else
#endif
{
- for (var i = 0; i < size; i++) {
- buffer[offset + i] = contents[position + i];
- }
+ for (var i = 0; i < size; i++) buffer[offset + i] = contents[position + i];
}
return size;
},
+
+      // Writes the 'length' bytes buffer[offset .. offset+length-1] into the file pointed to by 'stream', starting at byte 'position' of the file. Returns the number of bytes written. If 'canOwn' is set, the file may adopt a view of 'buffer' instead of copying the data.
       write: function(stream, buffer, offset, length, position, canOwn) {
+        if (!length) return 0;
         var node = stream.node;
         node.timestamp = Date.now();
-        var contents = node.contents;
+
 #if USE_TYPED_ARRAYS == 2
-        if (length && contents.length === 0 && position === 0 && buffer.subarray) {
-          // just replace it with the new data
-#if ASSERTIONS
-          assert(buffer.length);
-#endif
-          if (canOwn && offset === 0) {
-            node.contents = buffer; // this could be a subarray of Emscripten HEAP, or allocated from some other source.
-            node.contentMode = (buffer.buffer === HEAP8.buffer) ? MEMFS.CONTENT_OWNING : MEMFS.CONTENT_FIXED;
-          } else {
-            node.contents = new Uint8Array(buffer.subarray(offset, offset+length));
-            node.contentMode = MEMFS.CONTENT_FIXED;
+        if (buffer.subarray && (!node.contents || node.contents.subarray)) { // This write is from a typed array to a typed array?
+          if (canOwn && position === 0 && node.usedBytes <= length) { // Reuse the given buffer, but only if the write covers the whole file; otherwise existing data would be dropped.
+            node.contents = buffer.subarray(offset, offset + length);
+            node.usedBytes = length;
+            return length;
+          } else if (node.usedBytes === 0 && position === 0) { // If this is the first write to an empty file at its start, do a fast set since we don't need to care about old data.
+            node.contents = new Uint8Array(buffer.subarray(offset, offset + length));
+            node.usedBytes = length;
+            return length;
+          } else if (position + length <= node.usedBytes) { // Writing to an already allocated and used subrange of the file?
+            node.contents.set(buffer.subarray(offset, offset + length), position);
+            return length;
           }
-          return length;
         }
 #endif
-        MEMFS.ensureFlexible(node);
-        var contents = node.contents;
-        while (contents.length < position) contents.push(0);
-        for (var i = 0; i < length; i++) {
-          contents[position + i] = buffer[offset + i];
-        }
+        // Growing the file (appending, or writing past the start of an empty file), or source data did not come as a typed array.
+        MEMFS.expandFileStorage(node, position+length);
+#if USE_TYPED_ARRAYS == 2
+        if (node.contents.subarray && buffer.subarray) node.contents.set(buffer.subarray(offset, offset + length), position); // Use typed array write if available.
+        else
+#endif
+          for (var i = 0; i < length; i++) {
+            node.contents[position + i] = buffer[offset + i]; // Or fall back to manual write if not.
+          }
+        node.usedBytes = Math.max(node.usedBytes, position+length);
         return length;
       },
+
llseek: function(stream, offset, whence) {
var position = offset;
if (whence === 1) { // SEEK_CUR.
position += stream.position;
} else if (whence === 2) { // SEEK_END.
if (FS.isFile(stream.node.mode)) {
- position += stream.node.contents.length;
+ position += stream.node.usedBytes;
}
}
if (position < 0) {
@@ -259,10 +326,8 @@ mergeInto(LibraryManager.library, {
return position;
},
allocate: function(stream, offset, length) {
- MEMFS.ensureFlexible(stream.node);
- var contents = stream.node.contents;
- var limit = offset + length;
- while (limit > contents.length) contents.push(0);
+ MEMFS.expandFileStorage(stream.node, offset + length); // Make sure the backing store can hold every byte up to offset + length.
+ stream.node.usedBytes = Math.max(stream.node.usedBytes, offset + length); // Extend the file size to cover the range; a file that is already larger keeps its size.
},
mmap: function(stream, buffer, offset, length, position, prot, flags) {
if (!FS.isFile(stream.node.mode)) {
@@ -280,7 +345,7 @@ mergeInto(LibraryManager.library, {
ptr = contents.byteOffset;
} else {
// Try to avoid unnecessary slices.
- if (position > 0 || position + length < contents.length) {
+ if (position > 0 || position + length < stream.node.usedBytes) {
if (contents.subarray) {
contents = contents.subarray(position, position + length);
} else {
diff --git a/src/settings.js b/src/settings.js
index bdb149e3..7d9d1b57 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -323,6 +323,10 @@ var FS_LOG = 0; // Log all FS operations. This is especially helpful when you'r
// so that you can create a virtual file system with all of the required files.
var CASE_INSENSITIVE_FS = 0; // If set to nonzero, the provided virtual filesystem if treated case-insensitive, like
// Windows and OSX do. If set to 0, the VFS is case-sensitive, like on Linux.
+var MEMFS_APPEND_TO_TYPED_ARRAYS = 0; // If set to nonzero, MEMFS will always utilize typed arrays as the backing store
+ // for files, including files that grow in size (e.g. through appending writes).
+ // The default behavior (0) is to keep a file in a typed array only until its
+ // storage has to be expanded, and to switch to a normal JS array once it does.
var USE_BSS = 1; // https://en.wikipedia.org/wiki/.bss
// When enabled, 0-initialized globals are sorted to the end of the globals list,
diff --git a/tests/test_core.py b/tests/test_core.py
index bcb03830..f34ba03e 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -4030,11 +4030,14 @@ def process(filename):
src = open(path_from_root('tests', 'files.cpp'), 'r').read()
mem_file = 'src.cpp.o.js.mem'
- try_delete(mem_file)
- self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\n'),
- post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h'])
- if self.emcc_args and '--memory-init-file' in self.emcc_args:
- assert os.path.exists(mem_file)
+ orig_args = self.emcc_args
+ for modes in [[], ['-s', 'MEMFS_APPEND_TO_TYPED_ARRAYS=1']]:
+ self.emcc_args = orig_args + modes
+ try_delete(mem_file)
+ self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\n'),
+ post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h'])
+ if self.emcc_args and '--memory-init-file' in self.emcc_args:
+ assert os.path.exists(mem_file)
def test_files_m(self):
# Test for Module.stdin etc.
@@ -4275,7 +4278,10 @@ def process(filename):
if self.emcc_args is None: return self.skip('requires libcxx')
test_path = path_from_root('tests', 'core', 'test_wprintf')
src, output = (test_path + s for s in ('.c', '.out'))
- self.do_run_from_file(src, output)
+ orig_args = self.emcc_args
+ for modes in [[], ['-s', 'MEMFS_APPEND_TO_TYPED_ARRAYS=1']]:
+ self.emcc_args = orig_args + modes
+ self.do_run_from_file(src, output)
def test_direct_string_constant_usage(self):
if self.emcc_args is None: return self.skip('requires libcxx')
@@ -5108,7 +5114,7 @@ def process(filename):
\'\'\'
FS.createDataFile('/', 'paper.pdf', eval(Module.read('paper.pdf.js')), true, false);
Module.callMain(Module.arguments);
- Module.print("Data: " + JSON.stringify(FS.root.contents['filename-1.ppm'].contents.map(function(x) { return unSign(x, 8) })));
+ Module.print("Data: " + JSON.stringify(MEMFS.getFileDataAsRegularArray(FS.root.contents['filename-1.ppm']).map(function(x) { return unSign(x, 8) })));
\'\'\'
)
src.close()
@@ -5158,7 +5164,7 @@ def process(filename):
))
).replace(
'// {{POST_RUN_ADDITIONS}}',
- "Module.print('Data: ' + JSON.stringify(FS.analyzePath('image.raw').object.contents));"
+ "Module.print('Data: ' + JSON.stringify(MEMFS.getFileDataAsRegularArray(FS.analyzePath('image.raw').object)));"
)
open(filename, 'w').write(src)
'''