aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJukka Jylänki <jujjyl@gmail.com>2014-01-15 17:25:53 +0200
committerJukka Jylänki <jujjyl@gmail.com>2014-01-15 17:31:19 +0200
commit4a2fd2a412f2406c190d78d3e2a8e4676165aa93 (patch)
tree41017967996b5dcb7e65678a0a1dfd3627a8f07c
parentaba027c6e69d3e2750d0644e8967879bfc4df0a9 (diff)
Optimize rendering from client-side memory by maintaining a set of doublebuffered VBOs. This improves performance by reducing CPU-GPU pipeline stalls.
-rw-r--r--src/library_browser.js9
-rw-r--r--src/library_gl.js139
2 files changed, 108 insertions, 40 deletions
diff --git a/src/library_browser.js b/src/library_browser.js
index d5e35339..458a8dd2 100644
--- a/src/library_browser.js
+++ b/src/library_browser.js
@@ -775,6 +775,15 @@ mergeInto(LibraryManager.library, {
return;
}
+ // Signal GL rendering layer that processing of a new frame is about to start. This helps it optimize
+ // VBO double-buffering and reduce GPU stalls.
+#if FULL_ES2
+ GL.newRenderingFrameStarted();
+#endif
+#if LEGACY_GL_EMULATION
+ GL.newRenderingFrameStarted();
+#endif
+
if (Module['preMainLoop']) {
Module['preMainLoop']();
}
diff --git a/src/library_gl.js b/src/library_gl.js
index a4e66b47..899638e8 100644
--- a/src/library_gl.js
+++ b/src/library_gl.js
@@ -57,6 +57,7 @@ var LibraryGL = {
unpackAlignment: 4, // default alignment is 4 bytes
init: function() {
+ GL.createLog2ceilLookup(GL.MAX_TEMP_BUFFER_SIZE);
Browser.moduleContextCreatedCallbacks.push(GL.initExtensions);
},
@@ -81,36 +82,58 @@ var LibraryGL = {
miniTempBuffer: null,
miniTempBufferViews: [0], // index i has the view of size i+1
- // Large temporary buffers
+ // When user GL code wants to render from client-side memory, we need to upload the vertex data to a temp VBO
+ // for rendering. Maintain a set of temp VBOs that are created-on-demand to appropriate sizes, and never destroyed.
+ // Also, for best performance the VBOs are double-buffered, i.e. every second frame we switch the set of VBOs we
+ // upload to, so that rendering from the previous frame is not disturbed by uploading from new data to it, which
+ // could cause a GPU-CPU pipeline stall.
+ // Note that index buffers are not double-buffered (at the moment) in this manner.
MAX_TEMP_BUFFER_SIZE: {{{ GL_MAX_TEMP_BUFFER_SIZE }}},
- tempBufferIndexLookup: null,
- tempVertexBuffers: null,
- tempIndexBuffers: null,
+ tempVertexBuffers1: [],
+ tempVertexBufferCounters1: [],
+ tempVertexBuffers2: [],
+ tempVertexBufferCounters2: [],
+ // Maximum number of temp VBOs of one size to maintain, after that we start reusing old ones, which is safe but can give
+ // a performance impact. If CPU-GPU stalls are a problem, increasing this might help.
+ numTempVertexBuffersPerSize: 64, // (const)
+ tempIndexBuffers: [],
tempQuadIndexBuffer: null,
- generateTempBuffers: function(quads) {
- GL.tempBufferIndexLookup = new Uint8Array(GL.MAX_TEMP_BUFFER_SIZE+1);
- GL.tempVertexBuffers = [];
- GL.tempIndexBuffers = [];
- var last = -1, curr = -1;
- var size = 1;
- for (var i = 0; i <= GL.MAX_TEMP_BUFFER_SIZE; i++) {
- if (i > size) {
- size <<= 1;
+ // Precompute a lookup table for the function ceil(log2(x)), i.e. how many bits are needed to represent x, or,
+ // if x was rounded up to next pow2, which index is the single '1' bit at?
+ // Then log2ceilLookup[x] returns ceil(log2(x)).
+ log2ceilLookup: null,
+ createLog2ceilLookup: function(maxValue) {
+ GL.log2ceilLookup = new Uint8Array(maxValue+1);
+ var log2 = 0;
+ var pow2 = 1;
+ GL.log2ceilLookup[0] = 0;
+ for(var i = 1; i <= maxValue; ++i) {
+ if (i > pow2) {
+ pow2 <<= 1;
+ ++log2;
}
- if (size != last) {
- curr++;
- GL.tempVertexBuffers[curr] = GLctx.createBuffer();
- GLctx.bindBuffer(GLctx.ARRAY_BUFFER, GL.tempVertexBuffers[curr]);
- GLctx.bufferData(GLctx.ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
- GLctx.bindBuffer(GLctx.ARRAY_BUFFER, null);
- GL.tempIndexBuffers[curr] = GLctx.createBuffer();
- GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[curr]);
- GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
- GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, null);
- last = size;
+ GL.log2ceilLookup[i] = log2;
+ }
+ },
+
+ generateTempBuffers: function(quads) {
+ var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+ GL.tempVertexBufferCounters1.length = GL.tempVertexBufferCounters2.length = largestIndex+1;
+ GL.tempVertexBuffers1.length = GL.tempVertexBuffers2.length = largestIndex+1;
+ GL.tempIndexBuffers.length = largestIndex+1;
+ for(var i = 0; i <= largestIndex; ++i) {
+ GL.tempIndexBuffers[i] = null; // Created on-demand
+ GL.tempVertexBufferCounters1[i] = GL.tempVertexBufferCounters2[i] = 0;
+ var ringbufferLength = GL.numTempVertexBuffersPerSize;
+ GL.tempVertexBuffers1[i] = [];
+ GL.tempVertexBuffers2[i] = [];
+ var ringbuffer1 = GL.tempVertexBuffers1[i];
+ var ringbuffer2 = GL.tempVertexBuffers2[i];
+ ringbuffer1.length = ringbuffer2.length = ringbufferLength;
+ for(var j = 0; j < ringbufferLength; ++j) {
+ ringbuffer1[j] = ringbuffer2[j] = null; // Created on-demand
}
- GL.tempBufferIndexLookup[i] = curr;
}
if (quads) {
@@ -140,6 +163,53 @@ var LibraryGL = {
}
},
+ getTempVertexBuffer: function getTempVertexBuffer(sizeBytes) {
+ var idx = GL.log2ceilLookup[sizeBytes];
+ var ringbuffer = GL.tempVertexBuffers1[idx];
+ var nextFreeBufferIndex = GL.tempVertexBufferCounters1[idx];
+ GL.tempVertexBufferCounters1[idx] = (GL.tempVertexBufferCounters1[idx]+1) & (GL.numTempVertexBuffersPerSize-1);
+ var vbo = ringbuffer[nextFreeBufferIndex];
+ if (vbo) {
+ return vbo;
+ }
+ var prevVBO = GLctx.getParameter(GLctx.ARRAY_BUFFER_BINDING);
+ ringbuffer[nextFreeBufferIndex] = GLctx.createBuffer();
+ GLctx.bindBuffer(GLctx.ARRAY_BUFFER, ringbuffer[nextFreeBufferIndex]);
+ GLctx.bufferData(GLctx.ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+ GLctx.bindBuffer(GLctx.ARRAY_BUFFER, prevVBO);
+ return ringbuffer[nextFreeBufferIndex];
+ },
+
+ getTempIndexBuffer: function getTempIndexBuffer(sizeBytes) {
+ var idx = GL.log2ceilLookup[sizeBytes];
+ var ibo = GL.tempIndexBuffers[idx];
+ if (ibo) {
+ return ibo;
+ }
+ var prevIBO = GLctx.getParameter(GLctx.ELEMENT_ARRAY_BUFFER_BINDING);
+ GL.tempIndexBuffers[idx] = GLctx.createBuffer();
+ GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[idx]);
+ GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+ GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, prevIBO);
+ return GL.tempIndexBuffers[idx];
+ },
+
+ // Called at start of each new WebGL rendering frame. This swaps the doublebuffered temp VB memory pointers,
+ // so that every second frame utilizes different set of temp buffers. The aim is to keep the set of buffers
+ // being rendered, and the set of buffers being updated disjoint.
+ newRenderingFrameStarted: function newRenderingFrameStarted() {
+ var vb = GL.tempVertexBuffers1;
+ GL.tempVertexBuffers1 = GL.tempVertexBuffers2;
+ GL.tempVertexBuffers2 = vb;
+ vb = GL.tempVertexBufferCounters1;
+ GL.tempVertexBufferCounters1 = GL.tempVertexBufferCounters2;
+ GL.tempVertexBufferCounters2 = vb;
+ var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+ for(var i = 0; i <= largestIndex; ++i) {
+ GL.tempVertexBufferCounters1[i] = 0;
+ }
+ },
+
// Find a token in a shader source string
findToken: function(source, token) {
function isIdentChar(ch) {
@@ -446,9 +516,6 @@ var LibraryGL = {
preDrawHandleClientVertexAttribBindings: function preDrawHandleClientVertexAttribBindings(count) {
GL.resetBufferBinding = false;
- var used = GL.usedTempBuffers;
- used.length = 0;
-
// TODO: initial pass to detect ranges we need to upload, might not need an upload per attrib
for (var i = 0; i < GL.maxVertexAttribs; ++i) {
var cb = GL.clientBuffers[i];
@@ -457,15 +524,7 @@ var LibraryGL = {
GL.resetBufferBinding = true;
var size = GL.calcBufLength(cb.size, cb.type, cb.stride, count);
- var index = GL.tempBufferIndexLookup[size];
- var buf;
- do {
-#if ASSERTIONS
- assert(index < GL.tempVertexBuffers.length);
-#endif
- buf = GL.tempVertexBuffers[index++];
- } while (used.indexOf(buf) >= 0);
- used.push(buf);
+ var buf = GL.getTempVertexBuffer(size);
GLctx.bindBuffer(GLctx.ARRAY_BUFFER, buf);
GLctx.bufferSubData(GLctx.ARRAY_BUFFER,
0,
@@ -3725,7 +3784,7 @@ var LibraryGL = {
#if ASSERTIONS
assert(end <= GL.MAX_TEMP_BUFFER_SIZE, 'too much vertex data');
#endif
- arrayBuffer = GL.tempVertexBuffers[GL.tempBufferIndexLookup[end]];
+ arrayBuffer = GL.getTempVertexBuffer(end);
// TODO: consider using the last buffer we bound, if it was larger. downside is larger buffer, but we might avoid rebinding and preparing
} else {
arrayBuffer = GL.currArrayBuffer;
@@ -4227,7 +4286,7 @@ var LibraryGL = {
#if ASSERTIONS
assert(numProvidedIndexes << 1 <= GL.MAX_TEMP_BUFFER_SIZE, 'too many immediate mode indexes (a)');
#endif
- var indexBuffer = GL.tempIndexBuffers[GL.tempBufferIndexLookup[numProvidedIndexes << 1]];
+ var indexBuffer = GL.getTempIndexBuffer(numProvidedIndexes << 1);
GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, indexBuffer);
GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER, 0, {{{ makeHEAPView('U16', 'ptr', 'ptr + (numProvidedIndexes << 1)') }}});
ptr = 0;
@@ -4992,7 +5051,7 @@ var LibraryGL = {
var buf;
if (!GL.currElementArrayBuffer) {
var size = GL.calcBufLength(1, type, 0, count);
- buf = GL.tempIndexBuffers[GL.tempBufferIndexLookup[size]];
+ buf = GL.getTempIndexBuffer(size);
GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, buf);
GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER,
0,