diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/library_browser.js | 9 | ||||
-rw-r--r-- | src/library_gl.js | 227 | ||||
-rw-r--r-- | src/parseTools.js | 4 | ||||
-rw-r--r-- | src/relooper/Relooper.cpp | 27 | ||||
-rw-r--r-- | src/relooper/Relooper.h | 4 | ||||
-rw-r--r-- | src/relooper/test.txt | 4 |
6 files changed, 194 insertions, 81 deletions
diff --git a/src/library_browser.js b/src/library_browser.js index d5e35339..458a8dd2 100644 --- a/src/library_browser.js +++ b/src/library_browser.js @@ -775,6 +775,15 @@ mergeInto(LibraryManager.library, { return; } + // Signal GL rendering layer that processing of a new frame is about to start. This helps it optimize + // VBO double-buffering and reduce GPU stalls. +#if FULL_ES2 + GL.newRenderingFrameStarted(); +#endif +#if LEGACY_GL_EMULATION + GL.newRenderingFrameStarted(); +#endif + if (Module['preMainLoop']) { Module['preMainLoop'](); } diff --git a/src/library_gl.js b/src/library_gl.js index f6978c04..61ca8957 100644 --- a/src/library_gl.js +++ b/src/library_gl.js @@ -57,6 +57,7 @@ var LibraryGL = { unpackAlignment: 4, // default alignment is 4 bytes init: function() { + GL.createLog2ceilLookup(GL.MAX_TEMP_BUFFER_SIZE); Browser.moduleContextCreatedCallbacks.push(GL.initExtensions); }, @@ -81,36 +82,58 @@ var LibraryGL = { miniTempBuffer: null, miniTempBufferViews: [0], // index i has the view of size i+1 - // Large temporary buffers + // When user GL code wants to render from client-side memory, we need to upload the vertex data to a temp VBO + // for rendering. Maintain a set of temp VBOs that are created-on-demand to appropriate sizes, and never destroyed. + // Also, for best performance the VBOs are double-buffered, i.e. every second frame we switch the set of VBOs we + // upload to, so that rendering from the previous frame is not disturbed by uploading from new data to it, which + // could cause a GPU-CPU pipeline stall. + // Note that index buffers are not double-buffered (at the moment) in this manner. MAX_TEMP_BUFFER_SIZE: {{{ GL_MAX_TEMP_BUFFER_SIZE }}}, - tempBufferIndexLookup: null, - tempVertexBuffers: null, - tempIndexBuffers: null, + tempVertexBuffers1: [], + tempVertexBufferCounters1: [], + tempVertexBuffers2: [], + tempVertexBufferCounters2: [], + // Maximum number of temp VBOs of one size to maintain, after that we start reusing old ones, which is safe but can give + // a performance impact. If CPU-GPU stalls are a problem, increasing this might help. + numTempVertexBuffersPerSize: 64, // (const) + tempIndexBuffers: [], tempQuadIndexBuffer: null, - generateTempBuffers: function(quads) { - GL.tempBufferIndexLookup = new Uint8Array(GL.MAX_TEMP_BUFFER_SIZE+1); - GL.tempVertexBuffers = []; - GL.tempIndexBuffers = []; - var last = -1, curr = -1; - var size = 1; - for (var i = 0; i <= GL.MAX_TEMP_BUFFER_SIZE; i++) { - if (i > size) { - size <<= 1; + // Precompute a lookup table for the function ceil(log2(x)), i.e. how many bits are needed to represent x, or, + // if x was rounded up to next pow2, which index is the single '1' bit at? + // Then log2ceilLookup[x] returns ceil(log2(x)). + log2ceilLookup: null, + createLog2ceilLookup: function(maxValue) { + GL.log2ceilLookup = new Uint8Array(maxValue+1); + var log2 = 0; + var pow2 = 1; + GL.log2ceilLookup[0] = 0; + for(var i = 1; i <= maxValue; ++i) { + if (i > pow2) { + pow2 <<= 1; + ++log2; } - if (size != last) { - curr++; - GL.tempVertexBuffers[curr] = GLctx.createBuffer(); - GLctx.bindBuffer(GLctx.ARRAY_BUFFER, GL.tempVertexBuffers[curr]); - GLctx.bufferData(GLctx.ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW); - GLctx.bindBuffer(GLctx.ARRAY_BUFFER, null); - GL.tempIndexBuffers[curr] = GLctx.createBuffer(); - GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[curr]); - GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW); - GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, null); - last = size; + GL.log2ceilLookup[i] = log2; + } + }, + + generateTempBuffers: function(quads) { + var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE]; + GL.tempVertexBufferCounters1.length = GL.tempVertexBufferCounters2.length = largestIndex+1; + GL.tempVertexBuffers1.length = GL.tempVertexBuffers2.length = largestIndex+1; + GL.tempIndexBuffers.length = largestIndex+1; + for(var i = 0; i <= largestIndex; ++i) { + GL.tempIndexBuffers[i] = null; // Created on-demand + GL.tempVertexBufferCounters1[i] = GL.tempVertexBufferCounters2[i] = 0; + var ringbufferLength = GL.numTempVertexBuffersPerSize; + GL.tempVertexBuffers1[i] = []; + GL.tempVertexBuffers2[i] = []; + var ringbuffer1 = GL.tempVertexBuffers1[i]; + var ringbuffer2 = GL.tempVertexBuffers2[i]; + ringbuffer1.length = ringbuffer2.length = ringbufferLength; + for(var j = 0; j < ringbufferLength; ++j) { + ringbuffer1[j] = ringbuffer2[j] = null; // Created on-demand } - GL.tempBufferIndexLookup[i] = curr; } if (quads) { @@ -140,6 +163,53 @@ var LibraryGL = { } }, + getTempVertexBuffer: function getTempVertexBuffer(sizeBytes) { + var idx = GL.log2ceilLookup[sizeBytes]; + var ringbuffer = GL.tempVertexBuffers1[idx]; + var nextFreeBufferIndex = GL.tempVertexBufferCounters1[idx]; + GL.tempVertexBufferCounters1[idx] = (GL.tempVertexBufferCounters1[idx]+1) & (GL.numTempVertexBuffersPerSize-1); + var vbo = ringbuffer[nextFreeBufferIndex]; + if (vbo) { + return vbo; + } + var prevVBO = GLctx.getParameter(GLctx.ARRAY_BUFFER_BINDING); + ringbuffer[nextFreeBufferIndex] = GLctx.createBuffer(); + GLctx.bindBuffer(GLctx.ARRAY_BUFFER, ringbuffer[nextFreeBufferIndex]); + GLctx.bufferData(GLctx.ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW); + GLctx.bindBuffer(GLctx.ARRAY_BUFFER, prevVBO); + return ringbuffer[nextFreeBufferIndex]; + }, + + getTempIndexBuffer: function getTempIndexBuffer(sizeBytes) { + var idx = GL.log2ceilLookup[sizeBytes]; + var ibo = GL.tempIndexBuffers[idx]; + if (ibo) { + return ibo; + } + var prevIBO = GLctx.getParameter(GLctx.ELEMENT_ARRAY_BUFFER_BINDING); + GL.tempIndexBuffers[idx] = GLctx.createBuffer(); + GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[idx]); + GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW); + GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, prevIBO); + return GL.tempIndexBuffers[idx]; + }, + + // Called at start of each new WebGL rendering frame. This swaps the doublebuffered temp VB memory pointers, + // so that every second frame utilizes different set of temp buffers. The aim is to keep the set of buffers + // being rendered, and the set of buffers being updated disjoint. + newRenderingFrameStarted: function newRenderingFrameStarted() { + var vb = GL.tempVertexBuffers1; + GL.tempVertexBuffers1 = GL.tempVertexBuffers2; + GL.tempVertexBuffers2 = vb; + vb = GL.tempVertexBufferCounters1; + GL.tempVertexBufferCounters1 = GL.tempVertexBufferCounters2; + GL.tempVertexBufferCounters2 = vb; + var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE]; + for(var i = 0; i <= largestIndex; ++i) { + GL.tempVertexBufferCounters1[i] = 0; + } + }, + // Find a token in a shader source string findToken: function(source, token) { function isIdentChar(ch) { @@ -446,9 +516,6 @@ var LibraryGL = { preDrawHandleClientVertexAttribBindings: function preDrawHandleClientVertexAttribBindings(count) { GL.resetBufferBinding = false; - var used = GL.usedTempBuffers; - used.length = 0; - // TODO: initial pass to detect ranges we need to upload, might not need an upload per attrib for (var i = 0; i < GL.maxVertexAttribs; ++i) { var cb = GL.clientBuffers[i]; @@ -457,15 +524,7 @@ var LibraryGL = { GL.resetBufferBinding = true; var size = GL.calcBufLength(cb.size, cb.type, cb.stride, count); - var index = GL.tempBufferIndexLookup[size]; - var buf; - do { -#if ASSERTIONS - assert(index < GL.tempVertexBuffers.length); -#endif - buf = GL.tempVertexBuffers[index++]; - } while (used.indexOf(buf) >= 0); - used.push(buf); + var buf = GL.getTempVertexBuffer(size); GLctx.bindBuffer(GLctx.ARRAY_BUFFER, buf); GLctx.bufferSubData(GLctx.ARRAY_BUFFER, 0, @@ -2742,14 +2801,6 @@ var LibraryGL = { this.key0 = -1; // The key of this texture unit must be recomputed when rendering the next time. GLImmediate.currentRenderer = null; // The currently used renderer must be re-evaluated at next render. } - this.traverseState = function(keyView) { - if (this.key0 == -1) { - this.recomputeKey(); - } - keyView.next(this.key0); - keyView.next(this.key1); - keyView.next(this.key2); - }; } function CTexUnit() { @@ -2758,26 +2809,55 @@ var LibraryGL = { this.enabled_tex2D = false; this.enabled_tex3D = false; this.enabled_texCube = false; + this.texTypesEnabled = 0; // A bitfield combination of the four flags above, used for fast access to operations. this.traverseState = function CTexUnit_traverseState(keyView) { - var texUnitType = this.getTexType(); - keyView.next(texUnitType); - if (!texUnitType) return; - this.env.traverseState(keyView); + if (this.texTypesEnabled) { + if (this.env.key0 == -1) { + this.env.recomputeKey(); + } + keyView.next(this.texTypesEnabled | (this.env.key0 << 4)); + keyView.next(this.env.key1); + keyView.next(this.env.key2); + } else { + // For correctness, must traverse a zero value, theoretically a subsequent integer key could collide with this value otherwise. + keyView.next(0); + } }; }; // Class impls: CTexUnit.prototype.enabled = function CTexUnit_enabled() { - return this.getTexType() != 0; + return this.texTypesEnabled; } CTexUnit.prototype.genPassLines = function CTexUnit_genPassLines(passOutputVar, passInputVar, texUnitID) { if (!this.enabled()) { return ["vec4 " + passOutputVar + " = " + passInputVar + ";"]; } - - return this.env.genPassLines(passOutputVar, passInputVar, texUnitID); + var lines = this.env.genPassLines(passOutputVar, passInputVar, texUnitID).join('\n'); + + var texLoadLines = ''; + var texLoadRegex = /(texture.*?\(.*?\))/g; + var loadCounter = 0; + var load; + + // As an optimization, merge duplicate identical texture loads to one var. + while(load = texLoadRegex.exec(lines)) { + var texLoadExpr = load[1]; + var secondOccurrence = lines.slice(load.index+1).indexOf(texLoadExpr); + if (secondOccurrence != -1) { // And also has a second occurrence of same load expression.. + // Create new var to store the common load. + var prefix = TEXENVJIT_NAMESPACE_PREFIX + 'env' + texUnitID + "_"; + var texLoadVar = prefix + 'texload' + loadCounter++; + var texLoadLine = 'vec4 ' + texLoadVar + ' = ' + texLoadExpr + ';\n'; + texLoadLines += texLoadLine + '\n'; // Store the generated texture load statements in a temp string to not confuse regex search in progress. + lines = lines.split(texLoadExpr).join(texLoadVar); + // Reset regex search, since we modified the string. + texLoadRegex = /(texture.*\(.*\))/g; + } + } + return [texLoadLines + lines]; } CTexUnit.prototype.getTexType = function CTexUnit_getTexType() { @@ -2898,13 +2978,18 @@ var LibraryGL = { var alphaLines = this.genCombinerLines(false, alphaVar, passInputVar, texUnitID, this.alphaCombiner, this.alphaSrc, this.alphaOp); + + // Generate scale, but avoid generating an identity op that multiplies by one. + var scaledColor = (this.colorScale == 1) ? colorVar : (colorVar + " * " + valToFloatLiteral(this.colorScale)); + var scaledAlpha = (this.alphaScale == 1) ? alphaVar : (alphaVar + " * " + valToFloatLiteral(this.alphaScale)); + var line = [ "vec4 " + passOutputVar, " = ", "vec4(", - colorVar + " * " + valToFloatLiteral(this.colorScale), + scaledColor, ", ", - alphaVar + " * " + valToFloatLiteral(this.alphaScale), + scaledAlpha, ")", ";", ].join(""); @@ -3084,12 +3169,7 @@ var LibraryGL = { traverseState: function(keyView) { for (var i = 0; i < s_texUnits.length; i++) { - var texUnit = s_texUnits[i]; - var enabled = texUnit.enabled(); - keyView.next(enabled); - if (enabled) { - texUnit.traverseState(keyView); - } + s_texUnits[i].traverseState(keyView); } }, @@ -3113,24 +3193,28 @@ var LibraryGL = { if (!cur.enabled_tex1D) { GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again. cur.enabled_tex1D = true; + cur.texTypesEnabled |= 1; } break; case GL_TEXTURE_2D: if (!cur.enabled_tex2D) { GLImmediate.currentRenderer = null; cur.enabled_tex2D = true; + cur.texTypesEnabled |= 2; } break; case GL_TEXTURE_3D: if (!cur.enabled_tex3D) { GLImmediate.currentRenderer = null; cur.enabled_tex3D = true; + cur.texTypesEnabled |= 4; } break; case GL_TEXTURE_CUBE_MAP: if (!cur.enabled_texCube) { GLImmediate.currentRenderer = null; cur.enabled_texCube = true; + cur.texTypesEnabled |= 8; } break; } @@ -3143,24 +3227,28 @@ var LibraryGL = { if (cur.enabled_tex1D) { GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again. cur.enabled_tex1D = false; + cur.texTypesEnabled &= ~1; } break; case GL_TEXTURE_2D: if (cur.enabled_tex2D) { GLImmediate.currentRenderer = null; cur.enabled_tex2D = false; + cur.texTypesEnabled &= ~2; } break; case GL_TEXTURE_3D: if (cur.enabled_tex3D) { GLImmediate.currentRenderer = null; cur.enabled_tex3D = false; + cur.texTypesEnabled &= ~4; } break; case GL_TEXTURE_CUBE_MAP: if (cur.enabled_texCube) { GLImmediate.currentRenderer = null; cur.enabled_texCube = false; + cur.texTypesEnabled &= ~8; } break; } @@ -3434,7 +3522,6 @@ var LibraryGL = { // we maintain a cache of renderers, optimized to not generate garbage var attributes = GLImmediate.liveClientAttributes; var cacheMap = GLImmediate.rendererCache; - var temp; var keyView = cacheMap.getStaticKeyView().reset(); // By attrib state: @@ -3442,7 +3529,6 @@ var LibraryGL = { for (var i = 0; i < attributes.length; i++) { enabledAttributesKey |= 1 << attributes[i].name; } - keyView.next(enabledAttributesKey); // By fog state: var fogParam = 0; @@ -3459,13 +3545,17 @@ var LibraryGL = { break; } } - keyView.next(fogParam); + keyView.next((enabledAttributesKey << 2) | fogParam); +#if !GL_FFP_ONLY // By cur program: keyView.next(GL.currProgram); if (!GL.currProgram) { +#endif GLImmediate.TexEnvJIT.traverseState(keyView); +#if !GL_FFP_ONLY } +#endif // If we don't already have it, create it. var renderer = keyView.get(); @@ -3720,7 +3810,7 @@ var LibraryGL = { #if ASSERTIONS assert(end <= GL.MAX_TEMP_BUFFER_SIZE, 'too much vertex data'); #endif - arrayBuffer = GL.tempVertexBuffers[GL.tempBufferIndexLookup[end]]; + arrayBuffer = GL.getTempVertexBuffer(end); // TODO: consider using the last buffer we bound, if it was larger. downside is larger buffer, but we might avoid rebinding and preparing } else { arrayBuffer = GL.currArrayBuffer; @@ -4028,11 +4118,12 @@ var LibraryGL = { if (!Module.useWebGL) return; // a 2D canvas may be currently used TODO: make sure we are actually called in that case - GLImmediate.TexEnvJIT.init(GLctx); - // User can override the maximum number of texture units that we emulate. Using fewer texture units increases runtime performance // slightly, so it is advantageous to choose as small value as needed. GLImmediate.MAX_TEXTURES = Module['GL_MAX_TEXTURE_IMAGE_UNITS'] || GLctx.getParameter(GLctx.MAX_TEXTURE_IMAGE_UNITS); + + GLImmediate.TexEnvJIT.init(GLctx, GLImmediate.MAX_TEXTURES); + GLImmediate.NUM_ATTRIBUTES = 3 /*pos+normal+color attributes*/ + GLImmediate.MAX_TEXTURES; GLImmediate.clientAttributes = []; GLEmulation.enabledClientAttribIndices = []; @@ -4221,7 +4312,7 @@ var LibraryGL = { #if ASSERTIONS assert(numProvidedIndexes << 1 <= GL.MAX_TEMP_BUFFER_SIZE, 'too many immediate mode indexes (a)'); #endif - var indexBuffer = GL.tempIndexBuffers[GL.tempBufferIndexLookup[numProvidedIndexes << 1]]; + var indexBuffer = GL.getTempIndexBuffer(numProvidedIndexes << 1); GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, indexBuffer); GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER, 0, {{{ makeHEAPView('U16', 'ptr', 'ptr + (numProvidedIndexes << 1)') }}}); ptr = 0; @@ -4986,7 +5077,7 @@ var LibraryGL = { var buf; if (!GL.currElementArrayBuffer) { var size = GL.calcBufLength(1, type, 0, count); - buf = GL.tempIndexBuffers[GL.tempBufferIndexLookup[size]]; + buf = GL.getTempIndexBuffer(size); GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, buf); GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER, 0, diff --git a/src/parseTools.js b/src/parseTools.js index b7f97a40..be0cbcab 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -2150,9 +2150,9 @@ function makeRounding(value, bits, signed, floatConversion) { } } // Math.floor is reasonably fast if we don't care about corrections (and even correct if unsigned) - if (!correctRoundings() || !signed) return 'Math_floor(' + value + ')'; + if (!correctRoundings() || !signed) return '(+Math_floor(' + value + '))'; // We are left with >32 bits - return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? Math_floor(VALUE) : Math_ceil(VALUE)', value, 'tempBigIntR'); + return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? +Math_floor(VALUE) : +Math_ceil(VALUE)', value, 'tempBigIntR'); } } diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp index d5772c62..204986da 100644 --- a/src/relooper/Relooper.cpp +++ b/src/relooper/Relooper.cpp @@ -322,12 +322,26 @@ void MultipleShape::RenderLoopPostfix() { void MultipleShape::Render(bool InLoop) { RenderLoopPrefix(); - bool First = true; + + // We know that blocks with the same Id were split from the same source, so their contents are identical and they are logically the same, so re-merge them here + typedef std::map<int, Shape*> IdShapeMap; + IdShapeMap IdMap; for (BlockShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) { + int Id = iter->first->Id; + IdShapeMap::iterator Test = IdMap.find(Id); + if (Test != IdMap.end()) { + assert(Shape::IsSimple(iter->second) && Shape::IsSimple(Test->second)); // we can only merge simple blocks, something horrible has gone wrong if we see anything else + continue; + } + IdMap[iter->first->Id] = iter->second; + } + + bool First = true; + for (IdShapeMap::iterator iter = IdMap.begin(); iter != IdMap.end(); iter++) { if (AsmJS) { - PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first->Id); + PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first); } else { - PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first->Id); + PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first); } First = false; Indenter::Indent(); @@ -391,8 +405,8 @@ Relooper::~Relooper() { for (unsigned i = 0; i < Shapes.size(); i++) delete Shapes[i]; } -void Relooper::AddBlock(Block *New) { - New->Id = BlockIdCounter++; +void Relooper::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; Blocks.push_back(New); } @@ -446,8 +460,7 @@ void Relooper::Calculate(Block *Entry) { for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { Block *Prior = *iter; Block *Split = new Block(Original->Code, Original->BranchVar); - Parent->AddBlock(Split); - PrintDebug(" to %d\n", Split->Id); + Parent->AddBlock(Split, Original->Id); Split->BranchesIn.insert(Prior); Branch *Details = Prior->BranchesOut[Original]; Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code); diff --git a/src/relooper/Relooper.h b/src/relooper/Relooper.h index 6b9394db..85adf359 100644 --- a/src/relooper/Relooper.h +++ b/src/relooper/Relooper.h @@ -57,7 +57,7 @@ struct Block { BlockBranchMap ProcessedBranchesOut; BlockSet ProcessedBranchesIn; Shape *Parent; // The shape we are directly inside - int Id; // A unique identifier, defined when added to relooper + int Id; // A unique identifier, defined when added to relooper. Note that this uniquely identifies a *logical* block - if we split it, the two instances have the same content *and* the same Id const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input) const char *BranchVar; // If we have more than one branch out, the variable whose value determines where we go bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable @@ -191,7 +191,7 @@ struct Relooper { Relooper(); ~Relooper(); - void AddBlock(Block *New); + void AddBlock(Block *New, int Id=-1); // Calculates the shapes void Calculate(Block *Entry); diff --git a/src/relooper/test.txt b/src/relooper/test.txt index cb02b867..82b02ad7 100644 --- a/src/relooper/test.txt +++ b/src/relooper/test.txt @@ -91,7 +91,7 @@ } default: { var $x_1 = $x_0; - label = 8; + label = 7; break L1; } } @@ -106,7 +106,7 @@ } } } - if (label == 8) { + if (label == 7) { // code 7 } // code 4 |