6 files changed, 194 insertions, 81 deletions
diff --git a/src/library_browser.js b/src/library_browser.js
index d5e35339..458a8dd2 100644
--- a/src/library_browser.js
+++ b/src/library_browser.js
@@ -775,6 +775,15 @@ mergeInto(LibraryManager.library, {
         return;
       }
 
+      // Signal GL rendering layer that processing of a new frame is about to start. This helps it optimize
+      // VBO double-buffering and reduce GPU stalls.
+#if FULL_ES2
+      GL.newRenderingFrameStarted();
+#endif
+#if LEGACY_GL_EMULATION
+      GL.newRenderingFrameStarted();
+#endif
+
       if (Module['preMainLoop']) {
         Module['preMainLoop']();
       }
diff --git a/src/library_gl.js b/src/library_gl.js
index f6978c04..61ca8957 100644
--- a/src/library_gl.js
+++ b/src/library_gl.js
@@ -57,6 +57,7 @@ var LibraryGL = {
     unpackAlignment: 4, // default alignment is 4 bytes
 
     init: function() {
+      GL.createLog2ceilLookup(GL.MAX_TEMP_BUFFER_SIZE);
       Browser.moduleContextCreatedCallbacks.push(GL.initExtensions);
     },
 
@@ -81,36 +82,58 @@ var LibraryGL = {
     miniTempBuffer: null,
     miniTempBufferViews: [0], // index i has the view of size i+1
 
-    // Large temporary buffers
+    // When user GL code wants to render from client-side memory, we need to upload the vertex data to a temp VBO
+    // for rendering. Maintain a set of temp VBOs that are created-on-demand to appropriate sizes, and never destroyed.
+    // Also, for best performance the VBOs are double-buffered, i.e. every second frame we switch the set of VBOs we
+    // upload to, so that rendering from the previous frame is not disturbed by uploading new data to it, which
+    // could cause a GPU-CPU pipeline stall.
+    // Note that index buffers are not double-buffered (at the moment) in this manner.
     MAX_TEMP_BUFFER_SIZE: {{{ GL_MAX_TEMP_BUFFER_SIZE }}},
-    tempBufferIndexLookup: null,
-    tempVertexBuffers: null,
-    tempIndexBuffers: null,
+    tempVertexBuffers1: [],
+    tempVertexBufferCounters1: [],
+    tempVertexBuffers2: [],
+    tempVertexBufferCounters2: [],
+    // Maximum number of temp VBOs of one size to maintain, after that we start reusing old ones, which is safe but can give
+    // a performance impact. If CPU-GPU stalls are a problem, increasing this might help.
+    numTempVertexBuffersPerSize: 64, // (const)
+    tempIndexBuffers: [],
     tempQuadIndexBuffer: null,
 
-    generateTempBuffers: function(quads) {
-      GL.tempBufferIndexLookup = new Uint8Array(GL.MAX_TEMP_BUFFER_SIZE+1);
-      GL.tempVertexBuffers = [];
-      GL.tempIndexBuffers = [];
-      var last = -1, curr = -1;
-      var size = 1;
-      for (var i = 0; i <= GL.MAX_TEMP_BUFFER_SIZE; i++) {
-        if (i > size) {
-          size <<= 1;
+    // Precompute a lookup table for the function ceil(log2(x)), i.e. the smallest exponent k for which 2^k >= x, or,
+    // if x was rounded up to next pow2, which index is the single '1' bit at?
+    // Then log2ceilLookup[x] returns ceil(log2(x)) for 1 <= x <= maxValue, and 0 for x == 0.
+    log2ceilLookup: null, // Uint8Array built by createLog2ceilLookup(); null until then.
+    createLog2ceilLookup: function(maxValue) {
+      GL.log2ceilLookup = new Uint8Array(maxValue+1); // One entry for every x in 0..maxValue inclusive.
+      var log2 = 0;
+      var pow2 = 1; // Kept in step with log2, so that pow2 == 1 << log2 at all times.
+      GL.log2ceilLookup[0] = 0; // log2(0) does not exist; 0 is stored, so x == 0 maps to the same entry value as x == 1.
+      for(var i = 1; i <= maxValue; ++i) {
+        if (i > pow2) { // i has grown past the current power of two, so step up to the next one.
+          pow2 <<= 1;
+          ++log2;
         }
-        if (size != last) {
-          curr++;
-          GL.tempVertexBuffers[curr] = GLctx.createBuffer();
-          GLctx.bindBuffer(GLctx.ARRAY_BUFFER, GL.tempVertexBuffers[curr]);
-          GLctx.bufferData(GLctx.ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
-          GLctx.bindBuffer(GLctx.ARRAY_BUFFER, null);
-          GL.tempIndexBuffers[curr] = GLctx.createBuffer();
-          GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[curr]);
-          GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
-          GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, null);
-          last = size;
+        GL.log2ceilLookup[i] = log2; // Here pow2 >= i > pow2/2 (for i > 1), i.e. log2 == ceil(log2(i)).
+      }
+    },
+
+    generateTempBuffers: function(quads) {
+      var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+      GL.tempVertexBufferCounters1.length = GL.tempVertexBufferCounters2.length = largestIndex+1;
+      GL.tempVertexBuffers1.length = GL.tempVertexBuffers2.length = largestIndex+1;
+      GL.tempIndexBuffers.length = largestIndex+1;
+      for(var i = 0; i <= largestIndex; ++i) {
+        GL.tempIndexBuffers[i] = null; // Created on-demand
+        GL.tempVertexBufferCounters1[i] = GL.tempVertexBufferCounters2[i] = 0;
+        var ringbufferLength = GL.numTempVertexBuffersPerSize;
+        GL.tempVertexBuffers1[i] = [];
+        GL.tempVertexBuffers2[i] = [];
+        var ringbuffer1 = GL.tempVertexBuffers1[i];
+        var ringbuffer2 = GL.tempVertexBuffers2[i];
+        ringbuffer1.length = ringbuffer2.length = ringbufferLength;
+        for(var j = 0; j < ringbufferLength; ++j) {
+          ringbuffer1[j] = ringbuffer2[j] = null; // Created on-demand
         }
-        GL.tempBufferIndexLookup[i] = curr;
       }
 
       if (quads) {
@@ -140,6 +163,53 @@ var LibraryGL = {
       }
     },
 
+    getTempVertexBuffer: function getTempVertexBuffer(sizeBytes) {
+      var idx = GL.log2ceilLookup[sizeBytes];
+      var ringbuffer = GL.tempVertexBuffers1[idx];
+      var nextFreeBufferIndex = GL.tempVertexBufferCounters1[idx];
+      GL.tempVertexBufferCounters1[idx] = (GL.tempVertexBufferCounters1[idx]+1) & (GL.numTempVertexBuffersPerSize-1);
+      var vbo = ringbuffer[nextFreeBufferIndex];
+      if (vbo) {
+        return vbo;
+      }
+      var prevVBO = GLctx.getParameter(GLctx.ARRAY_BUFFER_BINDING);
+      ringbuffer[nextFreeBufferIndex] = GLctx.createBuffer();
+      GLctx.bindBuffer(GLctx.ARRAY_BUFFER, ringbuffer[nextFreeBufferIndex]);
+      GLctx.bufferData(GLctx.ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+      GLctx.bindBuffer(GLctx.ARRAY_BUFFER, prevVBO);
+      return ringbuffer[nextFreeBufferIndex];
+    },
+
+    getTempIndexBuffer: function getTempIndexBuffer(sizeBytes) {
+      var idx = GL.log2ceilLookup[sizeBytes];
+      var ibo = GL.tempIndexBuffers[idx];
+      if (ibo) {
+        return ibo;
+      }
+      var prevIBO = GLctx.getParameter(GLctx.ELEMENT_ARRAY_BUFFER_BINDING);
+      GL.tempIndexBuffers[idx] = GLctx.createBuffer();
+      GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[idx]);
+      GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+      GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, prevIBO);
+      return GL.tempIndexBuffers[idx];
+    },
+
+    // Called at start of each new WebGL rendering frame. This swaps the doublebuffered temp VB memory pointers,
+    // so that every second frame utilizes different set of temp buffers. The aim is to keep the set of buffers
+    // being rendered, and the set of buffers being updated disjoint.
+    newRenderingFrameStarted: function newRenderingFrameStarted() {
+      var vb = GL.tempVertexBuffers1;
+      GL.tempVertexBuffers1 = GL.tempVertexBuffers2;
+      GL.tempVertexBuffers2 = vb;
+      vb = GL.tempVertexBufferCounters1;
+      GL.tempVertexBufferCounters1 = GL.tempVertexBufferCounters2;
+      GL.tempVertexBufferCounters2 = vb;
+      var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+      for(var i = 0; i <= largestIndex; ++i) {
+        GL.tempVertexBufferCounters1[i] = 0;
+      }
+    },
+
     // Find a token in a shader source string
     findToken: function(source, token) {
       function isIdentChar(ch) {
@@ -446,9 +516,6 @@ var LibraryGL = {
     preDrawHandleClientVertexAttribBindings: function preDrawHandleClientVertexAttribBindings(count) {
       GL.resetBufferBinding = false;
 
-      var used = GL.usedTempBuffers;
-      used.length = 0;
-
       // TODO: initial pass to detect ranges we need to upload, might not need an upload per attrib
       for (var i = 0; i < GL.maxVertexAttribs; ++i) {
         var cb = GL.clientBuffers[i];
@@ -457,15 +524,7 @@ var LibraryGL = {
         GL.resetBufferBinding = true;
 
         var size = GL.calcBufLength(cb.size, cb.type, cb.stride, count);
-        var index = GL.tempBufferIndexLookup[size];
-        var buf;
-        do {
-#if ASSERTIONS
-          assert(index < GL.tempVertexBuffers.length);
-#endif
-          buf = GL.tempVertexBuffers[index++];
-        } while (used.indexOf(buf) >= 0);
-        used.push(buf);
+        var buf = GL.getTempVertexBuffer(size);
         GLctx.bindBuffer(GLctx.ARRAY_BUFFER, buf);
         GLctx.bufferSubData(GLctx.ARRAY_BUFFER,
                                  0,
@@ -2742,14 +2801,6 @@ var LibraryGL = {
           this.key0 = -1; // The key of this texture unit must be recomputed when rendering the next time.
           GLImmediate.currentRenderer = null; // The currently used renderer must be re-evaluated at next render.
         }
-        this.traverseState = function(keyView) {
-          if (this.key0 == -1) {
-            this.recomputeKey();
-          }
-          keyView.next(this.key0);
-          keyView.next(this.key1);
-          keyView.next(this.key2);
-        };
       }
 
       function CTexUnit() {
@@ -2758,26 +2809,55 @@ var LibraryGL = {
         this.enabled_tex2D   = false;
         this.enabled_tex3D   = false;
         this.enabled_texCube = false;
+        this.texTypesEnabled = 0; // A bitfield combination of the four flags above, used for fast access to operations.
 
         this.traverseState = function CTexUnit_traverseState(keyView) {
-          var texUnitType = this.getTexType();
-          keyView.next(texUnitType);
-          if (!texUnitType) return;
-          this.env.traverseState(keyView);
+          if (this.texTypesEnabled) {
+            if (this.env.key0 == -1) { // -1 marks the env key as needing recomputation before it can be used.
+              this.env.recomputeKey();
+            }
+            keyView.next(this.texTypesEnabled | (this.env.key0 << 4)); // Enabled-target flags (1|2|4|8) fill the low 4 bits, env.key0 is packed above them.
+            keyView.next(this.env.key1);
+            keyView.next(this.env.key2);
+          } else {
+            // For correctness, a disabled unit must still traverse a zero value; otherwise a subsequent integer key could theoretically collide with this value.
+            keyView.next(0);
+          }
         };
       };
 
       // Class impls:
       CTexUnit.prototype.enabled = function CTexUnit_enabled() {
-        return this.getTexType() != 0;
+        return this.texTypesEnabled;
       }
 
       CTexUnit.prototype.genPassLines = function CTexUnit_genPassLines(passOutputVar, passInputVar, texUnitID) {
         if (!this.enabled()) {
           return ["vec4 " + passOutputVar + " = " + passInputVar + ";"];
         }
-
-        return this.env.genPassLines(passOutputVar, passInputVar, texUnitID);
+        var lines = this.env.genPassLines(passOutputVar, passInputVar, texUnitID).join('\n');
+
+        var texLoadLines = '';
+        var texLoadRegex = /(texture.*?\(.*?\))/g;
+        var loadCounter = 0;
+        var load;
+
+        // As an optimization, merge duplicate identical texture loads to one var.
+        while(load = texLoadRegex.exec(lines)) {
+          var texLoadExpr = load[1];
+          var secondOccurrence = lines.slice(load.index+1).indexOf(texLoadExpr);
+          if (secondOccurrence != -1) { // And also has a second occurrence of same load expression..
+            // Create new var to store the common load.
+            var prefix = TEXENVJIT_NAMESPACE_PREFIX + 'env' + texUnitID + "_";
+            var texLoadVar = prefix + 'texload' + loadCounter++;
+            var texLoadLine = 'vec4 ' + texLoadVar + ' = ' + texLoadExpr + ';\n';
+            texLoadLines += texLoadLine + '\n'; // Store the generated texture load statements in a temp string to not confuse regex search in progress.
+            lines = lines.split(texLoadExpr).join(texLoadVar);
+            // Reset regex search, since we modified the string.
+            texLoadRegex = /(texture.*\(.*\))/g;
+          }
+        }
+        return [texLoadLines + lines];
       }
 
       CTexUnit.prototype.getTexType = function CTexUnit_getTexType() {
@@ -2898,13 +2978,18 @@ var LibraryGL = {
             var alphaLines = this.genCombinerLines(false, alphaVar,
                                                    passInputVar, texUnitID,
                                                    this.alphaCombiner, this.alphaSrc, this.alphaOp);
+
+            // Generate scale, but avoid generating an identity op that multiplies by one.
+            var scaledColor = (this.colorScale == 1) ? colorVar : (colorVar + " * " + valToFloatLiteral(this.colorScale));
+            var scaledAlpha = (this.alphaScale == 1) ? alphaVar : (alphaVar + " * " + valToFloatLiteral(this.alphaScale));
+
             var line = [
               "vec4 " + passOutputVar,
               " = ",
                 "vec4(",
-                    colorVar + " * " + valToFloatLiteral(this.colorScale),
+                    scaledColor,
                     ", ",
-                    alphaVar + " * " + valToFloatLiteral(this.alphaScale),
+                    scaledAlpha,
                 ")",
               ";",
             ].join("");
@@ -3084,12 +3169,7 @@ var LibraryGL = {
 
         traverseState: function(keyView) {
           for (var i = 0; i < s_texUnits.length; i++) {
-            var texUnit = s_texUnits[i];
-            var enabled = texUnit.enabled();
-            keyView.next(enabled);
-            if (enabled) {
-              texUnit.traverseState(keyView);
-            }
+            s_texUnits[i].traverseState(keyView); // Every unit contributes to the key itself; a disabled unit traverses a single 0.
           }
         },
 
@@ -3113,24 +3193,28 @@ var LibraryGL = {
               if (!cur.enabled_tex1D) {
                 GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again.
                 cur.enabled_tex1D = true;
+                cur.texTypesEnabled |= 1;
               }
               break;
             case GL_TEXTURE_2D:
               if (!cur.enabled_tex2D) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_tex2D = true;
+                cur.texTypesEnabled |= 2;
               }
               break;
             case GL_TEXTURE_3D:
               if (!cur.enabled_tex3D) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_tex3D = true;
+                cur.texTypesEnabled |= 4;
               }
               break;
             case GL_TEXTURE_CUBE_MAP:
               if (!cur.enabled_texCube) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_texCube = true;
+                cur.texTypesEnabled |= 8;
               }
               break;
           }
@@ -3143,24 +3227,28 @@ var LibraryGL = {
               if (cur.enabled_tex1D) {
                 GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again.
                 cur.enabled_tex1D = false;
+                cur.texTypesEnabled &= ~1;
               }
               break;
             case GL_TEXTURE_2D:
               if (cur.enabled_tex2D) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_tex2D = false;
+                cur.texTypesEnabled &= ~2;
               }
               break;
             case GL_TEXTURE_3D:
               if (cur.enabled_tex3D) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_tex3D = false;
+                cur.texTypesEnabled &= ~4;
               }
               break;
             case GL_TEXTURE_CUBE_MAP:
               if (cur.enabled_texCube) {
                 GLImmediate.currentRenderer = null;
                 cur.enabled_texCube = false;
+                cur.texTypesEnabled &= ~8;
               }
               break;
           }
@@ -3434,7 +3522,6 @@ var LibraryGL = {
       // we maintain a cache of renderers, optimized to not generate garbage
       var attributes = GLImmediate.liveClientAttributes;
       var cacheMap = GLImmediate.rendererCache;
-      var temp;
       var keyView = cacheMap.getStaticKeyView().reset();
 
       // By attrib state:
@@ -3442,7 +3529,6 @@ var LibraryGL = {
       for (var i = 0; i < attributes.length; i++) {
         enabledAttributesKey |= 1 << attributes[i].name;
       }
-      keyView.next(enabledAttributesKey);
 
       // By fog state:
       var fogParam = 0;
@@ -3459,13 +3545,17 @@ var LibraryGL = {
             break;
         }
       }
-      keyView.next(fogParam);
+      keyView.next((enabledAttributesKey << 2) | fogParam);
 
+#if !GL_FFP_ONLY
       // By cur program:
       keyView.next(GL.currProgram);
       if (!GL.currProgram) {
+#endif
         GLImmediate.TexEnvJIT.traverseState(keyView);
+#if !GL_FFP_ONLY
       }
+#endif
 
       // If we don't already have it, create it.
       var renderer = keyView.get();
@@ -3720,7 +3810,7 @@ var LibraryGL = {
 #if ASSERTIONS
             assert(end <= GL.MAX_TEMP_BUFFER_SIZE, 'too much vertex data');
 #endif
-            arrayBuffer = GL.tempVertexBuffers[GL.tempBufferIndexLookup[end]];
+            arrayBuffer = GL.getTempVertexBuffer(end);
             // TODO: consider using the last buffer we bound, if it was larger. downside is larger buffer, but we might avoid rebinding and preparing
           } else {
             arrayBuffer = GL.currArrayBuffer;
@@ -4028,11 +4118,12 @@ var LibraryGL = {
 
       if (!Module.useWebGL) return; // a 2D canvas may be currently used TODO: make sure we are actually called in that case
 
-      GLImmediate.TexEnvJIT.init(GLctx);
-
       // User can override the maximum number of texture units that we emulate. Using fewer texture units increases runtime performance
       // slightly, so it is advantageous to choose as small value as needed.
       GLImmediate.MAX_TEXTURES = Module['GL_MAX_TEXTURE_IMAGE_UNITS'] || GLctx.getParameter(GLctx.MAX_TEXTURE_IMAGE_UNITS);
+
+      GLImmediate.TexEnvJIT.init(GLctx, GLImmediate.MAX_TEXTURES);
+
       GLImmediate.NUM_ATTRIBUTES = 3 /*pos+normal+color attributes*/ + GLImmediate.MAX_TEXTURES;
       GLImmediate.clientAttributes = [];
       GLEmulation.enabledClientAttribIndices = [];
@@ -4221,7 +4312,7 @@ var LibraryGL = {
 #if ASSERTIONS
           assert(numProvidedIndexes << 1 <= GL.MAX_TEMP_BUFFER_SIZE, 'too many immediate mode indexes (a)');
 #endif
-          var indexBuffer = GL.tempIndexBuffers[GL.tempBufferIndexLookup[numProvidedIndexes << 1]];
+          var indexBuffer = GL.getTempIndexBuffer(numProvidedIndexes << 1);
           GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, indexBuffer);
           GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER, 0, {{{ makeHEAPView('U16', 'ptr', 'ptr + (numProvidedIndexes << 1)') }}});
           ptr = 0;
@@ -4986,7 +5077,7 @@ var LibraryGL = {
     var buf;
     if (!GL.currElementArrayBuffer) {
       var size = GL.calcBufLength(1, type, 0, count);
-      buf = GL.tempIndexBuffers[GL.tempBufferIndexLookup[size]];
+      buf = GL.getTempIndexBuffer(size);
       GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, buf);
       GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER,
                                0,
diff --git a/src/parseTools.js b/src/parseTools.js
index b7f97a40..be0cbcab 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -2150,9 +2150,9 @@ function makeRounding(value, bits, signed, floatConversion) {
       }
     }
     // Math.floor is reasonably fast if we don't care about corrections (and even correct if unsigned)
-    if (!correctRoundings() || !signed) return 'Math_floor(' + value + ')';
+    if (!correctRoundings() || !signed) return '(+Math_floor(' + value + '))';
     // We are left with >32 bits
-    return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? Math_floor(VALUE) : Math_ceil(VALUE)', value, 'tempBigIntR');
+    return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? +Math_floor(VALUE) : +Math_ceil(VALUE)', value, 'tempBigIntR');
   }
 }
 
diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp
index d5772c62..204986da 100644
--- a/src/relooper/Relooper.cpp
+++ b/src/relooper/Relooper.cpp
@@ -322,12 +322,26 @@ void MultipleShape::RenderLoopPostfix() {
 
 void MultipleShape::Render(bool InLoop) {
   RenderLoopPrefix();
-  bool First = true;
+
+  // We know that blocks with the same Id were split from the same source, so their contents are identical and they are logically the same, so re-merge them here
+  typedef std::map<int, Shape*> IdShapeMap;
+  IdShapeMap IdMap;
   for (BlockShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) {
+    int Id = iter->first->Id;
+    IdShapeMap::iterator Test = IdMap.find(Id);
+    if (Test != IdMap.end()) {
+      assert(Shape::IsSimple(iter->second) && Shape::IsSimple(Test->second)); // we can only merge simple blocks, something horrible has gone wrong if we see anything else
+      continue;
+    }
+    IdMap[iter->first->Id] = iter->second;
+  }
+
+  bool First = true;
+  for (IdShapeMap::iterator iter = IdMap.begin(); iter != IdMap.end(); iter++) {
     if (AsmJS) {
-      PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first->Id);
+      PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first);
     } else {
-      PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first->Id);
+      PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first);
     }
     First = false;
     Indenter::Indent();
@@ -391,8 +405,8 @@ Relooper::~Relooper() {
   for (unsigned i = 0; i < Shapes.size(); i++) delete Shapes[i];
 }
 
-void Relooper::AddBlock(Block *New) {
-  New->Id = BlockIdCounter++;
+void Relooper::AddBlock(Block *New, int Id) {
+  New->Id = Id == -1 ? BlockIdCounter++ : Id; // -1 (the declared default) takes a fresh id from the counter; any other value is assigned as-is, which lets a split block keep its original's id
   Blocks.push_back(New);
 }
 
@@ -446,8 +460,7 @@ void Relooper::Calculate(Block *Entry) {
         for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) {
           Block *Prior = *iter;
           Block *Split = new Block(Original->Code, Original->BranchVar);
-          Parent->AddBlock(Split);
-          PrintDebug("  to %d\n", Split->Id);
+          Parent->AddBlock(Split, Original->Id);
           Split->BranchesIn.insert(Prior);
           Branch *Details = Prior->BranchesOut[Original];
           Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code);
diff --git a/src/relooper/Relooper.h b/src/relooper/Relooper.h
index 6b9394db..85adf359 100644
--- a/src/relooper/Relooper.h
+++ b/src/relooper/Relooper.h
@@ -57,7 +57,7 @@ struct Block {
   BlockBranchMap ProcessedBranchesOut;
   BlockSet ProcessedBranchesIn;
   Shape *Parent; // The shape we are directly inside
-  int Id; // A unique identifier, defined when added to relooper
+  int Id; // A unique identifier, defined when added to relooper. Note that this uniquely identifies a *logical* block - if we split it, the two instances have the same content *and* the same Id
   const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input)
   const char *BranchVar; // If we have more than one branch out, the variable whose value determines where we go
   bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable
@@ -191,7 +191,7 @@ struct Relooper {
   Relooper();
   ~Relooper();
 
-  void AddBlock(Block *New);
+  void AddBlock(Block *New, int Id=-1);
 
   // Calculates the shapes
   void Calculate(Block *Entry);
diff --git a/src/relooper/test.txt b/src/relooper/test.txt
index cb02b867..82b02ad7 100644
--- a/src/relooper/test.txt
+++ b/src/relooper/test.txt
@@ -91,7 +91,7 @@
   }
   default: {
    var $x_1 = $x_0; 
-   label = 8;
+   label = 7;
    break L1;
   }
   }
@@ -106,7 +106,7 @@
   }
   }
  }
- if (label == 8) {
+ if (label == 7) {
   // code 7
  }
  // code 4