aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xemcc3
-rw-r--r--src/library_browser.js9
-rw-r--r--src/library_gl.js227
-rw-r--r--src/parseTools.js4
-rw-r--r--src/relooper/Relooper.cpp27
-rw-r--r--src/relooper/Relooper.h4
-rw-r--r--src/relooper/test.txt4
-rw-r--r--system/lib/libc/musl/src/math/__cos.c71
-rw-r--r--system/lib/libc/musl/src/math/__sin.c64
-rw-r--r--system/lib/libc/musl/src/stdio/fputwc.c21
-rw-r--r--system/lib/libc/musl/src/stdio/fputws.c30
-rw-r--r--system/lib/libc/musl/src/stdio/vswprintf.c17
-rw-r--r--system/lib/libcextra.symbols2
-rw-r--r--tests/core/test_wprintf.c50
-rw-r--r--tests/core/test_wprintf.out35
-rw-r--r--tools/eliminator/asm-eliminator-test-output.js506
-rw-r--r--tools/eliminator/asm-eliminator-test.js651
-rw-r--r--tools/js-optimizer.js17
-rw-r--r--tools/shared.py2
19 files changed, 1654 insertions, 90 deletions
diff --git a/emcc b/emcc
index 6ae8f2ef..958b018c 100755
--- a/emcc
+++ b/emcc
@@ -1536,7 +1536,9 @@ try:
'wctype_l.c',
]],
['math', [
+ '__cos.c',
'__cosdf.c',
+ '__sin.c',
'__sindf.c',
'ilogb.c',
'ilogbf.c',
@@ -1593,6 +1595,7 @@ try:
'vwprintf.c',
'wprintf.c',
'fputwc.c',
+ 'fputws.c',
]],
['stdlib', [
'ecvt.c',
diff --git a/src/library_browser.js b/src/library_browser.js
index d5e35339..458a8dd2 100644
--- a/src/library_browser.js
+++ b/src/library_browser.js
@@ -775,6 +775,15 @@ mergeInto(LibraryManager.library, {
return;
}
+ // Signal GL rendering layer that processing of a new frame is about to start. This helps it optimize
+ // VBO double-buffering and reduce GPU stalls.
+#if FULL_ES2
+ GL.newRenderingFrameStarted();
+#endif
+#if LEGACY_GL_EMULATION
+ GL.newRenderingFrameStarted();
+#endif
+
if (Module['preMainLoop']) {
Module['preMainLoop']();
}
diff --git a/src/library_gl.js b/src/library_gl.js
index f6978c04..61ca8957 100644
--- a/src/library_gl.js
+++ b/src/library_gl.js
@@ -57,6 +57,7 @@ var LibraryGL = {
unpackAlignment: 4, // default alignment is 4 bytes
init: function() {
+ GL.createLog2ceilLookup(GL.MAX_TEMP_BUFFER_SIZE);
Browser.moduleContextCreatedCallbacks.push(GL.initExtensions);
},
@@ -81,36 +82,58 @@ var LibraryGL = {
miniTempBuffer: null,
miniTempBufferViews: [0], // index i has the view of size i+1
- // Large temporary buffers
+ // When user GL code wants to render from client-side memory, we need to upload the vertex data to a temp VBO
+ // for rendering. Maintain a set of temp VBOs that are created-on-demand to appropriate sizes, and never destroyed.
+ // Also, for best performance the VBOs are double-buffered, i.e. every second frame we switch the set of VBOs we
+ // upload to, so that rendering from the previous frame is not disturbed by uploading from new data to it, which
+ // could cause a GPU-CPU pipeline stall.
+ // Note that index buffers are not double-buffered (at the moment) in this manner.
MAX_TEMP_BUFFER_SIZE: {{{ GL_MAX_TEMP_BUFFER_SIZE }}},
- tempBufferIndexLookup: null,
- tempVertexBuffers: null,
- tempIndexBuffers: null,
+ tempVertexBuffers1: [],
+ tempVertexBufferCounters1: [],
+ tempVertexBuffers2: [],
+ tempVertexBufferCounters2: [],
+ // Maximum number of temp VBOs of one size to maintain, after that we start reusing old ones, which is safe but can give
+ // a performance impact. If CPU-GPU stalls are a problem, increasing this might help.
+ numTempVertexBuffersPerSize: 64, // (const)
+ tempIndexBuffers: [],
tempQuadIndexBuffer: null,
- generateTempBuffers: function(quads) {
- GL.tempBufferIndexLookup = new Uint8Array(GL.MAX_TEMP_BUFFER_SIZE+1);
- GL.tempVertexBuffers = [];
- GL.tempIndexBuffers = [];
- var last = -1, curr = -1;
- var size = 1;
- for (var i = 0; i <= GL.MAX_TEMP_BUFFER_SIZE; i++) {
- if (i > size) {
- size <<= 1;
+ // Precompute a lookup table for the function ceil(log2(x)), i.e. how many bits are needed to represent x, or,
+ // if x was rounded up to next pow2, which index is the single '1' bit at?
+ // Then log2ceilLookup[x] returns ceil(log2(x)).
+ log2ceilLookup: null,
+ createLog2ceilLookup: function(maxValue) {
+ GL.log2ceilLookup = new Uint8Array(maxValue+1);
+ var log2 = 0;
+ var pow2 = 1;
+ GL.log2ceilLookup[0] = 0;
+ for(var i = 1; i <= maxValue; ++i) {
+ if (i > pow2) {
+ pow2 <<= 1;
+ ++log2;
}
- if (size != last) {
- curr++;
- GL.tempVertexBuffers[curr] = GLctx.createBuffer();
- GLctx.bindBuffer(GLctx.ARRAY_BUFFER, GL.tempVertexBuffers[curr]);
- GLctx.bufferData(GLctx.ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
- GLctx.bindBuffer(GLctx.ARRAY_BUFFER, null);
- GL.tempIndexBuffers[curr] = GLctx.createBuffer();
- GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[curr]);
- GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, size, GLctx.DYNAMIC_DRAW);
- GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, null);
- last = size;
+ GL.log2ceilLookup[i] = log2;
+ }
+ },
+
+ generateTempBuffers: function(quads) {
+ var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+ GL.tempVertexBufferCounters1.length = GL.tempVertexBufferCounters2.length = largestIndex+1;
+ GL.tempVertexBuffers1.length = GL.tempVertexBuffers2.length = largestIndex+1;
+ GL.tempIndexBuffers.length = largestIndex+1;
+ for(var i = 0; i <= largestIndex; ++i) {
+ GL.tempIndexBuffers[i] = null; // Created on-demand
+ GL.tempVertexBufferCounters1[i] = GL.tempVertexBufferCounters2[i] = 0;
+ var ringbufferLength = GL.numTempVertexBuffersPerSize;
+ GL.tempVertexBuffers1[i] = [];
+ GL.tempVertexBuffers2[i] = [];
+ var ringbuffer1 = GL.tempVertexBuffers1[i];
+ var ringbuffer2 = GL.tempVertexBuffers2[i];
+ ringbuffer1.length = ringbuffer2.length = ringbufferLength;
+ for(var j = 0; j < ringbufferLength; ++j) {
+ ringbuffer1[j] = ringbuffer2[j] = null; // Created on-demand
}
- GL.tempBufferIndexLookup[i] = curr;
}
if (quads) {
@@ -140,6 +163,53 @@ var LibraryGL = {
}
},
+ getTempVertexBuffer: function getTempVertexBuffer(sizeBytes) {
+ var idx = GL.log2ceilLookup[sizeBytes];
+ var ringbuffer = GL.tempVertexBuffers1[idx];
+ var nextFreeBufferIndex = GL.tempVertexBufferCounters1[idx];
+ GL.tempVertexBufferCounters1[idx] = (GL.tempVertexBufferCounters1[idx]+1) & (GL.numTempVertexBuffersPerSize-1);
+ var vbo = ringbuffer[nextFreeBufferIndex];
+ if (vbo) {
+ return vbo;
+ }
+ var prevVBO = GLctx.getParameter(GLctx.ARRAY_BUFFER_BINDING);
+ ringbuffer[nextFreeBufferIndex] = GLctx.createBuffer();
+ GLctx.bindBuffer(GLctx.ARRAY_BUFFER, ringbuffer[nextFreeBufferIndex]);
+ GLctx.bufferData(GLctx.ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+ GLctx.bindBuffer(GLctx.ARRAY_BUFFER, prevVBO);
+ return ringbuffer[nextFreeBufferIndex];
+ },
+
+ getTempIndexBuffer: function getTempIndexBuffer(sizeBytes) {
+ var idx = GL.log2ceilLookup[sizeBytes];
+ var ibo = GL.tempIndexBuffers[idx];
+ if (ibo) {
+ return ibo;
+ }
+ var prevIBO = GLctx.getParameter(GLctx.ELEMENT_ARRAY_BUFFER_BINDING);
+ GL.tempIndexBuffers[idx] = GLctx.createBuffer();
+ GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, GL.tempIndexBuffers[idx]);
+ GLctx.bufferData(GLctx.ELEMENT_ARRAY_BUFFER, 1 << idx, GLctx.DYNAMIC_DRAW);
+ GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, prevIBO);
+ return GL.tempIndexBuffers[idx];
+ },
+
+ // Called at start of each new WebGL rendering frame. This swaps the doublebuffered temp VB memory pointers,
+ // so that every second frame utilizes different set of temp buffers. The aim is to keep the set of buffers
+ // being rendered, and the set of buffers being updated disjoint.
+ newRenderingFrameStarted: function newRenderingFrameStarted() {
+ var vb = GL.tempVertexBuffers1;
+ GL.tempVertexBuffers1 = GL.tempVertexBuffers2;
+ GL.tempVertexBuffers2 = vb;
+ vb = GL.tempVertexBufferCounters1;
+ GL.tempVertexBufferCounters1 = GL.tempVertexBufferCounters2;
+ GL.tempVertexBufferCounters2 = vb;
+ var largestIndex = GL.log2ceilLookup[GL.MAX_TEMP_BUFFER_SIZE];
+ for(var i = 0; i <= largestIndex; ++i) {
+ GL.tempVertexBufferCounters1[i] = 0;
+ }
+ },
+
// Find a token in a shader source string
findToken: function(source, token) {
function isIdentChar(ch) {
@@ -446,9 +516,6 @@ var LibraryGL = {
preDrawHandleClientVertexAttribBindings: function preDrawHandleClientVertexAttribBindings(count) {
GL.resetBufferBinding = false;
- var used = GL.usedTempBuffers;
- used.length = 0;
-
// TODO: initial pass to detect ranges we need to upload, might not need an upload per attrib
for (var i = 0; i < GL.maxVertexAttribs; ++i) {
var cb = GL.clientBuffers[i];
@@ -457,15 +524,7 @@ var LibraryGL = {
GL.resetBufferBinding = true;
var size = GL.calcBufLength(cb.size, cb.type, cb.stride, count);
- var index = GL.tempBufferIndexLookup[size];
- var buf;
- do {
-#if ASSERTIONS
- assert(index < GL.tempVertexBuffers.length);
-#endif
- buf = GL.tempVertexBuffers[index++];
- } while (used.indexOf(buf) >= 0);
- used.push(buf);
+ var buf = GL.getTempVertexBuffer(size);
GLctx.bindBuffer(GLctx.ARRAY_BUFFER, buf);
GLctx.bufferSubData(GLctx.ARRAY_BUFFER,
0,
@@ -2742,14 +2801,6 @@ var LibraryGL = {
this.key0 = -1; // The key of this texture unit must be recomputed when rendering the next time.
GLImmediate.currentRenderer = null; // The currently used renderer must be re-evaluated at next render.
}
- this.traverseState = function(keyView) {
- if (this.key0 == -1) {
- this.recomputeKey();
- }
- keyView.next(this.key0);
- keyView.next(this.key1);
- keyView.next(this.key2);
- };
}
function CTexUnit() {
@@ -2758,26 +2809,55 @@ var LibraryGL = {
this.enabled_tex2D = false;
this.enabled_tex3D = false;
this.enabled_texCube = false;
+ this.texTypesEnabled = 0; // A bitfield combination of the four flags above, used for fast access to operations.
this.traverseState = function CTexUnit_traverseState(keyView) {
- var texUnitType = this.getTexType();
- keyView.next(texUnitType);
- if (!texUnitType) return;
- this.env.traverseState(keyView);
+ if (this.texTypesEnabled) {
+ if (this.env.key0 == -1) {
+ this.env.recomputeKey();
+ }
+ keyView.next(this.texTypesEnabled | (this.env.key0 << 4));
+ keyView.next(this.env.key1);
+ keyView.next(this.env.key2);
+ } else {
+ // For correctness, must traverse a zero value, theoretically a subsequent integer key could collide with this value otherwise.
+ keyView.next(0);
+ }
};
};
// Class impls:
CTexUnit.prototype.enabled = function CTexUnit_enabled() {
- return this.getTexType() != 0;
+ return this.texTypesEnabled;
}
CTexUnit.prototype.genPassLines = function CTexUnit_genPassLines(passOutputVar, passInputVar, texUnitID) {
if (!this.enabled()) {
return ["vec4 " + passOutputVar + " = " + passInputVar + ";"];
}
-
- return this.env.genPassLines(passOutputVar, passInputVar, texUnitID);
+ var lines = this.env.genPassLines(passOutputVar, passInputVar, texUnitID).join('\n');
+
+ var texLoadLines = '';
+ var texLoadRegex = /(texture.*?\(.*?\))/g;
+ var loadCounter = 0;
+ var load;
+
+ // As an optimization, merge duplicate identical texture loads to one var.
+ while(load = texLoadRegex.exec(lines)) {
+ var texLoadExpr = load[1];
+ var secondOccurrence = lines.slice(load.index+1).indexOf(texLoadExpr);
+ if (secondOccurrence != -1) { // And also has a second occurrence of same load expression..
+ // Create new var to store the common load.
+ var prefix = TEXENVJIT_NAMESPACE_PREFIX + 'env' + texUnitID + "_";
+ var texLoadVar = prefix + 'texload' + loadCounter++;
+ var texLoadLine = 'vec4 ' + texLoadVar + ' = ' + texLoadExpr + ';\n';
+ texLoadLines += texLoadLine + '\n'; // Store the generated texture load statements in a temp string to not confuse regex search in progress.
+ lines = lines.split(texLoadExpr).join(texLoadVar);
+ // Reset regex search, since we modified the string.
+ texLoadRegex = /(texture.*\(.*\))/g;
+ }
+ }
+ return [texLoadLines + lines];
}
CTexUnit.prototype.getTexType = function CTexUnit_getTexType() {
@@ -2898,13 +2978,18 @@ var LibraryGL = {
var alphaLines = this.genCombinerLines(false, alphaVar,
passInputVar, texUnitID,
this.alphaCombiner, this.alphaSrc, this.alphaOp);
+
+ // Generate scale, but avoid generating an identity op that multiplies by one.
+ var scaledColor = (this.colorScale == 1) ? colorVar : (colorVar + " * " + valToFloatLiteral(this.colorScale));
+ var scaledAlpha = (this.alphaScale == 1) ? alphaVar : (alphaVar + " * " + valToFloatLiteral(this.alphaScale));
+
var line = [
"vec4 " + passOutputVar,
" = ",
"vec4(",
- colorVar + " * " + valToFloatLiteral(this.colorScale),
+ scaledColor,
", ",
- alphaVar + " * " + valToFloatLiteral(this.alphaScale),
+ scaledAlpha,
")",
";",
].join("");
@@ -3084,12 +3169,7 @@ var LibraryGL = {
traverseState: function(keyView) {
for (var i = 0; i < s_texUnits.length; i++) {
- var texUnit = s_texUnits[i];
- var enabled = texUnit.enabled();
- keyView.next(enabled);
- if (enabled) {
- texUnit.traverseState(keyView);
- }
+ s_texUnits[i].traverseState(keyView);
}
},
@@ -3113,24 +3193,28 @@ var LibraryGL = {
if (!cur.enabled_tex1D) {
GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again.
cur.enabled_tex1D = true;
+ cur.texTypesEnabled |= 1;
}
break;
case GL_TEXTURE_2D:
if (!cur.enabled_tex2D) {
GLImmediate.currentRenderer = null;
cur.enabled_tex2D = true;
+ cur.texTypesEnabled |= 2;
}
break;
case GL_TEXTURE_3D:
if (!cur.enabled_tex3D) {
GLImmediate.currentRenderer = null;
cur.enabled_tex3D = true;
+ cur.texTypesEnabled |= 4;
}
break;
case GL_TEXTURE_CUBE_MAP:
if (!cur.enabled_texCube) {
GLImmediate.currentRenderer = null;
cur.enabled_texCube = true;
+ cur.texTypesEnabled |= 8;
}
break;
}
@@ -3143,24 +3227,28 @@ var LibraryGL = {
if (cur.enabled_tex1D) {
GLImmediate.currentRenderer = null; // Renderer state changed, and must be recreated or looked up again.
cur.enabled_tex1D = false;
+ cur.texTypesEnabled &= ~1;
}
break;
case GL_TEXTURE_2D:
if (cur.enabled_tex2D) {
GLImmediate.currentRenderer = null;
cur.enabled_tex2D = false;
+ cur.texTypesEnabled &= ~2;
}
break;
case GL_TEXTURE_3D:
if (cur.enabled_tex3D) {
GLImmediate.currentRenderer = null;
cur.enabled_tex3D = false;
+ cur.texTypesEnabled &= ~4;
}
break;
case GL_TEXTURE_CUBE_MAP:
if (cur.enabled_texCube) {
GLImmediate.currentRenderer = null;
cur.enabled_texCube = false;
+ cur.texTypesEnabled &= ~8;
}
break;
}
@@ -3434,7 +3522,6 @@ var LibraryGL = {
// we maintain a cache of renderers, optimized to not generate garbage
var attributes = GLImmediate.liveClientAttributes;
var cacheMap = GLImmediate.rendererCache;
- var temp;
var keyView = cacheMap.getStaticKeyView().reset();
// By attrib state:
@@ -3442,7 +3529,6 @@ var LibraryGL = {
for (var i = 0; i < attributes.length; i++) {
enabledAttributesKey |= 1 << attributes[i].name;
}
- keyView.next(enabledAttributesKey);
// By fog state:
var fogParam = 0;
@@ -3459,13 +3545,17 @@ var LibraryGL = {
break;
}
}
- keyView.next(fogParam);
+ keyView.next((enabledAttributesKey << 2) | fogParam);
+#if !GL_FFP_ONLY
// By cur program:
keyView.next(GL.currProgram);
if (!GL.currProgram) {
+#endif
GLImmediate.TexEnvJIT.traverseState(keyView);
+#if !GL_FFP_ONLY
}
+#endif
// If we don't already have it, create it.
var renderer = keyView.get();
@@ -3720,7 +3810,7 @@ var LibraryGL = {
#if ASSERTIONS
assert(end <= GL.MAX_TEMP_BUFFER_SIZE, 'too much vertex data');
#endif
- arrayBuffer = GL.tempVertexBuffers[GL.tempBufferIndexLookup[end]];
+ arrayBuffer = GL.getTempVertexBuffer(end);
// TODO: consider using the last buffer we bound, if it was larger. downside is larger buffer, but we might avoid rebinding and preparing
} else {
arrayBuffer = GL.currArrayBuffer;
@@ -4028,11 +4118,12 @@ var LibraryGL = {
if (!Module.useWebGL) return; // a 2D canvas may be currently used TODO: make sure we are actually called in that case
- GLImmediate.TexEnvJIT.init(GLctx);
-
// User can override the maximum number of texture units that we emulate. Using fewer texture units increases runtime performance
// slightly, so it is advantageous to choose as small value as needed.
GLImmediate.MAX_TEXTURES = Module['GL_MAX_TEXTURE_IMAGE_UNITS'] || GLctx.getParameter(GLctx.MAX_TEXTURE_IMAGE_UNITS);
+
+ GLImmediate.TexEnvJIT.init(GLctx, GLImmediate.MAX_TEXTURES);
+
GLImmediate.NUM_ATTRIBUTES = 3 /*pos+normal+color attributes*/ + GLImmediate.MAX_TEXTURES;
GLImmediate.clientAttributes = [];
GLEmulation.enabledClientAttribIndices = [];
@@ -4221,7 +4312,7 @@ var LibraryGL = {
#if ASSERTIONS
assert(numProvidedIndexes << 1 <= GL.MAX_TEMP_BUFFER_SIZE, 'too many immediate mode indexes (a)');
#endif
- var indexBuffer = GL.tempIndexBuffers[GL.tempBufferIndexLookup[numProvidedIndexes << 1]];
+ var indexBuffer = GL.getTempIndexBuffer(numProvidedIndexes << 1);
GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, indexBuffer);
GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER, 0, {{{ makeHEAPView('U16', 'ptr', 'ptr + (numProvidedIndexes << 1)') }}});
ptr = 0;
@@ -4986,7 +5077,7 @@ var LibraryGL = {
var buf;
if (!GL.currElementArrayBuffer) {
var size = GL.calcBufLength(1, type, 0, count);
- buf = GL.tempIndexBuffers[GL.tempBufferIndexLookup[size]];
+ buf = GL.getTempIndexBuffer(size);
GLctx.bindBuffer(GLctx.ELEMENT_ARRAY_BUFFER, buf);
GLctx.bufferSubData(GLctx.ELEMENT_ARRAY_BUFFER,
0,
diff --git a/src/parseTools.js b/src/parseTools.js
index b7f97a40..be0cbcab 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -2150,9 +2150,9 @@ function makeRounding(value, bits, signed, floatConversion) {
}
}
// Math.floor is reasonably fast if we don't care about corrections (and even correct if unsigned)
- if (!correctRoundings() || !signed) return 'Math_floor(' + value + ')';
+ if (!correctRoundings() || !signed) return '(+Math_floor(' + value + '))';
// We are left with >32 bits
- return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? Math_floor(VALUE) : Math_ceil(VALUE)', value, 'tempBigIntR');
+ return makeInlineCalculation(makeComparison('VALUE', '>=', '0', 'float') + ' ? +Math_floor(VALUE) : +Math_ceil(VALUE)', value, 'tempBigIntR');
}
}
diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp
index d5772c62..204986da 100644
--- a/src/relooper/Relooper.cpp
+++ b/src/relooper/Relooper.cpp
@@ -322,12 +322,26 @@ void MultipleShape::RenderLoopPostfix() {
void MultipleShape::Render(bool InLoop) {
RenderLoopPrefix();
- bool First = true;
+
+ // We know that blocks with the same Id were split from the same source, so their contents are identical and they are logically the same, so re-merge them here
+ typedef std::map<int, Shape*> IdShapeMap;
+ IdShapeMap IdMap;
for (BlockShapeMap::iterator iter = InnerMap.begin(); iter != InnerMap.end(); iter++) {
+ int Id = iter->first->Id;
+ IdShapeMap::iterator Test = IdMap.find(Id);
+ if (Test != IdMap.end()) {
+ assert(Shape::IsSimple(iter->second) && Shape::IsSimple(Test->second)); // we can only merge simple blocks, something horrible has gone wrong if we see anything else
+ continue;
+ }
+ IdMap[iter->first->Id] = iter->second;
+ }
+
+ bool First = true;
+ for (IdShapeMap::iterator iter = IdMap.begin(); iter != IdMap.end(); iter++) {
if (AsmJS) {
- PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first->Id);
+ PrintIndented("%sif ((label|0) == %d) {\n", First ? "" : "else ", iter->first);
} else {
- PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first->Id);
+ PrintIndented("%sif (label == %d) {\n", First ? "" : "else ", iter->first);
}
First = false;
Indenter::Indent();
@@ -391,8 +405,8 @@ Relooper::~Relooper() {
for (unsigned i = 0; i < Shapes.size(); i++) delete Shapes[i];
}
-void Relooper::AddBlock(Block *New) {
- New->Id = BlockIdCounter++;
+void Relooper::AddBlock(Block *New, int Id) {
+ New->Id = Id == -1 ? BlockIdCounter++ : Id;
Blocks.push_back(New);
}
@@ -446,8 +460,7 @@ void Relooper::Calculate(Block *Entry) {
for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) {
Block *Prior = *iter;
Block *Split = new Block(Original->Code, Original->BranchVar);
- Parent->AddBlock(Split);
- PrintDebug(" to %d\n", Split->Id);
+ Parent->AddBlock(Split, Original->Id);
Split->BranchesIn.insert(Prior);
Branch *Details = Prior->BranchesOut[Original];
Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code);
diff --git a/src/relooper/Relooper.h b/src/relooper/Relooper.h
index 6b9394db..85adf359 100644
--- a/src/relooper/Relooper.h
+++ b/src/relooper/Relooper.h
@@ -57,7 +57,7 @@ struct Block {
BlockBranchMap ProcessedBranchesOut;
BlockSet ProcessedBranchesIn;
Shape *Parent; // The shape we are directly inside
- int Id; // A unique identifier, defined when added to relooper
+ int Id; // A unique identifier, defined when added to relooper. Note that this uniquely identifies a *logical* block - if we split it, the two instances have the same content *and* the same Id
const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input)
const char *BranchVar; // If we have more than one branch out, the variable whose value determines where we go
bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable
@@ -191,7 +191,7 @@ struct Relooper {
Relooper();
~Relooper();
- void AddBlock(Block *New);
+ void AddBlock(Block *New, int Id=-1);
// Calculates the shapes
void Calculate(Block *Entry);
diff --git a/src/relooper/test.txt b/src/relooper/test.txt
index cb02b867..82b02ad7 100644
--- a/src/relooper/test.txt
+++ b/src/relooper/test.txt
@@ -91,7 +91,7 @@
}
default: {
var $x_1 = $x_0;
- label = 8;
+ label = 7;
break L1;
}
}
@@ -106,7 +106,7 @@
}
}
}
- if (label == 8) {
+ if (label == 7) {
// code 7
}
// code 4
diff --git a/system/lib/libc/musl/src/math/__cos.c b/system/lib/libc/musl/src/math/__cos.c
new file mode 100644
index 00000000..46cefb38
--- /dev/null
+++ b/system/lib/libc/musl/src/math/__cos.c
@@ -0,0 +1,71 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+/*
+ * __cos( x, y )
+ * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
+ * Input x is assumed to be bounded by ~pi/4 in magnitude.
+ * Input y is the tail of x.
+ *
+ * Algorithm
+ * 1. Since cos(-x) = cos(x), we need only to consider positive x.
+ * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
+ * 3. cos(x) is approximated by a polynomial of degree 14 on
+ * [0,pi/4]
+ * 4 14
+ * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
+ * where the remez error is
+ *
+ * | 2 4 6 8 10 12 14 | -58
+ * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
+ * | |
+ *
+ * 4 6 8 10 12 14
+ * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
+ * cos(x) ~ 1 - x*x/2 + r
+ * since cos(x+y) ~ cos(x) - sin(x)*y
+ * ~ cos(x) - x*y,
+ * a correction term is necessary in cos(x) and hence
+ * cos(x+y) = 1 - (x*x/2 - (r - x*y))
+ * For better accuracy, rearrange to
+ * cos(x+y) ~ w + (tmp + (r-x*y))
+ * where w = 1 - x*x/2 and tmp is a tiny correction term
+ * (1 - x*x/2 == w + tmp exactly in infinite precision).
+ * The exactness of w + tmp in infinite precision depends on w
+ * and tmp having the same precision as x. If they have extra
+ * precision due to compiler bugs, then the extra precision is
+ * only good provided it is retained in all terms of the final
+ * expression for cos(). Retention happens in all cases tested
+ * under FreeBSD, so don't pessimize things by forcibly clipping
+ * any extra precision in w.
+ */
+
+#include "libm.h"
+
+static const double
+C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
+C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
+C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
+C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
+C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
+C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
+
+double __cos(double x, double y)
+{
+ double_t hz,z,r,w;
+
+ z = x*x;
+ w = z*z;
+ r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
+ hz = 0.5*z;
+ w = 1.0-hz;
+ return w + (((1.0-w)-hz) + (z*r-x*y));
+}
diff --git a/system/lib/libc/musl/src/math/__sin.c b/system/lib/libc/musl/src/math/__sin.c
new file mode 100644
index 00000000..40309496
--- /dev/null
+++ b/system/lib/libc/musl/src/math/__sin.c
@@ -0,0 +1,64 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+/* __sin( x, y, iy)
+ * kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
+ * Input x is assumed to be bounded by ~pi/4 in magnitude.
+ * Input y is the tail of x.
+ * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
+ *
+ * Algorithm
+ * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
+ * 2. Callers must return sin(-0) = -0 without calling here since our
+ * odd polynomial is not evaluated in a way that preserves -0.
+ * Callers may do the optimization sin(x) ~ x for tiny x.
+ * 3. sin(x) is approximated by a polynomial of degree 13 on
+ * [0,pi/4]
+ * 3 13
+ * sin(x) ~ x + S1*x + ... + S6*x
+ * where
+ *
+ * |sin(x) 2 4 6 8 10 12 | -58
+ * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
+ * | x |
+ *
+ * 4. sin(x+y) = sin(x) + sin'(x')*y
+ * ~ sin(x) + (1-x*x/2)*y
+ * For better accuracy, let
+ * 3 2 2 2 2
+ * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
+ * then 3 2
+ * sin(x) = x + (S1*x + (x *(r-y/2)+y))
+ */
+
+#include "libm.h"
+
+static const double
+S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
+S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
+S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
+S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
+S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
+S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
+
+double __sin(double x, double y, int iy)
+{
+ double_t z,r,v,w;
+
+ z = x*x;
+ w = z*z;
+ r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
+ v = z*x;
+ if (iy == 0)
+ return x + v*(S1 + z*r);
+ else
+ return x - ((z*(0.5*y - v*r) - y) - v*S1);
+}
diff --git a/system/lib/libc/musl/src/stdio/fputwc.c b/system/lib/libc/musl/src/stdio/fputwc.c
index e87e47d4..603fa615 100644
--- a/system/lib/libc/musl/src/stdio/fputwc.c
+++ b/system/lib/libc/musl/src/stdio/fputwc.c
@@ -17,15 +17,28 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
l = wctomb((void *)f->wpos, c);
if (l < 0) c = WEOF;
else f->wpos += l;
-#else
- if (isascii(c)) {
- c = fputc(c, f);
-#endif
} else {
l = wctomb(mbc, c);
if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
}
return c;
+#else
+ if (isascii(c)) {
+ c = fputc(c, f);
+ } else {
+ l = wctomb(mbc, c);
+ if (l < 0) c = WEOF;
+ else {
+ for (int i = 0; i < l; i++) {
+ if (fputc(mbc[i], f) == EOF) {
+ c = WEOF;
+ break;
+ }
+ }
+ }
+ }
+#endif
+ return c;
}
wint_t fputwc(wchar_t c, FILE *f)
diff --git a/system/lib/libc/musl/src/stdio/fputws.c b/system/lib/libc/musl/src/stdio/fputws.c
new file mode 100644
index 00000000..70e004c9
--- /dev/null
+++ b/system/lib/libc/musl/src/stdio/fputws.c
@@ -0,0 +1,30 @@
+#include "stdio_impl.h"
+#include <wchar.h>
+
+int fputws(const wchar_t *restrict ws, FILE *restrict f)
+{
+ unsigned char buf[BUFSIZ];
+ size_t l=0;
+
+ FLOCK(f);
+
+#if 0 // XXX EMSCRIPTEN
+ f->mode |= f->mode+1;
+#endif
+
+ while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
+#if 0 // XXX EMSCRIPTEN
+ if (__fwritex(buf, l, f) < l) {
+#else
+ if (fwrite(buf, 1, l, f) < l) {
+#endif
+ FUNLOCK(f);
+ return -1;
+ }
+
+ FUNLOCK(f);
+
+ return l; /* 0 or -1 */
+}
+
+weak_alias(fputws, fputws_unlocked);
diff --git a/system/lib/libc/musl/src/stdio/vswprintf.c b/system/lib/libc/musl/src/stdio/vswprintf.c