Diffstat (limited to 'src/parseTools.js')
-rw-r--r-- | src/parseTools.js | 158
1 files changed, 108 insertions, 50 deletions
diff --git a/src/parseTools.js b/src/parseTools.js
index 520d278e..86e3c643 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -123,7 +123,7 @@ function isPointerType(type) {
 function isStructType(type) {
   if (isPointerType(type)) return false;
   if (/^\[\d+\ x\ (.*)\]/.test(type)) return true; // [15 x ?] blocks. Like structs
-  if (/<?{ [^}]* }>?/.test(type)) return true; // { i32, i8 } etc. - anonymous struct types
+  if (/<?{ ?[^}]* ?}>?/.test(type)) return true; // { i32, i8 } etc. - anonymous struct types
   // See comment in isStructPointerType()
   return type[0] == '%';
 }
@@ -169,7 +169,15 @@ function isFunctionDef(token, out) {
 
 function isFunctionType(type, out) {
   type = type.replace(/"[^"]+"/g, '".."');
-  var parts = type.split(' ');
+  var parts;
+  // hackish, but quick splitting of function def parts. this must be fast as it happens a lot
+  if (type[0] != '[') {
+    parts = type.split(' ');
+  } else {
+    var index = type.search(']');
+    index += type.substr(index).search(' ');
+    parts = [type.substr(0, index), type.substr(index+1)];
+  }
   if (pointingLevels(type) !== 1) return false;
   var text = removeAllPointing(parts.slice(1).join(' '));
   if (!text) return false;
@@ -412,7 +420,7 @@ function cleanSegment(segment) {
   return segment;
 }
 
-var MATHOPS = set(['add', 'sub', 'sdiv', 'udiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr']);
+var MATHOPS = set(['add', 'sub', 'sdiv', 'udiv', 'mul', 'icmp', 'zext', 'urem', 'srem', 'fadd', 'fsub', 'fmul', 'fdiv', 'fcmp', 'frem', 'uitofp', 'sitofp', 'fpext', 'fptrunc', 'fptoui', 'fptosi', 'trunc', 'sext', 'select', 'shl', 'shr', 'ashl', 'ashr', 'lshr', 'lshl', 'xor', 'or', 'and', 'ptrtoint', 'inttoptr']);
 
 var PARSABLE_LLVM_FUNCTIONS = set('getelementptr', 'bitcast');
 mergeInto(PARSABLE_LLVM_FUNCTIONS, MATHOPS);
@@ -499,7 +507,7 @@ function IEEEUnHex(stringy) {
   while (stringy.length < 16) stringy = '0' + stringy;
   if (FAKE_X86_FP80 && stringy.length > 16) {
     stringy = stringy.substr(stringy.length-16, 16);
-    warnOnce('.ll contains floating-point values with more than 64 bits. Faking values for them. If they are used, this will almost certainly fail!');
+    warnOnce('.ll contains floating-point values with more than 64 bits. Faking values for them. If they are used, this will almost certainly break horribly!');
   }
   assert(stringy.length === 16, 'Can only unhex 16-digit double numbers, nothing platform-specific'); // |long double| can cause x86_fp80 which causes this
   var top = eval('0x' + stringy[0]);
@@ -683,16 +691,14 @@ function parseArbitraryInt(str, bits) {
   return ret;
 }
 
-function parseI64Constant(str) {
-  assert(USE_TYPED_ARRAYS == 2);
-
+function parseI64Constant(str, legalized) {
   if (!isNumber(str)) {
     // This is a variable. Copy it, so we do not modify the original
     return legalizedI64s ? str : makeCopyI64(str);
   }
 
   var parsed = parseArbitraryInt(str, 64);
-  if (legalizedI64s) return parsed;
+  if (legalizedI64s || legalized) return parsed;
   return '[' + parsed[0] + ',' + parsed[1] + ']';
 }
 
@@ -769,7 +775,7 @@ function generateStructTypes(type) {
     if (USE_TYPED_ARRAYS == 2 && type == 'i64') {
       return ['i64', 0, 0, 0, 'i32', 0, 0, 0];
     }
-    return [type].concat(zeros(Runtime.getNativeFieldSize(type)));
+    return [type].concat(zeros(Runtime.getNativeFieldSize(type)-1));
   }
 
   // Avoid multiple concats by finding the size first. This is much faster
@@ -988,7 +994,7 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe,
       value = range(typeData.fields.length).map(function(i) { return value + '.f' + i });
     }
     for (var i = 0; i < typeData.fields.length; i++) {
-      ret.push(makeSetValue(ptr, pos + typeData.flatIndexes[i], value[i], typeData.fields[i], noNeedFirst));
+      ret.push(makeSetValue(ptr, getFastValue(pos, '+', typeData.flatIndexes[i]), value[i], typeData.fields[i], noNeedFirst));
     }
     return ret.join('; ');
   }
@@ -1145,6 +1151,13 @@ function makeCopyValues(dest, src, num, type, modifier, align, sep) {
   }
 }
 
+function makeHEAPView(which, start, end) {
+  // Assumes USE_TYPED_ARRAYS == 2
+  var size = parseInt(which.replace('U', '').replace('F', ''))/8;
+  var mod = size == 1 ? '' : ('>>' + log2(size));
+  return 'HEAP' + which + '.subarray((' + start + ')' + mod + ',(' + end + ')' + mod + ')';
+}
+
 var PLUS_MUL = set('+', '*');
 var MUL_DIV = set('*', '/');
 var PLUS_MINUS = set('+', '-');
@@ -1158,7 +1171,7 @@ function getFastValue(a, op, b, type) {
     if (op == 'pow') {
       return Math.pow(a, b).toString();
     } else {
-      return eval(a + op + b).toString();
+      return eval(a + op + '(' + b + ')').toString(); // parens protect us from "5 - -12" being seen as "5--12" which is "(5--)12"
     }
   }
   if (op == 'pow') {
@@ -1204,7 +1217,7 @@ function getFastValue(a, op, b, type) {
       return a;
     }
   }
-  return a + op + b;
+  return '(' + a + ')' + op + '(' + b + ')';
 }
 
 function getFastValues(list, op, type) {
@@ -1251,7 +1264,7 @@ function makePointer(slab, pos, allocator, type) {
     var evaled = typeof slab === 'string' ? eval(slab) : slab;
     de = dedup(evaled);
     if (de.length === 1 && de[0] === 0) {
-      slab = evaled.length;
+      slab = types.length;
     }
     // TODO: if not all zeros, at least filter out items with type === 0. requires cleverness to know how to skip at runtime though. also
     // be careful of structure padding
@@ -1288,17 +1301,11 @@ function makeGetSlabs(ptr, type, allowMultiple, unsigned) {
     }
   } else { // USE_TYPED_ARRAYS == 2)
     if (isPointerType(type)) type = 'i32'; // Hardcoded 32-bit
-    var warn64 = function() {
-      warnOnce('.ll contains i64 or double values. These 64-bit values are dangerous in USE_TYPED_ARRAYS == 2. ' +
-               'We store i64 as i32, and double as float. This can cause serious problems!');
-    };
     switch(type) {
       case 'i1': case 'i8': return [unsigned ? 'HEAPU8' : 'HEAP8']; break;
      case 'i16': return [unsigned ? 'HEAPU16' : 'HEAP16']; break;
-      case 'i64': warn64();
-      case 'i32': return [unsigned ? 'HEAPU32' : 'HEAP32']; break;
-      case 'float': return ['HEAPF32']; break;
-      case 'double': warn64(); return ['HEAPF32']; break;
+      case 'i32': case 'i64': return [unsigned ? 'HEAPU32' : 'HEAP32']; break;
+      case 'float': case 'double': return ['HEAPF32']; break;
       default: {
         throw 'what, exactly, can we do for unknown types in TA2?! ' + new Error().stack;
       }
@@ -1315,28 +1322,35 @@ function finalizeLLVMFunctionCall(item, noIndexizeFunctions) {
     // Warn about some types of casts, then fall through to the handling code below
     var oldType = item.params[0].type;
     var newType = item.type;
-    if (isPossiblyFunctionType(oldType) && isPossiblyFunctionType(newType) &&
-        countNormalArgs(oldType) != countNormalArgs(newType)) {
-      warn('Casting a function pointer type to another with a different number of arguments. See more info in the source (grep for this text). ' +
-           oldType + ' ==> ' + newType);
-      // This may be dangerous as clang generates different code for C and C++ calling conventions. The only problem
-      // case appears to be passing a structure by value, C will have (field1, field2) as function args, and the
-      // function will internally create a structure with that data, while C++ will have (struct* byVal) and it
-      // will create a copy before calling the function, then call it with a pointer to the copy. Mixing the two
-      // first of all leads to two copies being made, so this is a bad idea even regardless of Emscripten. But,
-      // what is a problem for Emscripten is that mixing these two calling conventions (say, calling a C one from
-      // C++) will then assume that (struct* byVal) is actually the same as (field1, field2). In native code, this
-      // is easily possible, you place the two fields on the stack and call the function (you know to place the
-      // values since there is 'byVal'). In Emscripten, though, this means we would need to always do one or the
-      // other of the two possibilities, for example, always passing by-value structs as (field1, field2). This
-      // would slow down everything, just to handle this corner case. (Which, just to point out how much of a
-      // corner case it is, does not appear to happen with nested structures!)
-      //
-      // The recommended solution for this problem is not to mix C and C++ calling conventions when passing structs
-      // by value. Either always pass structs by value within C code or C++ code, but not mixing the two by
-      // defining a function in one and calling it from the other (so, just changing .c to .cpp, or moving code
-      // from one file to another, would be enough to fix this), or, do not pass structs by value (which in general
-      // is inefficient, and worth avoiding if you can).
+    if (isPossiblyFunctionType(oldType) && isPossiblyFunctionType(newType)) {
+      var oldCount = countNormalArgs(oldType);
+      var newCount = countNormalArgs(newType);
+      if (oldCount != newCount && oldCount && newCount) {
+        warnOnce('Casting a function pointer type to another with a different number of arguments. See more info in the compiler source');
+        if (VERBOSE) {
+          warnOnce('Casting a function pointer type to another with a different number of arguments: ' + oldType + ' vs. ' + newType + ', on ' + item.params[0].ident);
+        }
+        // This may be dangerous as clang generates different code for C and C++ calling conventions. The only problem
+        // case appears to be passing a structure by value, C will have (field1, field2) as function args, and the
+        // function will internally create a structure with that data, while C++ will have (struct* byVal) and it
+        // will create a copy before calling the function, then call it with a pointer to the copy. Mixing the two
+        // first of all leads to two copies being made, so this is a bad idea even regardless of Emscripten. But,
+        // what is a problem for Emscripten is that mixing these two calling conventions (say, calling a C one from
+        // C++) will then assume that (struct* byVal) is actually the same as (field1, field2). In native code, this
+        // is easily possible, you place the two fields on the stack and call the function (you know to place the
+        // values since there is 'byVal'). In Emscripten, though, this means we would need to always do one or the
+        // other of the two possibilities, for example, always passing by-value structs as (field1, field2). This
+        // would slow down everything, just to handle this corner case. (Which, just to point out how much of a
+        // corner case it is, does not appear to happen with nested structures!)
+        //
+        // The recommended solution for this problem is not to mix C and C++ calling conventions when passing structs
+        // by value. Either always pass structs by value within C code or C++ code, but not mixing the two by
+        // defining a function in one and calling it from the other (so, just changing .c to .cpp, or moving code
+        // from one file to another, would be enough to fix this), or, do not pass structs by value (which in general
+        // is inefficient, and worth avoiding if you can).
+        //
+        // Note that removing all arguments is acceptable, as a vast to void ()*.
+      }
     }
   }
   var temp = {
@@ -1412,7 +1426,7 @@ function handleOverflow(text, bits) {
   // TODO: handle overflows of i64s
   if (!bits) return text;
   var correct = correctOverflows();
-  warn(!correct || bits <= 32, 'Cannot correct overflows of this many bits: ' + bits + ' at line ' + Framework.currItem.lineNum);
+  warnOnce(!correct || bits <= 32, 'Cannot correct overflows of this many bits: ' + bits);
   if (CHECK_OVERFLOWS) return 'CHECK_OVERFLOW(' + text + ', ' + bits + ', ' + Math.floor(correctSpecificOverflow() && !PGO) + (
     PGO ? ', "' + Debugging.getIdentifier() + '"' : ''
   ) + ')';
@@ -1537,7 +1551,8 @@ function makeRounding(value, bits, signed, floatConversion) {
   // TODO: handle roundings of i64s
   assert(bits);
   // C rounds to 0 (-5.5 to -5, +5.5 to 5), while JS has no direct way to do that.
-  if (bits <= 32 && signed) return '((' + value + ')|0)'; // This is fast and even correct, for all cases
+  if (bits <= 32 && signed) return '((' + value + ')&-1)'; // This is fast and even correct, for all cases. Note that it is the same
+                                                           // as |0, but &-1 hints to the js optimizer that this is a rounding correction
   // Do Math.floor, which is reasonably fast, if we either don't care, or if we can be sure
   // the value is non-negative
   if (!correctRoundings() || (!signed && !floatConversion)) return 'Math.floor(' + value + ')';
@@ -1561,6 +1576,8 @@ function isSignedOp(op, variant) {
 }
 
 var legalizedI64s = USE_TYPED_ARRAYS == 2; // We do not legalize globals, but do legalize function lines. This will be true in the latter case
+var preciseI64MathUsed = false; // Set to true if we actually use precise i64 math: If PRECISE_I64_MATH is set, and also such math is actually
+                                // needed (+,-,*,/,% - we do not need it for bitops)
 
 function processMathop(item) {
   var op = item.op;
@@ -1617,6 +1634,11 @@ function processMathop(item) {
       return result;
     }
   }
+  function i64PreciseOp(type, lastArg) {
+    preciseI64MathUsed = true;
+    return finish(['(i64Math.' + type + '(' + low1 + ',' + high1 + ',' + low2 + ',' + high2 +
+        (lastArg ? ',' + lastArg : '') + '),i64Math.result[0])', 'i64Math.result[1]']);
+  }
   switch (op) {
     // basic integer ops
     case 'or': {
@@ -1702,11 +1724,46 @@ function processMathop(item) {
       case 'ptrtoint': return makeI64(idents[0], 0);
       case 'inttoptr': return '(' + idents[0] + '[0])'; // just directly truncate the i64 to a 'pointer', which is an i32
       // Dangerous, rounded operations. TODO: Fully emulate
-      case 'add': warnI64_1(); return finish(splitI64(mergeI64(idents[0]) + '+' + mergeI64(idents[1])));
-      case 'sub': warnI64_1(); return finish(splitI64(mergeI64(idents[0]) + '-' + mergeI64(idents[1])));
-      case 'sdiv': case 'udiv': warnI64_1(); return finish(splitI64(makeRounding(mergeI64(idents[0], op[0] === 'u') + '/' + mergeI64(idents[1], op[0] === 'u'), bits, op[0] === 's')));
-      case 'mul': warnI64_1(); return finish(splitI64(mergeI64(idents[0], op[0] === 'u') + '*' + mergeI64(idents[1], op[0] === 'u')));
-      case 'urem': case 'srem': warnI64_1(); return finish(splitI64(mergeI64(idents[0], op[0] === 'u') + '%' + mergeI64(idents[1], op[0] === 'u')));
+      case 'add': {
+        if (PRECISE_I64_MATH) {
+          return i64PreciseOp('add');
+        } else {
+          warnI64_1();
+          return finish(splitI64(mergeI64(idents[0]) + '+' + mergeI64(idents[1])));
+        }
+      }
+      case 'sub': {
+        if (PRECISE_I64_MATH) {
+          return i64PreciseOp('subtract');
+        } else {
+          warnI64_1();
+          return finish(splitI64(mergeI64(idents[0]) + '-' + mergeI64(idents[1])));
+        }
+      }
+      case 'sdiv': case 'udiv': {
+        if (PRECISE_I64_MATH) {
+          return i64PreciseOp('divide', op[0] === 'u');
+        } else {
+          warnI64_1();
+          return finish(splitI64(makeRounding(mergeI64(idents[0], op[0] === 'u') + '/' + mergeI64(idents[1], op[0] === 'u'), bits, op[0] === 's')));
+        }
+      }
+      case 'mul': {
+        if (PRECISE_I64_MATH) {
+          return i64PreciseOp('multiply');
+        } else {
+          warnI64_1();
+          return finish(splitI64(mergeI64(idents[0], op[0] === 'u') + '*' + mergeI64(idents[1], op[0] === 'u')));
+        }
+      }
+      case 'urem': case 'srem': {
+        if (PRECISE_I64_MATH) {
+          return i64PreciseOp('modulo', op[0] === 'u');
+        } else {
+          warnI64_1();
+          return finish(splitI64(mergeI64(idents[0], op[0] === 'u') + '%' + mergeI64(idents[1], op[0] === 'u')));
        }
+      }
      case 'bitcast': {
         // Pointers are not 64-bit, so there is really only one possible type of bitcast here, int to float or vice versa
         assert(USE_TYPED_ARRAYS == 2, 'Can only bitcast ints <-> floats with typed arrays mode 2');
@@ -1782,6 +1839,7 @@ function processMathop(item) {
     case 'fsub': return getFastValue(idents[0], '-', idents[1], item.type);
     case 'fdiv': return getFastValue(idents[0], '/', idents[1], item.type);
     case 'fmul': return getFastValue(idents[0], '*', idents[1], item.type);
+    case 'frem': return getFastValue(idents[0], '%', idents[1], item.type);
     case 'uitofp': case 'sitofp': return idents[0];
     case 'fptoui': case 'fptosi': return makeRounding(idents[0], bitsLeft, op === 'fptosi', true);