diff options
author | Alon Zakai <alonzakai@gmail.com> | 2012-07-14 19:01:09 -0700 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2012-07-14 19:01:09 -0700 |
commit | 4ffb769331329f8997d3597854cabe4e109e8684 (patch) | |
tree | 12ceefb66430ba98eeda93eaa77d721c1de11dd7 | |
parent | 19e90e544bf933609094389536cfb123313bf9df (diff) |
utf-8 support
-rw-r--r-- | src/library.js | 34 | ||||
-rwxr-xr-x | tests/runner.py | 13 |
2 files changed, 46 insertions, 1 deletions
```diff
diff --git a/src/library.js b/src/library.js
index bb73c48a..a763bd9c 100644
--- a/src/library.js
+++ b/src/library.js
@@ -367,12 +367,44 @@ LibraryManager.library = {
           return input.cache.shift();
         };
       }
+
+      var utf8Buffer = [];
+      var utf8Needed = 0;
+      function utf8Processor(code) {
+        code = code & 0xff;
+        if (utf8Needed) {
+          utf8Buffer.push(code);
+          utf8Needed--;
+        }
+        if (utf8Buffer.length == 0) {
+          if (code < 128) return String.fromCharCode(code);
+          utf8Buffer.push(code);
+          if (code > 191 && code < 224) {
+            utf8Needed = 1;
+          } else {
+            utf8Needed = 2;
+          }
+          return '';
+        }
+        if (utf8Needed > 0) return '';
+        var c1 = utf8Buffer[0];
+        var c2 = utf8Buffer[1];
+        var c3 = utf8Buffer[2];
+        var ret;
+        if (c1 > 191 && c1 < 224) {
+          ret = String.fromCharCode(((c1 & 31) << 6) | (c2 & 63));
+        } else {
+          ret = String.fromCharCode(((c1 & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
+        }
+        utf8Buffer.length = 0;
+        return ret;
+      }
       function simpleOutput(val) {
         if (val === null || val === '\n'.charCodeAt(0)) {
           output.printer(output.buffer.join(''));
           output.buffer = [];
         } else {
-          output.buffer.push(String.fromCharCode(val));
+          output.buffer.push(utf8Processor(val));
         }
       }
       if (!output) {
diff --git a/tests/runner.py b/tests/runner.py
index 01a4fb61..c7d1ff28 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# This Python file uses the following encoding: utf-8
 '''
 Simple test runner
 
@@ -4478,6 +4479,18 @@ def process(filename):
       '''
       self.do_run(src, re.sub('(^|\n)\s+', '\\1', expected), post_build=add_pre_run_and_checks)
 
+    def test_utf(self):
+      self.banned_js_engines = [SPIDERMONKEY_ENGINE] # only node handles utf well
+      src = r'''
+        #include <stdio.h>
+
+        int main() {
+          char *c = "μ†ℱ ╋ℯ╳╋";
+          printf("%d %d %d %d %s\n", c[0]&0xff, c[1]&0xff, c[2]&0xff, c[3]&0xff, c);
+        }
+      '''
+      self.do_run(src, '206 188 226 128 μ†ℱ ╋ℯ╳╋\n');
+
     def test_direct_string_constant_usage(self):
       if self.emcc_args is None: return self.skip('requires libcxx')
 
```