diff options
-rw-r--r-- | src/preamble.js | 30 | ||||
-rw-r--r-- | tests/test_core.py | 2 | ||||
-rw-r--r-- | tests/utf32.cpp | 57 |
3 files changed, 73 insertions, 16 deletions
diff --git a/src/preamble.js b/src/preamble.js index 5f0d720e..abcd1c67 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -569,6 +569,36 @@ function Pointer_stringify(ptr, /* optional */ length) { } Module['Pointer_stringify'] = Pointer_stringify; +// Given a pointer 'ptr' to a null-terminated UTF16LE-encoded string in the emscripten HEAP, returns +// a copy of that string as a Javascript String object. +function UTF16ToString(ptr) { + var i = 0; + + var str = ''; + while (1) { + var codeUnit = {{{ makeGetValue('ptr', 'i*2', 'i16') }}}; + if (codeUnit == 0) + return str; + ++i; + // fromCharCode constructs a character from a UTF-16 code unit, so we can pass the UTF16 string right through. + str += String.fromCharCode(codeUnit); + } +} +Module['UTF16ToString'] = UTF16ToString; + +// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr', +// null-terminated and encoded in UTF16LE form. The copy requires exactly (str.length+1)*2 bytes of space in the HEAP: one 16-bit code unit per element of 'str', plus the 16-bit null terminator. +function stringToUTF16(str, outPtr) { + for(var i = 0; i < str.length; ++i) { + // charCodeAt returns a UTF-16 encoded code unit, so it can be directly written to the HEAP. + var codeUnit = str.charCodeAt(i); // possibly a lead surrogate + {{{ makeSetValue('outPtr', 'i*2', 'codeUnit', 'i16') }}} + } + // Null-terminate the string written to the HEAP. + {{{ makeSetValue('outPtr', 'str.length*2', 0, 'i16') }}} +} +Module['stringToUTF16'] = stringToUTF16; + // Given a pointer 'ptr' to a null-terminated UTF32LE-encoded string in the emscripten HEAP, returns // a copy of that string as a Javascript String object. 
function UTF32ToString(ptr) { diff --git a/tests/test_core.py b/tests/test_core.py index 739d1d7c..1dd07307 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7527,8 +7527,8 @@ def process(filename): def test_utf32(self): if self.emcc_args is None: return self.skip('need libc for wcslen()') - self.do_run(open(path_from_root('tests', 'utf32.cpp')).read(), 'OK.') + self.do_run(open(path_from_root('tests', 'utf32.cpp')).read(), 'OK.', args=['-fshort-wchar']) def test_direct_string_constant_usage(self): if self.emcc_args is None: return self.skip('requires libcxx') diff --git a/tests/utf32.cpp b/tests/utf32.cpp index 526efe76..6b75b244 100644 --- a/tests/utf32.cpp +++ b/tests/utf32.cpp @@ -4,23 +4,50 @@ #include <cassert> #include <wchar.h> -// This code tests that utf32-encoded std::wstrings can be marshalled between C++ and JS. +typedef unsigned int utf32; +typedef unsigned short utf16; + +// This code tests that Unicode std::wstrings can be marshalled between C++ and JS. int main() { std::wstring wstr = L"abc\u2603\u20AC\U0002007C123 --- abc\u2603\u20AC\U0002007C123"; // U+2603 is snowman, U+20AC is the Euro sign, U+2007C is a Chinese Han character that looks like three raindrops. - const int len = (wstr.length()+1)*4; - char *memory = new char[len]; - - asm("var str = Module.UTF32ToString(%0);" - "Module.print(str);" - "Module.stringToUTF32(str, %1);" - : - : "r"(wstr.c_str()), "r"(memory)); - - // Compare memory to confirm that the string is intact after taking a route through JS side. 
- const char *srcPtr = reinterpret_cast<const char *>(wstr.c_str()); - for(int i = 0; i < len; ++i) { - assert(memory[i] == srcPtr[i]); + + printf("sizeof(wchar_t): %d.\n", (int)sizeof(wchar_t)); + + if (sizeof(wchar_t) == 4) { + utf32 *memory = new utf32[wstr.length()+1]; + + asm("var str = Module.UTF32ToString(%0);" + "Module.print(str);" + "Module.stringToUTF32(str, %1);" + : + : "r"(wstr.c_str()), "r"(memory)); + + // Compare memory to confirm that the string is intact after taking a route through JS side. + const utf32 *srcPtr = reinterpret_cast<const utf32 *>(wstr.c_str()); + for(int i = 0;; ++i) { + assert(memory[i] == srcPtr[i]); + if (srcPtr[i] == 0) + break; + } + delete[] memory; + } else { // sizeof(wchar_t) == 2, and we're building with -fshort-wchar. + utf16 *memory = new utf16[2*wstr.length()+1]; + + asm("var str = Module.UTF16ToString(%0);" + "Module.print(str);" + "Module.stringToUTF16(str, %1);" + : + : "r"(wstr.c_str()), "r"(memory)); + + // Compare memory to confirm that the string is intact after taking a route through JS side. + const utf16 *srcPtr = reinterpret_cast<const utf16 *>(wstr.c_str()); + for(int i = 0;; ++i) { + assert(memory[i] == srcPtr[i]); + if (srcPtr[i] == 0) + break; + } + delete[] memory; } + printf("OK.\n"); - delete[] memory; } |