aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/preamble.js30
-rw-r--r--tests/test_core.py2
-rw-r--r--tests/utf32.cpp57
3 files changed, 73 insertions, 16 deletions
diff --git a/src/preamble.js b/src/preamble.js
index 5f0d720e..abcd1c67 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -569,6 +569,36 @@ function Pointer_stringify(ptr, /* optional */ length) {
}
Module['Pointer_stringify'] = Pointer_stringify;
+// Given a pointer 'ptr' to a null-terminated UTF16LE-encoded string in the emscripten HEAP, reads 16-bit code
+// units up to (not including) the first zero unit and returns them as a new Javascript String object.
+function UTF16ToString(ptr) {
+ var i = 0;
+
+ var str = '';
+ while (1) {
+ var codeUnit = {{{ makeGetValue('ptr', 'i*2', 'i16') }}};
+ if (codeUnit == 0) // terminator reached: everything before it has already been appended
+ return str;
+ ++i;
+ // String.fromCharCode maps one 16-bit code unit to one JS character (surrogate halves included), so no decoding is needed.
+ str += String.fromCharCode(codeUnit);
+ }
+}
+Module['UTF16ToString'] = UTF16ToString;
+
+// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
+// null-terminated and encoded in UTF16LE form. The copy writes (str.length+1)*2 bytes: one 16-bit unit per JS character plus the terminator.
+function stringToUTF16(str, outPtr) {
+ for(var i = 0; i < str.length; ++i) {
+ // charCodeAt returns a UTF-16 encoded code unit, so it can be directly written to the HEAP.
+ var codeUnit = str.charCodeAt(i); // may be either half of a surrogate pair; each half is stored as its own unit
+ {{{ makeSetValue('outPtr', 'i*2', 'codeUnit', 'i16') }}}
+ }
+ // Write the terminating 16-bit zero directly after the last code unit.
+ {{{ makeSetValue('outPtr', 'str.length*2', 0, 'i16') }}}
+}
+Module['stringToUTF16'] = stringToUTF16;
+
// Given a pointer 'ptr' to a null-terminated UTF32LE-encoded string in the emscripten HEAP, returns
// a copy of that string as a Javascript String object.
function UTF32ToString(ptr) {
diff --git a/tests/test_core.py b/tests/test_core.py
index 739d1d7c..1dd07307 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -7527,8 +7527,8 @@ def process(filename):
def test_utf32(self):
 if self.emcc_args is None: return self.skip('need libc for wcslen()')
-
 self.do_run(open(path_from_root('tests', 'utf32.cpp')).read(), 'OK.')
+ self.do_run(open(path_from_root('tests', 'utf32.cpp')).read(), 'OK.', args=['-fshort-wchar'])  # NOTE(review): confirm do_run's 'args' reaches the compiler; if it is runtime argv, the 2-byte wchar_t path is never built
def test_direct_string_constant_usage(self):
if self.emcc_args is None: return self.skip('requires libcxx')
diff --git a/tests/utf32.cpp b/tests/utf32.cpp
index 526efe76..6b75b244 100644
--- a/tests/utf32.cpp
+++ b/tests/utf32.cpp
@@ -4,23 +4,50 @@
#include <cassert>
#include <wchar.h>
-// This code tests that utf32-encoded std::wstrings can be marshalled between C++ and JS.
+typedef unsigned int utf32;
+typedef unsigned short utf16;
+
+// This code tests that Unicode std::wstrings can be marshalled between C++ and JS.
int main() {
 std::wstring wstr = L"abc\u2603\u20AC\U0002007C123 --- abc\u2603\u20AC\U0002007C123"; // U+2603 is snowman, U+20AC is the Euro sign, U+2007C is a Chinese Han character that looks like three raindrops.
- const int len = (wstr.length()+1)*4;
- char *memory = new char[len];
-
- asm("var str = Module.UTF32ToString(%0);"
- "Module.print(str);"
- "Module.stringToUTF32(str, %1);"
- :
- : "r"(wstr.c_str()), "r"(memory));
-
- // Compare memory to confirm that the string is intact after taking a route through JS side.
- const char *srcPtr = reinterpret_cast<const char *>(wstr.c_str());
- for(int i = 0; i < len; ++i) {
- assert(memory[i] == srcPtr[i]);
+
+ printf("sizeof(wchar_t): %d.\n", (int)sizeof(wchar_t));
+
+ if (sizeof(wchar_t) == 4) {
+ utf32 *memory = new utf32[wstr.length()+1];
+
+ asm("var str = Module.UTF32ToString(%0);"
+ "Module.print(str);"
+ "Module.stringToUTF32(str, %1);"
+ :
+ : "r"(wstr.c_str()), "r"(memory));
+
+ // Compare unit by unit, up to and including the null terminator, to confirm the string survived the round trip through JS.
+ const utf32 *srcPtr = reinterpret_cast<const utf32 *>(wstr.c_str());
+ for(int i = 0;; ++i) {
+ assert(memory[i] == srcPtr[i]);
+ if (srcPtr[i] == 0)
+ break;
+ }
+ delete[] memory;
+ } else { // sizeof(wchar_t) == 2, and we're building with -fshort-wchar.
+ utf16 *memory = new utf16[2*wstr.length()+1]; // NOTE(review): looks over-allocated; stringToUTF16 writes one unit per JS char plus a terminator. Confirm wstr.length()+1 suffices.
+
+ asm("var str = Module.UTF16ToString(%0);"
+ "Module.print(str);"
+ "Module.stringToUTF16(str, %1);"
+ :
+ : "r"(wstr.c_str()), "r"(memory));
+
+ // Compare unit by unit, up to and including the null terminator, to confirm the string survived the round trip through JS.
+ const utf16 *srcPtr = reinterpret_cast<const utf16 *>(wstr.c_str());
+ for(int i = 0;; ++i) {
+ assert(memory[i] == srcPtr[i]);
+ if (srcPtr[i] == 0)
+ break;
+ }
+ delete[] memory;
 }
+
 printf("OK.\n");
- delete[] memory;
}