aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steve Naroff <snaroff@apple.com> 2009-04-13 19:08:08 +0000
committer Steve Naroff <snaroff@apple.com> 2009-04-13 19:08:08 +0000
commit aa4a756185e77755aaa10ae50db08ae5be58e70a (patch)
tree d3105e387c5e58ebcb509970c89dbb8766323e2d
parent 0a2b45e5885b6b8477b167042c0f6cd1d99a1f13 (diff)
Fixed crasher in <rdar://problem/6780904> [irgen] Assertion failed: (Result == conversionOK && "UTF-8 to UTF-16 conversion failed"), function GetAddrOfConstantCFString, file CodeGenModule.cpp, line 1063.
Still a diagnostic-related FIXME (will discuss with Daniel/Fariborz offline).
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@68975 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp 27
-rw-r--r-- test/CodeGen/illegal-UTF8.m 8
2 files changed, 26 insertions, 9 deletions
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index ce893dbcfd..797c0cef00 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1060,16 +1060,25 @@ GetAddrOfConstantCFString(const StringLiteral *Literal) {
Result = ConvertUTF8toUTF16(&FromPtr, FromPtr+Literal->getByteLength(),
&ToPtr, ToPtr+Literal->getByteLength(),
strictConversion);
- assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed");
+ if (Result == conversionOK) {
+ // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
+ // without doing more surgery to this routine. Since we aren't explicitly
+ // checking for endianness here, it's also a bug (when generating code for
+ // a target that doesn't match the host endianness). Modeling this as an
+ // i16 array is likely the cleanest solution.
+ StringLength = ToPtr-&ToBuf[0];
+ str.assign((char *)&ToBuf[0], StringLength*2);// Twice as many UTF8 chars.
+ isUTF16 = true;
+ } else if (Result == sourceIllegal) {
+ // FIXME: GCC currently emits the following warning (in the backend):
+ // "warning: input conversion stopped due to an input byte that does not
+ // belong to the input codeset UTF-8"
+ // The clang backend doesn't currently emit any warnings.
+ str.assign(Literal->getStrData(), Literal->getByteLength());
+ StringLength = str.length();
+ } else
+ assert(Result == conversionOK && "UTF-8 to UTF-16 conversion failed");
- // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
- // without doing more surgery to this routine. Since we aren't explicitly
- // checking for endianness here, it's also a bug (when generating code for
- // a target that doesn't match the host endianness). Modeling this as an i16
- // array is likely the cleanest solution.
- StringLength = ToPtr-&ToBuf[0];
- str.assign((char *)&ToBuf[0], StringLength*2); // Twice as many UTF8 chars.
- isUTF16 = true;
} else {
str.assign(Literal->getStrData(), Literal->getByteLength());
StringLength = str.length();
diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m
new file mode 100644
index 0000000000..88467b6d41
--- /dev/null
+++ b/test/CodeGen/illegal-UTF8.m
@@ -0,0 +1,8 @@
+// RUN: clang %s -S -m64
+
+@class NSString;
+
+// FIXME: GCC emits the following warning:
+// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
+
+NSString *S = @"\xff\xff___WAIT___";