Have Sema check for validity of CGString literal

instead of asserting in IRGen. Fixes radar 8390459. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@113253 91177308-0d34-0410-b5e6-96231b3b80d8
author: Fariborz Jahanian <fjahanian@apple.com> 2010-09-07 19:38:13 +0000
committer: Fariborz Jahanian <fjahanian@apple.com> 2010-09-07 19:38:13 +0000
commit: 7da71020b97c69b95831bd03d35e8e8404bfdea0 (patch)
tree: 4be9be364a6c54b743bbcb015427e4b84ae5ff88 /lib
parent: 0f048a44b5280f5fae28b9363ed4eab618bb1083 (diff)
2 files changed, 17 insertions, 13 deletions
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index d125b370a0..6a527a229e 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1498,15 +1498,6 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
                                                &ToPtr, ToPtr + NumBytes,
                                                strictConversion);
 
-  // Check for conversion failure.
-  if (Result != conversionOK) {
-    // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
-    // this duplicate code.
-    assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
-    StringLength = NumBytes;
-    return Map.GetOrCreateValue(String);
-  }
-
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index a0b4b988db..7b0941e34b 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -32,6 +32,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
 #include <limits>
 using namespace clang;
 using namespace sema;
@@ -581,9 +583,6 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) {
 
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-///           belong to the input codeset UTF-8"
 /// Note: It might also make sense to do the UTF-16 conversion here (would
 /// simplify the backend).
 bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@ bool Sema::CheckObjCString(Expr *Arg) {
          diag::warn_cfstring_literal_contains_nul_character)
       << Arg->getSourceRange();
   }
-
+  if (Literal->containsNonAsciiOrNull()) {
+    llvm::StringRef String = Literal->getString();
+    unsigned NumBytes = String.size();
+    llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+    const UTF8 *FromPtr = (UTF8 *)String.data();
+    UTF16 *ToPtr = &ToBuf[0];
+    
+    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+                                                 &ToPtr, ToPtr + NumBytes,
+                                                 strictConversion);
+    // Check for conversion failure.
+    if (Result != conversionOK)
+      Diag(Arg->getLocStart(),
+           diag::warn_cfstring_truncated) << Arg->getSourceRange();
+  }
   return false;
 }
author	Fariborz Jahanian <fjahanian@apple.com>	2010-09-07 19:38:13 +0000
committer	Fariborz Jahanian <fjahanian@apple.com>	2010-09-07 19:38:13 +0000
commit	7da71020b97c69b95831bd03d35e8e8404bfdea0 (patch)
tree	4be9be364a6c54b743bbcb015427e4b84ae5ff88 /lib
parent	0f048a44b5280f5fae28b9363ed4eab618bb1083 (diff)