diff options
-rw-r--r-- | include/clang/Basic/DiagnosticSemaKinds.td | 3 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 9 | ||||
-rw-r--r-- | lib/Sema/SemaChecking.cpp | 21 | ||||
-rw-r--r-- | test/CodeGen/illegal-UTF8.m | 4 | ||||
-rw-r--r-- | test/Sema/builtins.c | 2 |
5 files changed, 22 insertions, 17 deletions
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 3bb85fc0f0..e3c9967f9e 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3091,6 +3091,9 @@ def err_cfstring_literal_not_string_constant : Error<
   "CFString literal is not a string constant">;
 def warn_cfstring_literal_contains_nul_character : Warning<
   "CFString literal contains NUL character">;
+def warn_cfstring_truncated : Warning<
+  "input conversion stopped due to an input byte that does not "
+  "belong to the input codeset UTF-8">;
 
 // Statements.
 def err_continue_not_in_loop : Error<
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index d125b370a0..6a527a229e 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1498,15 +1498,6 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
                                                &ToPtr, ToPtr + NumBytes,
                                                strictConversion);
 
-  // Check for conversion failure.
-  if (Result != conversionOK) {
-    // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
-    // this duplicate code.
-    assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
-    StringLength = NumBytes;
-    return Map.GetOrCreateValue(String);
-  }
-
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index a0b4b988db..7b0941e34b 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -32,6 +32,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
 #include <limits>
 using namespace clang;
 using namespace sema;
@@ -581,9 +583,6 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) {
 
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-/// belong to the input codeset UTF-8"
 /// Note: It might also make sense to do the UTF-16 conversion here (would
 /// simplify the backend).
 bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@ bool Sema::CheckObjCString(Expr *Arg) {
          diag::warn_cfstring_literal_contains_nul_character)
       << Arg->getSourceRange();
   }
-
+  if (Literal->containsNonAsciiOrNull()) {
+    llvm::StringRef String = Literal->getString();
+    unsigned NumBytes = String.size();
+    llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+    const UTF8 *FromPtr = (UTF8 *)String.data();
+    UTF16 *ToPtr = &ToBuf[0];
+
+    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+                                                 &ToPtr, ToPtr + NumBytes,
+                                                 strictConversion);
+    // Check for conversion failure.
+    if (Result != conversionOK)
+      Diag(Arg->getLocStart(),
+           diag::warn_cfstring_truncated) << Arg->getSourceRange();
+  }
   return false;
 }
 
diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m
index 871e6e5956..4762e80025 100644
--- a/test/CodeGen/illegal-UTF8.m
+++ b/test/CodeGen/illegal-UTF8.m
@@ -2,7 +2,5 @@
 
 @class NSString;
 
-// FIXME: GCC emits the following warning:
-// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
-NSString *S = @"\xff\xff___WAIT___";
+NSString *S = @"\xff\xff___WAIT___"; // expected-warning {{input conversion stopped due to an input byte that does not belong to the input codeset UTF-8}}
 
diff --git a/test/Sema/builtins.c b/test/Sema/builtins.c
index 787630c1a8..21a1f72e1d 100644
--- a/test/Sema/builtins.c
+++ b/test/Sema/builtins.c
@@ -26,7 +26,7 @@ int test6(float a, long double b) {
 #define CFSTR __builtin___CFStringMakeConstantString
 void test7() {
   const void *X;
-  X = CFSTR("\242");
+  X = CFSTR("\242"); // expected-warning {{input conversion stopped}}
   X = CFSTR("\0"); // expected-warning {{ CFString literal contains NUL character }}
   X = CFSTR(242); // expected-error {{ CFString literal is not a string constant }} expected-warning {{incompatible integer to pointer conversion}}
   X = CFSTR("foo", "bar"); // expected-error {{too many arguments to function call}}