about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/clang/Basic/DiagnosticSemaKinds.td 3
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp 9
-rw-r--r-- lib/Sema/SemaChecking.cpp 21
-rw-r--r-- test/CodeGen/illegal-UTF8.m 4
-rw-r--r-- test/Sema/builtins.c 2
5 files changed, 22 insertions, 17 deletions
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 3bb85fc0f0..e3c9967f9e 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3091,6 +3091,9 @@ def err_cfstring_literal_not_string_constant : Error<
"CFString literal is not a string constant">;
def warn_cfstring_literal_contains_nul_character : Warning<
"CFString literal contains NUL character">;
+def warn_cfstring_truncated : Warning<
+ "input conversion stopped due to an input byte that does not "
+ "belong to the input codeset UTF-8">;
// Statements.
def err_continue_not_in_loop : Error<
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index d125b370a0..6a527a229e 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1498,15 +1498,6 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
&ToPtr, ToPtr + NumBytes,
strictConversion);
- // Check for conversion failure.
- if (Result != conversionOK) {
- // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
- // this duplicate code.
- assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
- StringLength = NumBytes;
- return Map.GetOrCreateValue(String);
- }
-
// ConvertUTF8toUTF16 returns the length in ToPtr.
StringLength = ToPtr - &ToBuf[0];
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index a0b4b988db..7b0941e34b 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -32,6 +32,8 @@
#include "llvm/Support/raw_ostream.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
#include <limits>
using namespace clang;
using namespace sema;
@@ -581,9 +583,6 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) {
/// CheckObjCString - Checks that the argument to the builtin
/// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-/// belong to the input codeset UTF-8"
/// Note: It might also make sense to do the UTF-16 conversion here (would
/// simplify the backend).
bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@ bool Sema::CheckObjCString(Expr *Arg) {
diag::warn_cfstring_literal_contains_nul_character)
<< Arg->getSourceRange();
}
-
+ if (Literal->containsNonAsciiOrNull()) {
+ llvm::StringRef String = Literal->getString();
+ unsigned NumBytes = String.size();
+ llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+ const UTF8 *FromPtr = (UTF8 *)String.data();
+ UTF16 *ToPtr = &ToBuf[0];
+
+ ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+ &ToPtr, ToPtr + NumBytes,
+ strictConversion);
+ // Check for conversion failure.
+ if (Result != conversionOK)
+ Diag(Arg->getLocStart(),
+ diag::warn_cfstring_truncated) << Arg->getSourceRange();
+ }
return false;
}
diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m
index 871e6e5956..4762e80025 100644
--- a/test/CodeGen/illegal-UTF8.m
+++ b/test/CodeGen/illegal-UTF8.m
@@ -2,7 +2,5 @@
@class NSString;
-// FIXME: GCC emits the following warning:
-// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
-NSString *S = @"\xff\xff___WAIT___";
+NSString *S = @"\xff\xff___WAIT___"; // expected-warning {{input conversion stopped due to an input byte that does not belong to the input codeset UTF-8}}
diff --git a/test/Sema/builtins.c b/test/Sema/builtins.c
index 787630c1a8..21a1f72e1d 100644
--- a/test/Sema/builtins.c
+++ b/test/Sema/builtins.c
@@ -26,7 +26,7 @@ int test6(float a, long double b) {
#define CFSTR __builtin___CFStringMakeConstantString
void test7() {
const void *X;
- X = CFSTR("\242");
+ X = CFSTR("\242"); // expected-warning {{input conversion stopped}}
X = CFSTR("\0"); // expected-warning {{ CFString literal contains NUL character }}
X = CFSTR(242); // expected-error {{ CFString literal is not a string constant }} expected-warning {{incompatible integer to pointer conversion}}
X = CFSTR("foo", "bar"); // expected-error {{too many arguments to function call}}