diff options
author | Fariborz Jahanian <fjahanian@apple.com> | 2010-09-07 19:38:13 +0000 |
---|---|---|
committer | Fariborz Jahanian <fjahanian@apple.com> | 2010-09-07 19:38:13 +0000 |
commit | 7da71020b97c69b95831bd03d35e8e8404bfdea0 (patch) | |
tree | 4be9be364a6c54b743bbcb015427e4b84ae5ff88 /lib | |
parent | 0f048a44b5280f5fae28b9363ed4eab618bb1083 (diff) |
Have Sema check for validity of CFString literal
instead of asserting in IRGen. Fixes radar 8390459.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@113253 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 9 | ||||
-rw-r--r-- | lib/Sema/SemaChecking.cpp | 21 |
2 files changed, 17 insertions, 13 deletions
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index d125b370a0..6a527a229e 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1498,15 +1498,6 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map, &ToPtr, ToPtr + NumBytes, strictConversion); - // Check for conversion failure. - if (Result != conversionOK) { - // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove - // this duplicate code. - assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed"); - StringLength = NumBytes; - return Map.GetOrCreateValue(String); - } - // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index a0b4b988db..7b0941e34b 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -32,6 +32,8 @@ #include "llvm/Support/raw_ostream.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" +#include "clang/Basic/ConvertUTF.h" + #include <limits> using namespace clang; using namespace sema; @@ -581,9 +583,6 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) { /// CheckObjCString - Checks that the argument to the builtin /// CFString constructor is correct -/// FIXME: GCC currently emits the following warning: -/// "warning: input conversion stopped due to an input byte that does not -/// belong to the input codeset UTF-8" /// Note: It might also make sense to do the UTF-16 conversion here (would /// simplify the backend). 
bool Sema::CheckObjCString(Expr *Arg) { @@ -602,7 +601,21 @@ bool Sema::CheckObjCString(Expr *Arg) { diag::warn_cfstring_literal_contains_nul_character) << Arg->getSourceRange(); } - + if (Literal->containsNonAsciiOrNull()) { + llvm::StringRef String = Literal->getString(); + unsigned NumBytes = String.size(); + llvm::SmallVector<UTF16, 128> ToBuf(NumBytes); + const UTF8 *FromPtr = (UTF8 *)String.data(); + UTF16 *ToPtr = &ToBuf[0]; + + ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, + &ToPtr, ToPtr + NumBytes, + strictConversion); + // Check for conversion failure. + if (Result != conversionOK) + Diag(Arg->getLocStart(), + diag::warn_cfstring_truncated) << Arg->getSourceRange(); + } return false; } |