diff options
author | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-09-08 07:16:20 +0000 |
---|---|---|
committer | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-09-08 07:16:20 +0000 |
commit | e5f0588840b20897631cc8110344fd2745ef4caa (patch) | |
tree | 8fb7656a4564a1c23cb52532ca9622efe9f9ba67 /lib/Basic/ConvertUTF.c | |
parent | 49f9434a4a69d56779aa37feb2d85e06e7289fce (diff) |
When a bad UTF-8 encoding or bogus escape sequence is encountered in a
string literal, produce a diagnostic pointing at the erroneous character
range, not at the start of the literal.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@163459 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Basic/ConvertUTF.c')
-rw-r--r-- | lib/Basic/ConvertUTF.c | 20 |
1 files changed, 15 insertions, 5 deletions
```diff
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index 2e25e79c4c..ec57be701a 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -393,15 +393,25 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
 /* --------------------------------------------------------------------- */
 
 /*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+  return trailingBytesForUTF8[first] + 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
  * Exported function to return whether a UTF-8 string is legal or not.
  * This is not used here; it's just exported.
  */
-Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
-    while (source != sourceEnd) {
-        int length = trailingBytesForUTF8[*source] + 1;
-        if (length > sourceEnd - source || !isLegalUTF8(source, length))
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+    while (*source != sourceEnd) {
+        int length = trailingBytesForUTF8[**source] + 1;
+        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
             return false;
-        source += length;
+        *source += length;
     }
     return true;
 }
```