aboutsummaryrefslogtreecommitdiff
path: root/include/clang/Basic/ConvertUTF.h
diff options
context:
space:
mode:
authorJordan Rose <jordan_rose@apple.com>2013-01-24 20:50:46 +0000
committerJordan Rose <jordan_rose@apple.com>2013-01-24 20:50:46 +0000
commitc7629d941557f7179eb8fa8a2e2a74d749cbaf7c (patch)
tree5d1833c5ca29ada22f679ebee8488273fcaf9777 /include/clang/Basic/ConvertUTF.h
parent5209e2bc4d18e679dcacfd6f6a0120aa1d4a757f (diff)
Handle universal character names and Unicode characters outside of literals.
This is a missing piece for C99 conformance. This patch handles UCNs by adding a '\\' case to LexTokenInternal and LexIdentifier -- if we see a backslash, we tentatively try to read in a UCN. If the UCN is not syntactically well-formed, we fall back to the old treatment: a backslash followed by an identifier beginning with 'u' (or 'U'). Because the spelling of an identifier with UCNs still has the UCN in it, we need to convert that to UTF-8 in Preprocessor::LookUpIdentifierInfo. Of course, valid code that does *not* use UCNs will see only a very minimal performance hit (checks after each identifier for non-ASCII characters, checks when converting raw_identifiers to identifiers that they do not contain UCNs, and checks when getting the spelling of an identifier that it does not contain a UCN). This patch also adds basic support for actual UTF-8 in the source. This is treated almost exactly the same as UCNs except that we consider stray Unicode characters to be mistakes and offer a fixit to remove them. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173369 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/ConvertUTF.h')
-rw-r--r--include/clang/Basic/ConvertUTF.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h
index fb05afdae7..38956ee340 100644
--- a/include/clang/Basic/ConvertUTF.h
+++ b/include/clang/Basic/ConvertUTF.h
@@ -161,6 +161,16 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
unsigned getNumBytesForUTF8(UTF8 firstByte);
+/*
+ * Convert the single UTF-8 sequence that starts at *source into one UTF-32
+ * code unit stored at *target.
+ *
+ * source    - in/out pointer to the current read position; it is handed
+ *             straight to ConvertUTF8toUTF32, which is presumably what
+ *             advances it (confirm against that function's contract).
+ * sourceEnd - one past the last readable byte of the input.
+ * target    - receives the decoded value; exactly one UTF32 slot is offered
+ *             to the converter. Only a local copy of this pointer is passed
+ *             by address, so the caller's pointer is never modified.
+ * flags     - forwarded unchanged to ConvertUTF8toUTF32.
+ *
+ * Returns sourceExhausted when the input range is empty or shorter than the
+ * length reported by getNumBytesForUTF8 for the lead byte; otherwise returns
+ * whatever ConvertUTF8toUTF32 returns.
+ */
+static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
+                                                   const UTF8 *sourceEnd,
+                                                   UTF32 *target,
+                                                   ConversionFlags flags) {
+  unsigned size;
+
+  /* The lead byte must not be read when no input is left: without this
+   * guard, **source would touch memory at or beyond sourceEnd. */
+  if (*source >= sourceEnd)
+    return sourceExhausted;
+
+  size = getNumBytesForUTF8(**source);
+
+  /* Compare in signed arithmetic so the pointer difference is never
+   * implicitly converted to unsigned; size is a small byte count. */
+  if ((int)size > sourceEnd - *source)
+    return sourceExhausted;
+
+  return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
+}
+
#ifdef __cplusplus
}