diff options
author | Jordan Rose <jordan_rose@apple.com> | 2013-01-24 20:50:50 +0000 |
---|---|---|
committer | Jordan Rose <jordan_rose@apple.com> | 2013-01-24 20:50:50 +0000 |
commit | fc12060ed595fd23d731b8a86adb21ddbb8c7bfb (patch) | |
tree | 9a891734a848b7f714b2cb7940079661b5cfbe5d /lib/Lex/Lexer.cpp | |
parent | c7629d941557f7179eb8fa8a2e2a74d749cbaf7c (diff) |
As an extension, treat Unicode whitespace characters as whitespace.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173370 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- | lib/Lex/Lexer.cpp | 23 |
1 files changed, 23 insertions, 0 deletions
```diff
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index e6ffca9554..2a57e6fced 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2791,7 +2791,30 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
   return CodePoint;
 }
 
+static bool isUnicodeWhitespace(uint32_t C) {
+  return (C == 0x0085 || C == 0x00A0 || C == 0x1680 ||
+          C == 0x180E || (C >= 0x2000 && C <= 0x200A) ||
+          C == 0x2028 || C == 0x2029 || C == 0x202F ||
+          C == 0x205F || C == 0x3000);
+}
+
 void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+  if (isUnicodeWhitespace(C)) {
+    if (!isLexingRawMode()) {
+      CharSourceRange CharRange =
+        CharSourceRange::getCharRange(getSourceLocation(),
+                                      getSourceLocation(CurPtr));
+      Diag(BufferPtr, diag::ext_unicode_whitespace)
+        << CharRange;
+    }
+
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+    return LexTokenInternal(Result);
+  }
+
   if (isAllowedIDChar(C) && isAllowedInitiallyIDChar(C)) {
     MIOpt.ReadToken();
     return LexIdentifier(Result, CurPtr);
```