aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
author: Jordan Rose <jordan_rose@apple.com>, 2013-01-24 20:50:50 +0000
committer: Jordan Rose <jordan_rose@apple.com>, 2013-01-24 20:50:50 +0000
commit: fc12060ed595fd23d731b8a86adb21ddbb8c7bfb (patch)
tree: 9a891734a848b7f714b2cb7940079661b5cfbe5d /lib/Lex/Lexer.cpp
parent: c7629d941557f7179eb8fa8a2e2a74d749cbaf7c (diff)
As an extension, treat Unicode whitespace characters as whitespace.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173370 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r-- lib/Lex/Lexer.cpp 23
1 files changed, 23 insertions, 0 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index e6ffca9554..2a57e6fced 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2791,7 +2791,30 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
return CodePoint;
}
+static bool isUnicodeWhitespace(uint32_t C) { // True iff C is one of the non-ASCII whitespace code points tested below.
+ return (C == 0x0085 || C == 0x00A0 || C == 0x1680 || // U+0085 NEXT LINE, U+00A0 NO-BREAK SPACE, U+1680 OGHAM SPACE MARK
+ C == 0x180E || (C >= 0x2000 && C <= 0x200A) || // U+180E MONGOLIAN VOWEL SEPARATOR; U+2000 EN QUAD through U+200A HAIR SPACE
+ C == 0x2028 || C == 0x2029 || C == 0x202F || // U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR, U+202F NARROW NO-BREAK SPACE
+ C == 0x205F || C == 0x3000); // U+205F MEDIUM MATHEMATICAL SPACE, U+3000 IDEOGRAPHIC SPACE
+}
+
void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+ if (isUnicodeWhitespace(C)) {
+ if (!isLexingRawMode()) {
+ CharSourceRange CharRange =
+ CharSourceRange::getCharRange(getSourceLocation(),
+ getSourceLocation(CurPtr));
+ Diag(BufferPtr, diag::ext_unicode_whitespace)
+ << CharRange;
+ }
+
+ Result.setFlag(Token::LeadingSpace);
+ if (SkipWhitespace(Result, CurPtr))
+ return; // KeepWhitespaceMode
+
+ return LexTokenInternal(Result);
+ }
+
if (isAllowedIDChar(C) && isAllowedInitiallyIDChar(C)) {
MIOpt.ReadToken();
return LexIdentifier(Result, CurPtr);