diff options
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/Lexer.cpp | 37 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 57 |
2 files changed, 88 insertions, 6 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index c9f73764c9..2b24d1cc75 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1078,6 +1078,12 @@ static void InitCharacterInfo() { } +/// isIdentifierHead - Return true if this is the first character of an +/// identifier, which is [a-zA-Z_]. +static inline bool isIdentifierHead(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; +} + /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. static inline bool isIdentifierBody(unsigned char c) { @@ -1543,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; - while (isNumberBody(C)) { // FIXME: UCNs? + while (isNumberBody(C)) { // FIXME: UCNs. CurPtr = ConsumeChar(CurPtr, Size, Result); PrevCh = C; C = getCharAndSize(CurPtr, Size); @@ -1567,6 +1573,23 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { Result.setLiteralData(TokStart); } +/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes +/// in C++11. +const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { + assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11"); + + // Maximally munch an identifier. FIXME: UCNs. + unsigned Size; + char C = getCharAndSize(CurPtr, Size); + if (isIdentifierHead(C)) { + do { + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + } while (isIdentifierBody(C)); + } + return CurPtr; +} + /// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// either " or L" or u8" or u" or U". void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, @@ -1606,6 +1629,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the string, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_string); @@ -1685,6 +1712,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, } } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); @@ -1768,6 +1799,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the character, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_char); diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 547bd4e0c8..e3ff77f4f0 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { } -/// character-literal: [C++0x lex.ccon] +/// user-defined-character-literal: [C++11 lex.ext] +/// character-literal ud-suffix +/// ud-suffix: +/// identifier +/// character-literal: [C++11 lex.ccon] /// ' c-char-sequence ' /// u' c-char-sequence ' /// U' c-char-sequence ' @@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// backslash \, or new-line character /// escape-sequence /// universal-character-name -/// escape-sequence: [C++0x lex.ccon] +/// escape-sequence: /// simple-escape-sequence /// octal-escape-sequence /// hexadecimal-escape-sequence @@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// hexadecimal-escape-sequence: /// \x hexadecimal-digit /// hexadecimal-escape-sequence hexadecimal-digit -/// universal-character-name: +/// universal-character-name: [C++11 lex.charset] /// \u hex-quad /// \U hex-quad hex-quad /// hex-quad: @@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(begin[0] == '\'' && "Invalid token lexed"); ++begin; + // Remove an optional ud-suffix. + if (end[-1] != '\'') { + const char *UDSuffixEnd = end; + do { + --end; + } while (end[-1] != '\''); + UDSuffixBuf.assign(end, UDSuffixEnd); + } + // Trim the ending quote. - assert(end[-1] == '\'' && "Invalid token lexed"); + assert(end != begin && "Invalid token lexed"); --end; // FIXME: The "Value" is an uint64_t so we can handle char literals of @@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ Pascal = false; + SourceLocation UDSuffixTokLoc; + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { const char *ThisTokBuf = &TokenBuf[0]; // Get the spelling of the token, which eliminates trigraphs, etc. We know @@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ continue; } - const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + const char *ThisTokEnd = ThisTokBuf+ThisTokLen; + + // Remove an optional ud-suffix. + if (ThisTokEnd[-1] != '"') { + const char *UDSuffixEnd = ThisTokEnd; + do { + --ThisTokEnd; + } while (ThisTokEnd[-1] != '"'); + + StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd); + + if (UDSuffixBuf.empty()) { + UDSuffixBuf.assign(UDSuffix); + UDSuffixTokLoc = StringToks[i].getLocation(); + } else if (!UDSuffixBuf.equals(UDSuffix)) { + // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the + // result of a concatenation involving at least one user-defined-string- + // literal, all the participating user-defined-string-literals shall + // have the same ud-suffix. + if (Diags) { + SourceLocation TokLoc = StringToks[i].getLocation(); + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) + << SourceRange(TokLoc, TokLoc); + } + hadError = true; + } + } + + // Strip the end quote. + --ThisTokEnd; + // TODO: Input character set mapping support. // Skip marker for wide or unicode strings. |