diff options
author | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-03-05 04:02:15 +0000 |
---|---|---|
committer | Richard Smith <richard-llvm@metafoo.co.uk> | 2012-03-05 04:02:15 +0000 |
commit | 5cc2c6eb67b6e5361bbe96f79b519fd62ec666d6 (patch) | |
tree | 2c1f49624f8fd182adf9d9473f9b9c6f1229183e | |
parent | 9d008fd572fa3411e93084d51f12ea12a998786c (diff) |
Lexing support for user-defined literals. Currently these lex as the same token
kinds as the underlying string literals, and we silently drop the ud-suffix;
those issues will be fixed by subsequent patches.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152012 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/DiagnosticLexKinds.td | 3 |
-rw-r--r-- | include/clang/Lex/Lexer.h | 2 |
-rw-r--r-- | include/clang/Lex/LiteralSupport.h | 5 |
-rw-r--r-- | lib/Lex/Lexer.cpp | 37 |
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 57 |
-rw-r--r-- | test/CXX/lex/lex.literal/lex.ext/p1.cpp | 4 |
-rw-r--r-- | test/CXX/over/over.oper/over.literal/p5.cpp | 4 |
-rw-r--r-- | test/CXX/over/over.oper/over.literal/p8.cpp | 4 |
-rw-r--r-- | test/SemaCXX/cxx98-compat.cpp | 2 |
9 files changed, 107 insertions, 11 deletions
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td index 12f23cea50..96edbe040f 100644 --- a/include/clang/Basic/DiagnosticLexKinds.td +++ b/include/clang/Basic/DiagnosticLexKinds.td @@ -133,6 +133,9 @@ def warn_cxx98_compat_unicode_literal : Warning< InGroup<CXX98Compat>, DefaultIgnore; def err_unsupported_string_concat : Error< "unsupported non-standard concatenation of string literals">; +def err_string_concat_mixed_suffix : Error< + "differing user-defined suffixes ('%0' and '%1') in string literal " + "concatenation">; def err_bad_string_encoding : Error< "illegal character encoding in string literal">; def warn_bad_string_encoding : ExtWarn< diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index bad9844f66..df30394947 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -530,6 +530,8 @@ private: // Other lexer functions. void SkipBytes(unsigned Bytes, bool StartOfLine); + + const char *LexUDSuffix(Token &Result, const char *CurPtr); // Helper functions to lex the remainder of a token of the specific type. 
void LexIdentifier (Token &Result, const char *CurPtr); diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h index 6142f006c5..90ca58bff9 100644 --- a/include/clang/Lex/LiteralSupport.h +++ b/include/clang/Lex/LiteralSupport.h @@ -128,6 +128,7 @@ class CharLiteralParser { tok::TokenKind Kind; bool IsMultiChar; bool HadError; + SmallString<32> UDSuffixBuf; public: CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, @@ -140,6 +141,7 @@ public: bool isUTF32() const { return Kind == tok::utf32_char_constant; } bool isMultiChar() const { return IsMultiChar; } uint64_t getValue() const { return Value; } + StringRef getUDSuffix() const { return UDSuffixBuf; } }; /// StringLiteralParser - This decodes string escape characters and performs @@ -157,6 +159,7 @@ class StringLiteralParser { tok::TokenKind Kind; SmallString<512> ResultBuf; char *ResultPtr; // cursor + SmallString<32> UDSuffixBuf; public: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, Preprocessor &PP, bool Complain = true); @@ -196,6 +199,8 @@ public: bool isUTF32() const { return Kind == tok::utf32_string_literal; } bool isPascal() const { return Pascal; } + StringRef getUDSuffix() const { return UDSuffixBuf; } + private: void init(const Token *StringToks, unsigned NumStringToks); bool CopyStringFragment(StringRef Fragment); diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index c9f73764c9..2b24d1cc75 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -1078,6 +1078,12 @@ static void InitCharacterInfo() { } +/// isIdentifierHead - Return true if this is the first character of an +/// identifier, which is [a-zA-Z_]. +static inline bool isIdentifierHead(unsigned char c) { + return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false; +} + /// isIdentifierBody - Return true if this is the body character of an /// identifier, which is [a-zA-Z0-9_]. 
static inline bool isIdentifierBody(unsigned char c) { @@ -1543,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; - while (isNumberBody(C)) { // FIXME: UCNs? + while (isNumberBody(C)) { // FIXME: UCNs. CurPtr = ConsumeChar(CurPtr, Size, Result); PrevCh = C; C = getCharAndSize(CurPtr, Size); @@ -1567,6 +1573,23 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { Result.setLiteralData(TokStart); } +/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes +/// in C++11. +const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { + assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11"); + + // Maximally munch an identifier. FIXME: UCNs. + unsigned Size; + char C = getCharAndSize(CurPtr, Size); + if (isIdentifierHead(C)) { + do { + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + } while (isIdentifierBody(C)); + } + return CurPtr; +} + /// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// either " or L" or u8" or u" or U". void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, @@ -1606,6 +1629,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the string, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_string); @@ -1685,6 +1712,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, } } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // Update the location of token as well as BufferPtr. 
const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Kind); @@ -1768,6 +1799,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, C = getAndAdvanceChar(CurPtr, Result); } + // If we are in C++11, lex the optional ud-suffix. + if (getFeatures().CPlusPlus0x) + CurPtr = LexUDSuffix(Result, CurPtr); + // If a nul character existed in the character, warn about it. if (NulCharacter && !isLexingRawMode()) Diag(NulCharacter, diag::null_in_char); diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 547bd4e0c8..e3ff77f4f0 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { } -/// character-literal: [C++0x lex.ccon] +/// user-defined-character-literal: [C++11 lex.ext] +/// character-literal ud-suffix +/// ud-suffix: +/// identifier +/// character-literal: [C++11 lex.ccon] /// ' c-char-sequence ' /// u' c-char-sequence ' /// U' c-char-sequence ' @@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// backslash \, or new-line character /// escape-sequence /// universal-character-name -/// escape-sequence: [C++0x lex.ccon] +/// escape-sequence: /// simple-escape-sequence /// octal-escape-sequence /// hexadecimal-escape-sequence @@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// hexadecimal-escape-sequence: /// \x hexadecimal-digit /// hexadecimal-escape-sequence hexadecimal-digit -/// universal-character-name: +/// universal-character-name: [C++11 lex.charset] /// \u hex-quad /// \U hex-quad hex-quad /// hex-quad: @@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, assert(begin[0] == '\'' && "Invalid token lexed"); ++begin; + // Remove an optional ud-suffix. 
+ if (end[-1] != '\'') { + const char *UDSuffixEnd = end; + do { + --end; + } while (end[-1] != '\''); + UDSuffixBuf.assign(end, UDSuffixEnd); + } + // Trim the ending quote. - assert(end[-1] == '\'' && "Invalid token lexed"); + assert(end != begin && "Invalid token lexed"); --end; // FIXME: The "Value" is an uint64_t so we can handle char literals of @@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ Pascal = false; + SourceLocation UDSuffixTokLoc; + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { const char *ThisTokBuf = &TokenBuf[0]; // Get the spelling of the token, which eliminates trigraphs, etc. We know @@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ continue; } - const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + const char *ThisTokEnd = ThisTokBuf+ThisTokLen; + + // Remove an optional ud-suffix. + if (ThisTokEnd[-1] != '"') { + const char *UDSuffixEnd = ThisTokEnd; + do { + --ThisTokEnd; + } while (ThisTokEnd[-1] != '"'); + + StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd); + + if (UDSuffixBuf.empty()) { + UDSuffixBuf.assign(UDSuffix); + UDSuffixTokLoc = StringToks[i].getLocation(); + } else if (!UDSuffixBuf.equals(UDSuffix)) { + // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the + // result of a concatenation involving at least one user-defined-string- + // literal, all the participating user-defined-string-literals shall + // have the same ud-suffix. + if (Diags) { + SourceLocation TokLoc = StringToks[i].getLocation(); + Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix) + << UDSuffixBuf << UDSuffix + << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc) + << SourceRange(TokLoc, TokLoc); + } + hadError = true; + } + } + + // Strip the end quote. + --ThisTokEnd; + // TODO: Input character set mapping support. // Skip marker for wide or unicode strings. 
diff --git a/test/CXX/lex/lex.literal/lex.ext/p1.cpp b/test/CXX/lex/lex.literal/lex.ext/p1.cpp index 39812280c0..c167e822eb 100644 --- a/test/CXX/lex/lex.literal/lex.ext/p1.cpp +++ b/test/CXX/lex/lex.literal/lex.ext/p1.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s -int * operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}} -long double operator "" _p31(long double); +void operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}} +void operator "" _p31(long double); long double operator "" pi(long double); // expected-warning{{user-defined literals not starting with '_' are reserved by the implementation}} float hexfloat = 0x1p31; // allow hexfloats diff --git a/test/CXX/over/over.oper/over.literal/p5.cpp b/test/CXX/over/over.oper/over.literal/p5.cpp index 7a782fca91..66f3f97eaa 100644 --- a/test/CXX/over/over.oper/over.literal/p5.cpp +++ b/test/CXX/over/over.oper/over.literal/p5.cpp @@ -7,9 +7,13 @@ template<char...> void operator "" _a(); template<char... C> S<C...> operator "" _a(); template<typename T> struct U { + friend int operator "" _a(const char *, size_t); // FIXME: It's not entirely clear whether this is intended to be legal. friend U operator "" _a(const T *, size_t); // expected-error {{parameter}} }; +template<char...> struct V { + friend void operator "" _b(); // expected-error {{parameter}} +}; template<char... C, int N = 0> void operator "" _b(); // expected-error {{parameter}} template<char... 
C> void operator "" _b(int N = 0); // expected-error {{parameter}} diff --git a/test/CXX/over/over.oper/over.literal/p8.cpp b/test/CXX/over/over.oper/over.literal/p8.cpp index fe94b5348b..69d4e761e5 100644 --- a/test/CXX/over/over.oper/over.literal/p8.cpp +++ b/test/CXX/over/over.oper/over.literal/p8.cpp @@ -9,8 +9,8 @@ void operator "" _km(long double); // ok string operator "" _i18n(const char*, std::size_t); // ok // FIXME: This should be accepted once we support UCNs template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}} -// FIXME: This should be rejected once we lex user-defined literal suffices -float operator ""E(const char *); // expected-warning {{hexfloat}} +// FIXME: Accept this as an extension, with a fix-it to add the space +float operator ""E(const char *); // expected-error {{must be '""'}} expected-error {{expected identifier}} float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{hexfloat}} string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}} double operator "" _miles(double); // expected-error {{parameter}} diff --git a/test/SemaCXX/cxx98-compat.cpp b/test/SemaCXX/cxx98-compat.cpp index 8c15f5adc1..e9ba0dffc3 100644 --- a/test/SemaCXX/cxx98-compat.cpp +++ b/test/SemaCXX/cxx98-compat.cpp @@ -50,7 +50,7 @@ int InitList() { return { 0 }; // expected-warning {{generalized initializer lists are incompatible with C++98}} } -int operator""_hello(const char *); // expected-warning {{literal operators are incompatible with C++98}} +int operator"" _hello(const char *); // expected-warning {{literal operators are incompatible with C++98}} enum EnumFixed : int { // expected-warning {{enumeration types with a fixed underlying type are incompatible with C++98}} }; |