aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/TokenConcatenation.cpp
diff options
context:
space:
mode:
authorRichard Smith <richard-llvm@metafoo.co.uk>2012-03-06 03:21:47 +0000
committerRichard Smith <richard-llvm@metafoo.co.uk>2012-03-06 03:21:47 +0000
commit99831e4677a7e2e051af636221694d60ba31fcdb (patch)
tree4684bde38a7659db0c97673dcab98cffce576f9b /lib/Lex/TokenConcatenation.cpp
parentc1b0f7fa9b755ab59129ae85187d0d4f91379995 (diff)
User-defined literals: reject string and character UDLs in all places where the
grammar requires a string-literal and not a user-defined-string-literal. The two constructs are still represented by the same TokenKind, in order to prevent a combinatorial explosion of different kinds of token. A flag on Token tracks whether a ud-suffix is present, in order to prevent clients from needing to look at the token's spelling. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152098 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/TokenConcatenation.cpp')
-rw-r--r--lib/Lex/TokenConcatenation.cpp39
1 files changed, 38 insertions, 1 deletions
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 335d864f3f..ca7e55d863 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -85,6 +85,19 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
TokenInfo[tok::hash ] |= aci_custom_firstchar;
TokenInfo[tok::arrow ] |= aci_custom_firstchar;
+ // These tokens have custom code in C++11 mode.
+ if (PP.getLangOptions().CPlusPlus0x) {
+ TokenInfo[tok::string_literal ] |= aci_custom;
+ TokenInfo[tok::wide_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf16_string_literal] |= aci_custom;
+ TokenInfo[tok::utf32_string_literal] |= aci_custom;
+ TokenInfo[tok::char_constant ] |= aci_custom;
+ TokenInfo[tok::wide_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+ }
+
// These tokens change behavior if followed by an '='.
TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
@@ -183,6 +196,28 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
case tok::raw_identifier:
llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
+ case tok::string_literal:
+ case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
+ case tok::char_constant:
+ case tok::wide_char_constant:
+ case tok::utf16_char_constant:
+ case tok::utf32_char_constant:
+ if (!PP.getLangOptions().CPlusPlus0x)
+ return false;
+
+ // In C++11, a string or character literal followed by an identifier is a
+ // single token.
+ if (Tok.getIdentifierInfo())
+ return true;
+
+ // A ud-suffix is an identifier. If the previous token ends with one, treat
+ // it as an identifier.
+ if (!PrevTok.hasUDSuffix())
+ return false;
+ // FALL THROUGH.
case tok::identifier: // id+id or id+number or id+L"foo".
// id+'.'... will not append.
if (Tok.is(tok::numeric_constant))
@@ -201,9 +236,11 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
// Otherwise, this is a narrow character or string. If the *identifier*
// is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
return IsIdentifierStringPrefix(PrevTok);
+
case tok::numeric_constant:
return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
- FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+ FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
+ (PP.getLangOptions().CPlusPlus0x && FirstChar == '_');
case tok::period: // ..., .*, .1234
return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
isdigit(FirstChar) ||