about | summary | refs | log | tree | commit | diff
path: root/lib/Lex/TokenConcatenation.cpp
diff options
context:
space:
mode:
author Douglas Gregor <dgregor@apple.com> 2011-07-27 05:40:30 +0000
committer Douglas Gregor <dgregor@apple.com> 2011-07-27 05:40:30 +0000
commit 5cee1195584fa8672253139c86e922daeda69b9e (patch)
tree e1b36e0f628359bb42d22d78c74e931057b962de /lib/Lex/TokenConcatenation.cpp
parent 6fa8f86b8188c6d3c4d6616122a71ccd72a0c78a (diff)
Add support for C++0x unicode string and character literals, from Craig Topper!
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@136210 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/TokenConcatenation.cpp')
-rw-r--r-- lib/Lex/TokenConcatenation.cpp 64
1 files changed, 28 insertions, 36 deletions
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 3e9e855031..19baf80aad 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -17,42 +17,39 @@
using namespace clang;
-/// StartsWithL - Return true if the spelling of this token starts with 'L'.
-bool TokenConcatenation::StartsWithL(const Token &Tok) const {
- if (!Tok.needsCleaning()) {
- SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
- }
-
- if (Tok.getLength() < 256) {
- char Buffer[256];
- const char *TokPtr = Buffer;
- PP.getSpelling(Tok, TokPtr);
- return TokPtr[0] == 'L';
- }
-
- return PP.getSpelling(Tok)[0] == 'L';
-}
+/// IsIdentifierStringPrefix - Return true if the spelling of the token
+/// is literally 'L', 'u', 'U', or 'u8'.
+bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
+ const LangOptions &LangOpts = PP.getLangOptions();
-/// IsIdentifierL - Return true if the spelling of this token is literally
-/// 'L'.
-bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
if (!Tok.needsCleaning()) {
- if (Tok.getLength() != 1)
+ if (Tok.getLength() != 1 && Tok.getLength() != 2)
return false;
SourceManager &SM = PP.getSourceManager();
- return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+ const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+ if (Tok.getLength() == 1)
+ return Ptr[0] == 'L' ||
+ (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
+ if (Tok.getLength() == 2)
+ return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
- if (PP.getSpelling(Tok, TokPtr) != 1)
- return false;
- return TokPtr[0] == 'L';
+ unsigned length = PP.getSpelling(Tok, TokPtr);
+ if (length == 1)
+ return TokPtr[0] == 'L' ||
+ (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
+ if (length == 2)
+ return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
+ return false;
}
- return PP.getSpelling(Tok) == "L";
+ std::string TokStr = PP.getSpelling(Tok);
+ return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
+ TokStr == "u" ||
+ TokStr == "U"));
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
@@ -179,24 +176,19 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
if (Tok.is(tok::numeric_constant))
return GetFirstChar(PP, Tok) != '.';
- if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
- Tok.is(tok::wide_char_literal)*/)
+ if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
+ Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
+ Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
+ Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
return true;
// If this isn't identifier + string, we're done.
if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
return false;
- // FIXME: need a wide_char_constant!
-
- // If the string was a wide string L"foo" or wide char L'f', it would
- // concat with the previous identifier into fooL"bar". Avoid this.
- if (StartsWithL(Tok))
- return true;
-
// Otherwise, this is a narrow character or string. If the *identifier*
- // is a literal 'L', avoid pasting L "foo" -> L"foo".
- return IsIdentifierL(PrevTok);
+ // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
+ return IsIdentifierStringPrefix(PrevTok);
case tok::numeric_constant:
return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
FirstChar == '+' || FirstChar == '-' || FirstChar == '.';