diff options
-rw-r--r-- | include/clang/Basic/TokenKinds.def | 1 | ||||
-rw-r--r-- | include/clang/Lex/Preprocessor.h | 8 | ||||
-rw-r--r-- | include/clang/Lex/Token.h | 23 | ||||
-rw-r--r-- | lib/Frontend/CacheTokens.cpp | 8 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 49 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 10 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 4 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 35 | ||||
-rw-r--r-- | lib/Lex/TokenConcatenation.cpp | 10 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 9 | ||||
-rw-r--r-- | lib/Rewrite/HTMLRewrite.cpp | 22 | ||||
-rw-r--r-- | lib/Rewrite/RewriteMacros.cpp | 2 | ||||
-rw-r--r-- | lib/Rewrite/TokenRewriter.cpp | 4 | ||||
-rw-r--r-- | tools/libclang/CIndex.cpp | 19 |
14 files changed, 115 insertions, 89 deletions
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def index abdd5b4b7a..c61930e67a 100644 --- a/include/clang/Basic/TokenKinds.def +++ b/include/clang/Basic/TokenKinds.def @@ -103,6 +103,7 @@ TOK(comment) // Comment (only in -E -C[C] mode) // C99 6.4.2: Identifiers. TOK(identifier) // abcde123 +TOK(raw_identifier) // Used only in raw lexing mode. // C99 6.4.4.1: Integer Constants // C99 6.4.4.2: Floating Constants diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 1ced6a5800..ca4b9fddab 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -751,10 +751,10 @@ public: // Preprocessor callback methods. These are invoked by a lexer as various // directives and events are found. - /// LookUpIdentifierInfo - Given a tok::identifier token, look up the - /// identifier information for the token and install it into the token. - IdentifierInfo *LookUpIdentifierInfo(Token &Identifier, - const char *BufPtr = 0) const; + /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the + /// identifier information for the token and install it into the token, + /// updating the token kind accordingly. + IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; /// HandleIdentifier - This callback is invoked when the lexer reads an /// identifier and has filled in the tokens IdentifierInfo member. This diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h index 2a19083906..ef4f02cb57 100644 --- a/include/clang/Lex/Token.h +++ b/include/clang/Lex/Token.h @@ -88,6 +88,12 @@ public: bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } + /// isAnyIdentifier - Return true if this is a raw identifier (when lexing + /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). + bool isAnyIdentifier() const { + return is(tok::identifier) || is(tok::raw_identifier); + } + /// isLiteral - Return true if this is a "literal", like a numeric /// constant, string, etc. bool isLiteral() const { @@ -154,7 +160,10 @@ public: } IdentifierInfo *getIdentifierInfo() const { - assert(!isAnnotation() && "Used IdentInfo on annotation token!"); + assert(isNot(tok::raw_identifier) && + "getIdentifierInfo() on a tok::raw_identifier token!"); + assert(!isAnnotation() && + "getIdentifierInfo() on an annotation token!"); if (isLiteral()) return 0; return (IdentifierInfo*) PtrData; } @@ -162,6 +171,18 @@ public: PtrData = (void*) II; } + /// getRawIdentifierData - For a raw identifier token (i.e., an identifier + /// lexed in raw mode), returns a pointer to the start of it in the text + /// buffer if known, null otherwise. + const char *getRawIdentifierData() const { + assert(is(tok::raw_identifier)); + return reinterpret_cast<const char*>(PtrData); + } + void setRawIdentifierData(const char *Ptr) { + assert(is(tok::raw_identifier)); + PtrData = const_cast<char*>(Ptr); + } + /// getLiteralData - For a literal token (numeric constant, string, etc), this /// returns a pointer to the start of it in the text buffer if known, null /// otherwise. diff --git a/lib/Frontend/CacheTokens.cpp b/lib/Frontend/CacheTokens.cpp index aae572cb98..ee3fdd8343 100644 --- a/lib/Frontend/CacheTokens.cpp +++ b/lib/Frontend/CacheTokens.cpp @@ -300,7 +300,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) { ParsingPreprocessorDirective = false; } - if (Tok.is(tok::identifier)) { + if (Tok.is(tok::raw_identifier)) { PP.LookUpIdentifierInfo(Tok); EmitToken(Tok); continue; @@ -320,13 +320,13 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) { // this case, discard both tokens. if (NextTok.isAtStartOfLine()) goto NextToken; - + // The token is the start of a directive. Emit it. EmitToken(Tok); Tok = NextTok; // Did we see 'include'/'import'/'include_next'? - if (Tok.isNot(tok::identifier)) { + if (Tok.isNot(tok::raw_identifier)) { EmitToken(Tok); continue; } @@ -353,7 +353,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) { L.LexIncludeFilename(Tok); L.setParsingPreprocessorDirective(false); assert(!Tok.isAtStartOfLine()); - if (Tok.is(tok::identifier)) + if (Tok.is(tok::raw_identifier)) PP.LookUpIdentifierInfo(Tok); break; diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index da68495663..5d9536f40d 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -266,21 +266,23 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &Features, bool *Invalid) { assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); - - // If this token is an identifier, just return the string from the identifier - // table, which is very quick. - if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + + const char *TokStart = 0; + // NOTE: this has to be checked *before* testing for an IdentifierInfo. + if (Tok.is(tok::raw_identifier)) + TokStart = Tok.getRawIdentifierData(); + else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + // Just return the string from the identifier table, which is very quick. Buffer = II->getNameStart(); return II->getLength(); } - - // Otherwise, compute the start of the token in the input lexer buffer. - const char *TokStart = 0; - + + // NOTE: this can be checked even after testing for an IdentifierInfo. if (Tok.isLiteral()) TokStart = Tok.getLiteralData(); - + if (TokStart == 0) { + // Compute the start of the token in the input lexer buffer. bool CharDataInvalid = false; TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid); if (Invalid) @@ -290,13 +292,13 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, return 0; } } - + // If this token contains nothing interesting, return it directly. if (!Tok.needsCleaning()) { Buffer = TokStart; return Tok.getLength(); } - + // Otherwise, hard case, relex the characters into the string. char *OutBuf = const_cast<char*>(Buffer); for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); @@ -307,7 +309,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, } assert(unsigned(OutBuf-Buffer) != Tok.getLength() && "NeedsCleaning flag set on something that didn't need cleaning!"); - + return OutBuf-Buffer; } @@ -473,10 +475,9 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { // we don't have an identifier table available. Instead, just look at // the raw identifier to recognize and categorize preprocessor directives. TheLexer.LexFromRawLexer(TheTok); - if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) { - const char *IdStart = Buffer->getBufferStart() - + TheTok.getLocation().getRawEncoding() - 1; - llvm::StringRef Keyword(IdStart, TheTok.getLength()); + if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) { + llvm::StringRef Keyword(TheTok.getRawIdentifierData(), + TheTok.getLength()); PreambleDirectiveKind PDK = llvm::StringSwitch<PreambleDirectiveKind>(Keyword) .Case("include", PDK_Skipped) @@ -1046,19 +1047,17 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) { if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) { FinishIdentifier: const char *IdStart = BufferPtr; - FormTokenWithChars(Result, CurPtr, tok::identifier); + FormTokenWithChars(Result, CurPtr, tok::raw_identifier); + Result.setRawIdentifierData(IdStart); // If we are in raw mode, return this identifier raw. There is no need to // look up identifier information or attempt to macro expand it. - if (LexingRawMode) return; - - // Fill in Result.IdentifierInfo, looking up the identifier in the - // identifier table. - IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart); + if (LexingRawMode) + return; - // Change the kind of this identifier to the appropriate token kind, e.g. - // turning "for" into a keyword. - Result.setKind(II->getTokenID()); + // Fill in Result.IdentifierInfo and update the token kind, + // looking up the identifier in the identifier table. + IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); // Finally, now that we know we have an identifier, pass this off to the // preprocessor, which may macro expand it or something. diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 467d485888..5b65fd3034 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -245,7 +245,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or // something bogus), skip it. - if (Tok.isNot(tok::identifier)) { + if (Tok.isNot(tok::raw_identifier)) { CurPPLexer->ParsingPreprocessorDirective = false; // Restore comment saving mode. if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments); @@ -257,12 +257,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // to spell an i/e in a strange way that is another letter. Skipping this // allows us to avoid looking up the identifier info for #define/#undef and // other common directives. - bool Invalid = false; - const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation(), - &Invalid); - if (Invalid) - return; - + const char *RawCharData = Tok.getRawIdentifierData(); + char FirstChar = RawCharData[0]; if (FirstChar >= 'a' && FirstChar <= 'z' && FirstChar != 'i' && FirstChar != 'e') { diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index e6a53a1043..da66b502b2 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -292,7 +292,7 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) { if (Tok.is(tok::eom)) return; // Can only poison identifiers. - if (Tok.isNot(tok::identifier)) { + if (Tok.isNot(tok::raw_identifier)) { Diag(Tok, diag::err_pp_invalid_poison); return; } @@ -599,7 +599,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) { // Create a Token from the string. Token MacroTok; MacroTok.startToken(); - MacroTok.setKind(tok::identifier); + MacroTok.setKind(tok::raw_identifier); CreateString(&StrVal[1], StrVal.size() - 2, MacroTok); // Get the IdentifierInfo of MacroToPushTok. diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 2d8f1a5aa7..6fe414b664 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -285,9 +285,12 @@ void Preprocessor::CodeCompleteNaturalLanguage() { llvm::StringRef Preprocessor::getSpelling(const Token &Tok, llvm::SmallVectorImpl<char> &Buffer, bool *Invalid) const { - // Try the fast path. - if (const IdentifierInfo *II = Tok.getIdentifierInfo()) - return II->getName(); + // NOTE: this has to be checked *before* testing for an IdentifierInfo. + if (Tok.isNot(tok::raw_identifier)) { + // Try the fast path. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) + return II->getName(); + } // Resize the buffer if we need to copy into it. if (Tok.needsCleaning()) @@ -313,8 +316,10 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, InstantiationLoc, Len); Tok.setLocation(Loc); - // If this is a literal token, set the pointer data. - if (Tok.isLiteral()) + // If this is a raw identifier or a literal token, set the pointer data. + if (Tok.is(tok::raw_identifier)) + Tok.setRawIdentifierData(DestPtr); + else if (Tok.isLiteral()) Tok.setLiteralData(DestPtr); } @@ -369,25 +374,29 @@ void Preprocessor::EndSourceFile() { // Lexer Event Handling. //===----------------------------------------------------------------------===// -/// LookUpIdentifierInfo - Given a tok::identifier token, look up the -/// identifier information for the token and install it into the token. -IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier, - const char *BufPtr) const { - assert(Identifier.is(tok::identifier) && "Not an identifier!"); - assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); +/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the +/// identifier information for the token and install it into the token, +/// updating the token kind accordingly. +IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { + assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!"); // Look up this token, see if it is a macro, or if it is a language keyword. IdentifierInfo *II; - if (BufPtr && !Identifier.needsCleaning()) { + if (!Identifier.needsCleaning()) { // No cleaning needed, just use the characters from the lexed buffer. - II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength())); + II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(), + Identifier.getLength())); } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. llvm::SmallString<64> IdentifierBuffer; llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); II = getIdentifierInfo(CleanedStr); } + + // Update the token info (identifier info and appropriate token kind). Identifier.setIdentifierInfo(II); + Identifier.setKind(II->getTokenID()); + return II; } diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index fc6db2151a..3e9e855031 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -13,6 +13,7 @@ #include "clang/Lex/TokenConcatenation.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -165,7 +166,14 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, } switch (PrevKind) { - default: assert(0 && "InitAvoidConcatTokenInfo built wrong"); + default: + llvm_unreachable("InitAvoidConcatTokenInfo built wrong"); + return true; + + case tok::raw_identifier: + llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); + return true; + case tok::identifier: // id+id or id+number or id+L"foo". // id+'.'... will not append. if (Tok.is(tok::numeric_constant)) diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index a0e5ae33b2..ea39b47904 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -435,12 +435,13 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Lex the resultant pasted token into Result. Token Result; - if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) { + if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) { // Common paste case: identifier+identifier = identifier. Avoid creating // a lexer and other overhead. PP.IncrementPasteCounter(true); Result.startToken(); - Result.setKind(tok::identifier); + Result.setKind(tok::raw_identifier); + Result.setRawIdentifierData(ResultTokStrPtr); Result.setLocation(ResultTokLoc); Result.setLength(LHSLen+RHSLen); } else { @@ -524,10 +525,10 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Now that we got the result token, it will be subject to expansion. Since // token pasting re-lexes the result token in raw mode, identifier information // isn't looked up. As such, if the result is an identifier, look up id info. - if (Tok.is(tok::identifier)) { + if (Tok.is(tok::raw_identifier)) { // Look up the identifier info for the token. We disabled identifier lookup // by saying we're skipping contents, so we need to do this manually. - PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr); + PP.LookUpIdentifierInfo(Tok); } return false; } diff --git a/lib/Rewrite/HTMLRewrite.cpp b/lib/Rewrite/HTMLRewrite.cpp index e6b9aa367a..df08cd7cbf 100644 --- a/lib/Rewrite/HTMLRewrite.cpp +++ b/lib/Rewrite/HTMLRewrite.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -378,14 +379,16 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { unsigned TokLen = Tok.getLength(); switch (Tok.getKind()) { default: break; - case tok::identifier: { - // Fill in Result.IdentifierInfo, looking up the identifier in the - // identifier table. - const IdentifierInfo *II = - PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs); + case tok::identifier: + llvm_unreachable("tok::identifier in raw lexing mode!"); + break; + case tok::raw_identifier: { + // Fill in Result.IdentifierInfo and update the token kind, + // looking up the identifier in the identifier table. + PP.LookUpIdentifierInfo(Tok); // If this is a pp-identifier, for a keyword, highlight it as such. - if (II->getTokenID() != tok::identifier) + if (Tok.isNot(tok::identifier)) HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, "<span class='keyword'>", "</span>"); break; @@ -473,11 +476,8 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { // If this raw token is an identifier, the raw lexer won't have looked up // the corresponding identifier info for it. Do this now so that it will be // macro expanded when we re-preprocess it. - if (Tok.is(tok::identifier)) { - // Change the kind of this identifier to the appropriate token kind, e.g. - // turning "for" into a keyword. - Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID()); - } + if (Tok.is(tok::raw_identifier)) + PP.LookUpIdentifierInfo(Tok); TokenStream.push_back(Tok); diff --git a/lib/Rewrite/RewriteMacros.cpp b/lib/Rewrite/RewriteMacros.cpp index 2efa69479c..0453098a56 100644 --- a/lib/Rewrite/RewriteMacros.cpp +++ b/lib/Rewrite/RewriteMacros.cpp @@ -78,7 +78,7 @@ static void LexRawTokensFromMainFile(Preprocessor &PP, // If we have an identifier with no identifier info for our raw token, look // up the indentifier info. This is important for equality comparison of // identifier tokens. - if (RawTok.is(tok::identifier) && !RawTok.getIdentifierInfo()) + if (RawTok.is(tok::raw_identifier)) PP.LookUpIdentifierInfo(RawTok); RawTokens.push_back(RawTok); diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp index 789d53f4af..b5f616fbfe 100644 --- a/lib/Rewrite/TokenRewriter.cpp +++ b/lib/Rewrite/TokenRewriter.cpp @@ -34,10 +34,10 @@ TokenRewriter::TokenRewriter(FileID FID, SourceManager &SM, RawLex.LexFromRawLexer(RawTok); while (RawTok.isNot(tok::eof)) { #if 0 - if (Tok.is(tok::identifier)) { + if (Tok.is(tok::raw_identifier)) { // Look up the identifier info for the token. This should use // IdentifierTable directly instead of PP. - Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); + PP.LookUpIdentifierInfo(Tok); } #endif diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index bd39925dad..61ff611a66 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -4010,27 +4010,18 @@ void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range, if (Tok.isLiteral()) { CXTok.int_data[0] = CXToken_Literal; CXTok.ptr_data = (void *)Tok.getLiteralData(); - } else if (Tok.is(tok::identifier)) { + } else if (Tok.is(tok::raw_identifier)) { // Lookup the identifier to determine whether we have a keyword. - std::pair<FileID, unsigned> LocInfo - = SourceMgr.getDecomposedLoc(Tok.getLocation()); - bool Invalid = false; - llvm::StringRef Buf - = CXXUnit->getSourceManager().getBufferData(LocInfo.first, &Invalid); - if (Invalid) - return; - - const char *StartPos = Buf.data() + LocInfo.second; IdentifierInfo *II - = CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok, StartPos); + = CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok); if ((II->getObjCKeywordID() != tok::objc_not_keyword) && previousWasAt) { CXTok.int_data[0] = CXToken_Keyword; } else { - CXTok.int_data[0] = II->getTokenID() == tok::identifier? - CXToken_Identifier - : CXToken_Keyword; + CXTok.int_data[0] = Tok.is(tok::identifier) + ? CXToken_Identifier + : CXToken_Keyword; } CXTok.ptr_data = II; } else if (Tok.is(tok::comment)) { |