diff options
-rw-r--r-- | Driver/CacheTokens.cpp | 37 | ||||
-rw-r--r-- | Driver/PrintPreprocessedOutput.cpp | 2 | ||||
-rw-r--r-- | include/clang/Lex/Preprocessor.h | 12 | ||||
-rw-r--r-- | include/clang/Lex/ScratchBuffer.h | 8 | ||||
-rw-r--r-- | include/clang/Lex/Token.h | 66 | ||||
-rw-r--r-- | include/clang/Parse/Parser.h | 5 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 13 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 3 | ||||
-rw-r--r-- | lib/Lex/PPCaching.cpp | 2 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 24 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 5 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 28 | ||||
-rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 16 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 39 | ||||
-rw-r--r-- | lib/Rewrite/TokenRewriter.cpp | 5 |
15 files changed, 153 insertions, 112 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp index 1d33105668..fcb4d2f12d 100644 --- a/Driver/CacheTokens.cpp +++ b/Driver/CacheTokens.cpp @@ -140,32 +140,19 @@ void PTHWriter::EmitToken(const Token& T) { Emit32(ResolveID(T.getIdentifierInfo())); Emit32(fpos); - // For specific tokens we cache their spelling. - if (T.getIdentifierInfo()) - return; - - switch (T.getKind()) { - default: - break; - case tok::string_literal: - case tok::wide_string_literal: - case tok::angle_string_literal: - case tok::numeric_constant: - case tok::char_constant: { - // FIXME: This uses the slow getSpelling(). Perhaps we do better - // in the future? This only slows down PTH generation. - const std::string& spelling = PP.getSpelling(T); - const char* s = spelling.c_str(); - - // Get the string entry. - llvm::StringMapEntry<Offset> *E = - &CachedStrs.GetOrCreateValue(s, s+spelling.size()); - - // Store the address of the string entry in our spelling map. - (*CurSpellMap).push_back(std::make_pair(fpos, E)); + // Literals (strings, numbers, characters) get cached spellings. + if (T.isLiteral()) { + // FIXME: This uses the slow getSpelling(). Perhaps we do better + // in the future? This only slows down PTH generation. + const std::string &spelling = PP.getSpelling(T); + const char* s = spelling.c_str(); + + // Get the string entry. + llvm::StringMapEntry<Offset> *E = + &CachedStrs.GetOrCreateValue(s, s+spelling.size()); - break; - } + // Store the address of the string entry in our spelling map. + CurSpellMap->push_back(std::make_pair(fpos, E)); } } diff --git a/Driver/PrintPreprocessedOutput.cpp b/Driver/PrintPreprocessedOutput.cpp index 64cc4c8be6..deecacb1f4 100644 --- a/Driver/PrintPreprocessedOutput.cpp +++ b/Driver/PrintPreprocessedOutput.cpp @@ -430,6 +430,7 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok, // Avoid spelling identifiers, the most common form of token. FirstChar = II->getName()[0]; } else if (!Tok.needsCleaning()) { + // FIXME: SPEED UP LITERALS! SourceManager &SrcMgr = PP.getSourceManager(); FirstChar = *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation())); @@ -556,6 +557,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, const char *Str = II->getName(); unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength(); OS.write(Str, Len); + // FIXME: ACCELERATE LITERALS } else if (Tok.getLength() < 256) { const char *TokPtr = Buffer; unsigned Len = PP.getSpelling(Tok, TokPtr); diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index bf47bab295..f6391d9781 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -415,7 +415,7 @@ public: /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is /// invoked. void AnnotateCachedTokens(const Token &Tok) { - assert(Tok.isAnnotationToken() && "Expected annotation token"); + assert(Tok.isAnnotation() && "Expected annotation token"); if (CachedLexPos != 0 && isBacktrackEnabled()) AnnotatePreviousCachedTokens(Tok); } @@ -463,11 +463,11 @@ public: return *SourceMgr.getCharacterData(SL); } - /// CreateString - Plop the specified string into a scratch buffer and return - /// a location for it. If specified, the source location provides a source - /// location for the token. - SourceLocation CreateString(const char *Buf, unsigned Len, - SourceLocation SourceLoc = SourceLocation()); + /// CreateString - Plop the specified string into a scratch buffer and set the + /// specified token's location and length to it. If specified, the source + /// location provides a location of the instantiation point of the token. + void CreateString(const char *Buf, unsigned Len, + Token &Tok, SourceLocation SourceLoc = SourceLocation()); /// DumpToken - Print the token to stderr, used for debugging. /// diff --git a/include/clang/Lex/ScratchBuffer.h b/include/clang/Lex/ScratchBuffer.h index eec6a251a5..6506f92629 100644 --- a/include/clang/Lex/ScratchBuffer.h +++ b/include/clang/Lex/ScratchBuffer.h @@ -31,16 +31,10 @@ public: ScratchBuffer(SourceManager &SM); /// getToken - Splat the specified text into a temporary MemoryBuffer and - /// return a SourceLocation that refers to the token. The SourceLoc value - /// gives a virtual location that the token will appear to be from. - SourceLocation getToken(const char *Buf, unsigned Len, - SourceLocation SourceLoc); - - /// getToken - Splat the specified text into a temporary MemoryBuffer and /// return a SourceLocation that refers to the token. This is just like the /// previous method, but returns a location that indicates the physloc of the /// token. - SourceLocation getToken(const char *Buf, unsigned Len); + SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr); private: void AllocScratchBuffer(unsigned RequestLen); diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h index c0dd2b7837..73e087ecb6 100644 --- a/include/clang/Lex/Token.h +++ b/include/clang/Lex/Token.h @@ -34,21 +34,28 @@ class Token { /// The location of the token. SourceLocation Loc; - // Conceptually these next two fields could be in a union with - // access depending on isAnnotationToken(). However, this causes gcc - // 4.2 to pessimize LexTokenInternal, a very performance critical - // routine. Keeping as separate members with casts until a more - // beautiful fix presents itself. + // Conceptually these next two fields could be in a union. However, this + // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical + // routine. Keeping as separate members with casts until a more beautiful fix + // presents itself. /// UintData - This holds either the length of the token text, when /// a normal token, or the end of the SourceRange when an annotation /// token. unsigned UintData; - /// PtrData - For normal tokens, this points to the uniqued - /// information for the identifier (if an identifier token) or - /// null. For annotation tokens, this points to information specific - /// to the annotation token. + /// PtrData - This is a union of four different pointer types, which depends + /// on what type of token this is: + /// Identifiers, keywords, etc: + /// This is an IdentifierInfo*, which contains the uniqued identifier + /// spelling. + /// Literals: isLiteral() returns true. + /// This is a pointer to the start of the token in a text buffer, which + /// may be dirty (have trigraphs / escaped newlines). + /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). + /// This is a pointer to sema-specific data for the annotation token. + /// Other: + /// This is null. void *PtrData; /// Kind - The actual flavor of token this is. @@ -77,32 +84,40 @@ public: bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } - bool isAnnotationToken() const { + /// isLiteral - Return true if this is a "literal", like a numeric + /// constant, string, etc. + bool isLiteral() const { + return is(tok::numeric_constant) || is(tok::char_constant) || + is(tok::string_literal) || is(tok::wide_string_literal) || + is(tok::angle_string_literal); + } + + bool isAnnotation() const { return is(tok::annot_typename) || is(tok::annot_cxxscope) || is(tok::annot_template_id); } - + /// getLocation - Return a source location identifier for the specified /// offset in the current file. SourceLocation getLocation() const { return Loc; } unsigned getLength() const { - assert(!isAnnotationToken() && "Annotation tokens have no length field"); + assert(!isAnnotation() && "Annotation tokens have no length field"); return UintData; } void setLocation(SourceLocation L) { Loc = L; } void setLength(unsigned Len) { - assert(!isAnnotationToken() && "Annotation tokens have no length field"); + assert(!isAnnotation() && "Annotation tokens have no length field"); UintData = Len; } SourceLocation getAnnotationEndLoc() const { - assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token"); + assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); return SourceLocation::getFromRawEncoding(UintData); } void setAnnotationEndLoc(SourceLocation L) { - assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token"); + assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); UintData = L.getRawEncoding(); } @@ -130,19 +145,32 @@ public: } IdentifierInfo *getIdentifierInfo() const { - assert(!isAnnotationToken() && "Used IdentInfo on annotation token"); + assert(!isAnnotation() && "Used IdentInfo on annotation token!"); + if (isLiteral()) return 0; return (IdentifierInfo*) PtrData; } void setIdentifierInfo(IdentifierInfo *II) { PtrData = (void*) II; } - + + /// getLiteralData - For a literal token (numeric constant, string, etc), this + /// returns a pointer to the start of it in the text buffer if known, null + /// otherwise. + const char *getLiteralData() const { + assert(isLiteral() && "Cannot get literal data of non-literal"); + return reinterpret_cast<const char*>(PtrData); + } + void setLiteralData(const char *Ptr) { + assert(isLiteral() && "Cannot set literal data of non-literal"); + PtrData = (void*)Ptr; + } + void *getAnnotationValue() const { - assert(isAnnotationToken() && "Used AnnotVal on non-annotation token"); + assert(isAnnotation() && "Used AnnotVal on non-annotation token"); return PtrData; } void setAnnotationValue(void *val) { - assert(isAnnotationToken() && "Used AnnotVal on non-annotation token"); + assert(isAnnotation() && "Used AnnotVal on non-annotation token"); PtrData = val; } diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index 191613bbcb..df8646df79 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -690,10 +690,11 @@ private: if (!Tok.is(tok::identifier)) return false; - if (Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope)) + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (Actions.isTypeName(*II, CurScope)) return true; - return Tok.getIdentifierInfo() == Ident_super; + return II == Ident_super; } OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation); diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 9e8d1aa740..03d81b3b9a 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -624,7 +624,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); // Update the location of token as well as BufferPtr. + const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::numeric_constant); + Result.setLiteralData(TokStart); } /// LexStringLiteral - Lex the remainder of a string literal, after having lexed @@ -655,8 +657,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) { Diag(NulCharacter, diag::null_in_string); // Update the location of the token as well as the BufferPtr instance var. + const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, Wide ? tok::wide_string_literal : tok::string_literal); + Result.setLiteralData(TokStart); } /// LexAngledStringLiteral - Lex the remainder of an angled string literal, @@ -687,7 +691,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { Diag(NulCharacter, diag::null_in_string); // Update the location of token as well as BufferPtr. + const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); + Result.setLiteralData(TokStart); } @@ -735,7 +741,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) { Diag(NulCharacter, diag::null_in_char); // Update the location of token as well as BufferPtr. + const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::char_constant); + Result.setLiteralData(TokStart); } /// SkipWhitespace - Efficiently skip over a series of whitespace characters. @@ -901,9 +909,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { Spelling += "*/"; // add suffix. Result.setKind(tok::comment); - Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(), - Result.getLocation())); - Result.setLength(Spelling.size()); + PP->CreateString(&Spelling[0], Spelling.size(), Result, + Result.getLocation()); return true; } diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp index 77c96e0ab3..c3d7738afe 100644 --- a/lib/Lex/MacroArgs.cpp +++ b/lib/Lex/MacroArgs.cpp @@ -225,8 +225,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks, } } - Tok.setLength(Result.size()); - Tok.setLocation(PP.CreateString(&Result[0], Result.size())); + PP.CreateString(&Result[0], Result.size(), Tok); return Tok; } diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index ed67754e6e..53aa09c130 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -89,7 +89,7 @@ const Token &Preprocessor::PeekAhead(unsigned N) { } void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) { - assert(Tok.isAnnotationToken() && "Expected annotation token"); + assert(Tok.isAnnotation() && "Expected annotation token"); assert(CachedLexPos != 0 && "Expected to have some cached tokens"); assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc() && "The annotation should be until the most recent cached token"); diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index b14df735ad..b3e921c368 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -429,10 +429,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, char TmpBuffer[100]; sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, TM->tm_year+1900); - DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + + Token TmpTok; + TmpTok.startToken(); + PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok); + DATELoc = TmpTok.getLocation(); sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); - TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok); + TIMELoc = TmpTok.getLocation(); } /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded @@ -463,8 +468,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { SourceMgr.getInstantiationLineNumber(Tok.getLocation())); unsigned Length = strlen(TmpBuffer)-1; Tok.setKind(tok::numeric_constant); - Tok.setLength(Length); - Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation())); + CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation()); + Tok.setLength(Length); // Trim off space. } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { SourceLocation Loc = Tok.getLocation(); if (II == Ident__BASE_FILE__) { @@ -480,8 +485,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc)); FN = '"' + Lexer::Stringify(FN) + '"'; Tok.setKind(tok::string_literal); - Tok.setLength(FN.size()); - Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation())); + CreateString(&FN[0], FN.size(), Tok, Tok.getLocation()); } else if (II == Ident__DATE__) { if (!DATELoc.isValid()) ComputeDATE_TIME(DATELoc, TIMELoc, *this); @@ -511,8 +515,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { sprintf(TmpBuffer, "%u ", Depth); unsigned Length = strlen(TmpBuffer)-1; Tok.setKind(tok::numeric_constant); - Tok.setLength(Length); - Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + CreateString(TmpBuffer, Length, Tok, Tok.getLocation()); + Tok.setLength(Length); // Trim off space. } else if (II == Ident__TIMESTAMP__) { // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. @@ -540,8 +544,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { unsigned Len = strlen(TmpBuffer); TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. Tok.setKind(tok::string_literal); - Tok.setLength(Len); - Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation())); + CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation()); + Tok.setLength(Len); // Trim off space. } else { assert(0 && "Unknown identifier!"); } diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index 922af09e50..a80ba6bd1f 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -156,7 +156,10 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // Plop the string (including the newline and trailing null) into a buffer // where we can lex it. - SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size()); + Token TmpTok; + TmpTok.startToken(); + CreateString(&StrVal[0], StrVal.size(), TmpTok); + SourceLocation TokLoc = TmpTok.getLocation(); // Make and enter a lexer object so that we lex and expand the tokens just // like any others. diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index d0a15e45c4..cb0c850e7e 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -266,13 +266,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok, } // Otherwise, compute the start of the token in the input lexer buffer. - const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + const char *TokStart = 0; + + if (Tok.isLiteral()) + TokStart = Tok.getLiteralData(); + + if (TokStart == 0) + TokStart = SourceMgr.getCharacterData(Tok.getLocation()); // If this token contains nothing interesting, return it directly. if (!Tok.needsCleaning()) { Buffer = TokStart; return Tok.getLength(); } + // Otherwise, hard case, relex the characters into the string. char *OutBuf = const_cast<char*>(Buffer); for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); @@ -291,11 +298,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok, /// CreateString - Plop the specified string into a scratch buffer and return a /// location for it. If specified, the source location provides a source /// location for the token. -SourceLocation Preprocessor:: -CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { - if (SLoc.isValid()) - return ScratchBuf->getToken(Buf, Len, SLoc); - return ScratchBuf->getToken(Buf, Len); +void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, + SourceLocation InstantiationLoc) { + Tok.setLength(Len); + + const char *DestPtr; + SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr); + + if (InstantiationLoc.isValid()) + Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len); + Tok.setLocation(Loc); + + // If this is a literal token, set the pointer data. + if (Tok.isLiteral()) + Tok.setLiteralData(DestPtr); } diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index 695a5365fa..9253bc0944 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -30,10 +30,14 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) { /// return a SourceLocation that refers to the token. This is just like the /// method below, but returns a location that indicates the physloc of the /// token. -SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { +SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, + const char *&DestPtr) { if (BytesUsed+Len > ScratchBufSize) AllocScratchBuffer(Len); + // Return a pointer to the character data. + DestPtr = CurBuffer+BytesUsed; + // Copy the token data into the buffer. memcpy(CurBuffer+BytesUsed, Buf, Len); @@ -43,16 +47,6 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len); } - -/// getToken - Splat the specified text into a temporary MemoryBuffer and -/// return a SourceLocation that refers to the token. The SourceLoc value -/// gives a virtual location that the token will appear to be from. -SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, - SourceLocation SourceLoc) { - // Map the physloc to the specified sourceloc. - return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len); -} - void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { // Only pay attention to the requested length if it is larger than our default // page size. If it is, we allocate an entire chunk for it. This is to diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 3707ef1ed6..7ae61beb88 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -352,6 +352,7 @@ void TokenLexer::Lex(Token &Tok) { /// If this returns true, the caller should immediately return the token. bool TokenLexer::PasteTokens(Token &Tok) { llvm::SmallVector<char, 128> Buffer; + const char *ResultTokStrPtr = 0; do { // Consume the ## operator. SourceLocation PasteOpLoc = Tokens[CurToken].getLocation(); @@ -386,8 +387,16 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Plop the pasted result (including the trailing newline and null) into a // scratch buffer where we can lex it. - SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size()); + Token ResultTokTmp; + ResultTokTmp.startToken(); + // Claim that the tmp token is a string_literal so that we can get the + // character pointer back from CreateString. + ResultTokTmp.setKind(tok::string_literal); + PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp); + SourceLocation ResultTokLoc = ResultTokTmp.getLocation(); + ResultTokStrPtr = ResultTokTmp.getLiteralData(); + // Lex the resultant pasted token into Result. Token Result; @@ -405,20 +414,16 @@ bool TokenLexer::PasteTokens(Token &Tok) { assert(ResultTokLoc.isFileID() && "Should be a raw location into scratch buffer"); SourceManager &SourceMgr = PP.getSourceManager(); - std::pair<FileID, unsigned> LocInfo = - SourceMgr.getDecomposedLoc(ResultTokLoc); + FileID LocFileID = SourceMgr.getFileID(ResultTokLoc); - const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first; + const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first; // Make a lexer to lex this string from. Lex just this one token. - const char *ResultStrData = ScratchBufStart+LocInfo.second; - // Make a lexer object so that we lex and expand the paste result. - Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first), - PP.getLangOptions(), - ScratchBufStart, - ResultStrData, - ResultStrData+LHSLen+RHSLen /*don't include null*/); + Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), + PP.getLangOptions(), ScratchBufStart, + ResultTokStrPtr, + ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/); // Lex a token in raw mode. This way it won't look up identifiers // automatically, lexing off the end will return an eof token, and @@ -442,12 +447,12 @@ bool TokenLexer::PasteTokens(Token &Tok) { RHS.is(tok::slash)) { HandleMicrosoftCommentPaste(Tok); return true; - } else { - // TODO: If not in assembler language mode. - PP.Diag(PasteOpLoc, diag::err_pp_bad_paste) - << std::string(Buffer.begin(), Buffer.end()-1); - return false; } + + // TODO: If not in assembler language mode. + PP.Diag(PasteOpLoc, diag::err_pp_bad_paste) + << std::string(Buffer.begin(), Buffer.end()-1); + return false; } // Turn ## into 'unknown' to avoid # ## # from looking like a paste @@ -471,7 +476,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { if (Tok.is(tok::identifier)) { // Look up the identifier info for the token. We disabled identifier lookup // by saying we're skipping contents, so we need to do this manually. - Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); + Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr)); } return false; } diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp index aab6fb0cce..e17e80133b 100644 --- a/lib/Rewrite/TokenRewriter.cpp +++ b/lib/Rewrite/TokenRewriter.cpp @@ -78,14 +78,15 @@ TokenRewriter::AddToken(const Token &T, TokenRefTy Where) { TokenRewriter::token_iterator -TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){ +TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) { unsigned Len = strlen(Val); // Plop the string into the scratch buffer, then create a token for this // string. Token Tok; Tok.startToken(); - Tok.setLocation(ScratchBuf->getToken(Val, Len)); + const char *Spelling; + Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling)); Tok.setLength(Len); // TODO: Form a whole lexer around this and relex the token! For now, just |