diff options
-rw-r--r-- | include/clang/Lex/Lexer.h | 2 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 1 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 23 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 6 | ||||
-rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 16 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 63 |
6 files changed, 50 insertions, 61 deletions
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 3950e17705..8077934a61 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -138,7 +138,7 @@ public: /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no /// associated preprocessor object. Return true if the 'next character to - /// read' pointer points and the end of the lexer buffer, false otherwise. + /// read' pointer points at the end of the lexer buffer, false otherwise. bool LexFromRawLexer(Token &Result) { assert(LexingRawMode && "Not already in raw mode!"); Lex(Result); diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index efbd84c879..c68d21c7e7 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -167,6 +167,7 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc, L->BufferPtr = StrData; L->BufferEnd = StrData+TokLen; + assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); // Set the SourceLocation with the remapping information. This ensures that // GetMappedTokenLoc will remap the tokens as they are lexed. diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index 6cb1908330..84056c3f4b 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -475,14 +475,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Loc = SourceMgr.getInstantiationRange(Loc).second; PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc); - // __LINE__ expands to a simple numeric value. Add a space after it so that - // it will tokenize as a number (and not run into stuff after it in the temp - // buffer). - sprintf(TmpBuffer, "%u ", PLoc.getLine()); - unsigned Length = strlen(TmpBuffer)-1; + // __LINE__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", PLoc.getLine()); Tok.setKind(tok::numeric_constant); - CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation()); - Tok.setLength(Length); // Trim off space. + CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation()); } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { // C99 6.10.8: "__FILE__: The presumed name of the current source file (a // character string literal)". This can be affected by #line. @@ -532,14 +528,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { for (; PLoc.isValid(); ++Depth) PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc()); - // __INCLUDE_LEVEL__ expands to a simple numeric value. Add a space after - // it so that it will tokenize as a number (and not run into stuff after it - // in the temp buffer). - sprintf(TmpBuffer, "%u ", Depth); - unsigned Length = strlen(TmpBuffer)-1; + // __INCLUDE_LEVEL__ expands to a simple numeric value. + sprintf(TmpBuffer, "%u", Depth); Tok.setKind(tok::numeric_constant); - CreateString(TmpBuffer, Length, Tok, Tok.getLocation()); - Tok.setLength(Length); // Trim off space. + CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation()); } else if (II == Ident__TIMESTAMP__) { // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. @@ -565,10 +557,9 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { TmpBuffer[0] = '"'; strcpy(TmpBuffer+1, Result); unsigned Len = strlen(TmpBuffer); - TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. + TmpBuffer[Len] = '"'; // Replace the newline with a quote. Tok.setKind(tok::string_literal); CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation()); - Tok.setLength(Len); // Trim off space. } else { assert(0 && "Unknown identifier!"); } diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp index 73d36414ba..b4fb47cb31 100644 --- a/lib/Lex/Pragma.cpp +++ b/lib/Lex/Pragma.cpp @@ -141,9 +141,8 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // contents appear to have a space before them. StrVal[0] = ' '; - // Replace the terminating quote with a \n\0. + // Replace the terminating quote with a \n. StrVal[StrVal.size()-1] = '\n'; - StrVal += '\0'; // Remove escaped quotes and escapes. for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { @@ -165,8 +164,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // Make and enter a lexer object so that we lex and expand the tokens just // like any others. Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc, - // do not include the null in the count. - StrVal.size()-1, *this); + StrVal.size(), *this); EnterSourceFileWithLexer(TL, 0); diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp index 9253bc0944..c73cd68a4c 100644 --- a/lib/Lex/ScratchBuffer.cpp +++ b/lib/Lex/ScratchBuffer.cpp @@ -32,7 +32,7 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) { /// token. SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, const char *&DestPtr) { - if (BytesUsed+Len > ScratchBufSize) + if (BytesUsed+Len+1 > ScratchBufSize) AllocScratchBuffer(Len); // Return a pointer to the character data. @@ -42,16 +42,21 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, memcpy(CurBuffer+BytesUsed, Buf, Len); // Remember that we used these bytes. - BytesUsed += Len; + BytesUsed += Len+1; + + // Add a NUL terminator to the token. This keeps the tokens separated, in + // case they get relexed, and puts them on their own virtual lines in case a + // diagnostic points to one. + CurBuffer[BytesUsed-1] = '\0'; - return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len); + return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1); } void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { // Only pay attention to the requested length if it is larger than our default // page size. If it is, we allocate an entire chunk for it. This is to // support gigantic tokens, which almost certainly won't happen. :) - if (RequestLen < ScratchBufSize) + if (RequestLen+1 < ScratchBufSize) RequestLen = ScratchBufSize; llvm::MemoryBuffer *Buf = @@ -59,5 +64,6 @@ void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { FileID FID = SourceMgr.createFileIDForMemBuffer(Buf); BufferStartLoc = SourceMgr.getLocForStartOfFile(FID); CurBuffer = const_cast<char*>(Buf->getBufferStart()); - BytesUsed = 0; + BytesUsed = 1; + CurBuffer[0] = '0'; // Start out with a \0 for cleanliness. } diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 898b3a780d..8178168ef5 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -365,11 +365,9 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Get the RHS token. const Token &RHS = Tokens[CurToken]; - bool isInvalid = false; - // Allocate space for the result token. This is guaranteed to be enough for - // the two tokens and a null terminator. - Buffer.resize(Tok.getLength() + RHS.getLength() + 1); + // the two tokens. + Buffer.resize(Tok.getLength() + RHS.getLength()); // Get the spelling of the LHS token in Buffer. const char *BufPtr = &Buffer[0]; @@ -382,11 +380,8 @@ bool TokenLexer::PasteTokens(Token &Tok) { if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer! memcpy(&Buffer[LHSLen], BufPtr, RHSLen); - // Add null terminator. - Buffer[LHSLen+RHSLen] = '\0'; - // Trim excess space. - Buffer.resize(LHSLen+RHSLen+1); + Buffer.resize(LHSLen+RHSLen); // Plop the pasted result (including the trailing newline and null) into a // scratch buffer where we can lex it. @@ -425,45 +420,43 @@ bool TokenLexer::PasteTokens(Token &Tok) { // Make a lexer object so that we lex and expand the paste result. Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), PP.getLangOptions(), ScratchBufStart, - ResultTokStrPtr, - ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/); + ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); // Lex a token in raw mode. This way it won't look up identifiers // automatically, lexing off the end will return an eof token, and // warnings are disabled. This returns true if the result token is the // entire buffer. - bool IsComplete = TL.LexFromRawLexer(Result); + bool isInvalid = !TL.LexFromRawLexer(Result); // If we got an EOF token, we didn't form even ONE token. For example, we // did "/ ## /" to get "//". - IsComplete &= Result.isNot(tok::eof); - isInvalid = !IsComplete; - } + isInvalid |= Result.is(tok::eof); - // If pasting the two tokens didn't form a full new token, this is an error. - // This occurs with "x ## +" and other stuff. Return with Tok unmodified - // and with RHS as the next token to lex. - if (isInvalid) { - // Test for the Microsoft extension of /##/ turning into // here on the - // error path. - if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && - RHS.is(tok::slash)) { - HandleMicrosoftCommentPaste(Tok); - return true; - } + // If pasting the two tokens didn't form a full new token, this is an + // error. This occurs with "x ## +" and other stuff. Return with Tok + // unmodified and with RHS as the next token to lex. + if (isInvalid) { + // Test for the Microsoft extension of /##/ turning into // here on the + // error path. + if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && + RHS.is(tok::slash)) { + HandleMicrosoftCommentPaste(Tok); + return true; + } - // TODO: If not in assembler language mode. - PP.Diag(PasteOpLoc, diag::err_pp_bad_paste) + // TODO: If not in assembler language mode. + PP.Diag(PasteOpLoc, diag::err_pp_bad_paste) << std::string(Buffer.begin(), Buffer.end()-1); - return false; - } - - // Turn ## into 'unknown' to avoid # ## # from looking like a paste - // operator. - if (Result.is(tok::hashhash)) - Result.setKind(tok::unknown); - // FIXME: Turn __VA_ARGS__ into "not a token"? + return false; + } + // Turn ## into 'unknown' to avoid # ## # from looking like a paste + // operator. + if (Result.is(tok::hashhash)) + Result.setKind(tok::unknown); + // FIXME: Turn __VA_ARGS__ into "not a token"? + } + // Transfer properties of the LHS over the the Result. Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); |