6 files changed, 50 insertions, 61 deletions
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 3950e17705..8077934a61 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -138,7 +138,7 @@ public:
   
   /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no
   /// associated preprocessor object.  Return true if the 'next character to
-  /// read' pointer points and the end of the lexer buffer, false otherwise.
+  /// read' pointer points at the end of the lexer buffer, false otherwise.
   bool LexFromRawLexer(Token &Result) {
     assert(LexingRawMode && "Not already in raw mode!");
     Lex(Result);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index efbd84c879..c68d21c7e7 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -167,6 +167,7 @@ Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
   
   L->BufferPtr = StrData;
   L->BufferEnd = StrData+TokLen;
+  assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
 
   // Set the SourceLocation with the remapping information.  This ensures that
   // GetMappedTokenLoc will remap the tokens as they are lexed.
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 6cb1908330..84056c3f4b 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -475,14 +475,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     Loc = SourceMgr.getInstantiationRange(Loc).second;
     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
     
-    // __LINE__ expands to a simple numeric value.  Add a space after it so that
-    // it will tokenize as a number (and not run into stuff after it in the temp
-    // buffer).
-    sprintf(TmpBuffer, "%u ", PLoc.getLine());
-    unsigned Length = strlen(TmpBuffer)-1;
+    // __LINE__ expands to a simple numeric value.
+    sprintf(TmpBuffer, "%u", PLoc.getLine());
     Tok.setKind(tok::numeric_constant);
-    CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
-    Tok.setLength(Length);  // Trim off space.
+    CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation());
   } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
     // C99 6.10.8: "__FILE__: The presumed name of the current source file (a
     // character string literal)". This can be affected by #line.
@@ -532,14 +528,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     for (; PLoc.isValid(); ++Depth)
       PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
     
-    // __INCLUDE_LEVEL__ expands to a simple numeric value.  Add a space after
-    // it so that it will tokenize as a number (and not run into stuff after it
-    // in the temp buffer).
-    sprintf(TmpBuffer, "%u ", Depth);
-    unsigned Length = strlen(TmpBuffer)-1;
+    // __INCLUDE_LEVEL__ expands to a simple numeric value.
+    sprintf(TmpBuffer, "%u", Depth);
     Tok.setKind(tok::numeric_constant);
-    CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
-    Tok.setLength(Length);  // Trim off space.
+    CreateString(TmpBuffer, strlen(TmpBuffer), Tok, Tok.getLocation());
   } else if (II == Ident__TIMESTAMP__) {
     // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
     // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
@@ -565,10 +557,9 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     TmpBuffer[0] = '"';
     strcpy(TmpBuffer+1, Result);
     unsigned Len = strlen(TmpBuffer);
-    TmpBuffer[Len-1] = '"';  // Replace the newline with a quote.
+    TmpBuffer[Len] = '"';  // Replace the newline with a quote.
     Tok.setKind(tok::string_literal);
     CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
-    Tok.setLength(Len);  // Trim off space.
   } else {
     assert(0 && "Unknown identifier!");
   }
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 73d36414ba..b4fb47cb31 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -141,9 +141,8 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   // contents appear to have a space before them.
   StrVal[0] = ' ';
   
-  // Replace the terminating quote with a \n\0.
+  // Replace the terminating quote with a \n.
   StrVal[StrVal.size()-1] = '\n';
-  StrVal += '\0';
   
   // Remove escaped quotes and escapes.
   for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
@@ -165,8 +164,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   // Make and enter a lexer object so that we lex and expand the tokens just
   // like any others.
   Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
-                                        // do not include the null in the count.
-                                        StrVal.size()-1, *this);
+                                        StrVal.size(), *this);
 
   EnterSourceFileWithLexer(TL, 0);
 
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index 9253bc0944..c73cd68a4c 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -32,7 +32,7 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
 /// token.
 SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
                                        const char *&DestPtr) {
-  if (BytesUsed+Len > ScratchBufSize)
-    AllocScratchBuffer(Len);
+  if (BytesUsed+Len+1 > ScratchBufSize)
+    AllocScratchBuffer(Len+2);  // Lead byte of a fresh buffer + token's NUL.
   
   // Return a pointer to the character data.
@@ -42,16 +42,21 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
   memcpy(CurBuffer+BytesUsed, Buf, Len);
 
   // Remember that we used these bytes.
-  BytesUsed += Len;
+  BytesUsed += Len+1;
+  
+  // Add a NUL terminator to the token.  This keeps the tokens separated, in
+  // case they get relexed, and puts them on their own virtual lines in case a
+  // diagnostic points to one.
+  CurBuffer[BytesUsed-1] = '\0';
 
-  return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
+  return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1);
 }
 
 void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
   // Only pay attention to the requested length if it is larger than our default
   // page size.  If it is, we allocate an entire chunk for it.  This is to
   // support gigantic tokens, which almost certainly won't happen. :)
-  if (RequestLen < ScratchBufSize)
+  if (RequestLen+1 < ScratchBufSize)
     RequestLen = ScratchBufSize;
   
   llvm::MemoryBuffer *Buf = 
@@ -59,5 +64,6 @@ void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
   FileID FID = SourceMgr.createFileIDForMemBuffer(Buf);
   BufferStartLoc = SourceMgr.getLocForStartOfFile(FID);
   CurBuffer = const_cast<char*>(Buf->getBufferStart());
-  BytesUsed = 0;
+  BytesUsed = 1;
+  CurBuffer[0] = '\0';  // Start out with a \0 for cleanliness.
 }
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 898b3a780d..8178168ef5 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -365,11 +365,9 @@ bool TokenLexer::PasteTokens(Token &Tok) {
     // Get the RHS token.
     const Token &RHS = Tokens[CurToken];
   
-    bool isInvalid = false;
-
     // Allocate space for the result token.  This is guaranteed to be enough for
-    // the two tokens and a null terminator.
-    Buffer.resize(Tok.getLength() + RHS.getLength() + 1);
+    // the two tokens.
+    Buffer.resize(Tok.getLength() + RHS.getLength());
     
     // Get the spelling of the LHS token in Buffer.
     const char *BufPtr = &Buffer[0];
@@ -382,11 +380,8 @@ bool TokenLexer::PasteTokens(Token &Tok) {
     if (BufPtr != &Buffer[LHSLen])   // Really, we want the chars in Buffer!
       memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
     
-    // Add null terminator.
-    Buffer[LHSLen+RHSLen] = '\0';
-    
     // Trim excess space.
-    Buffer.resize(LHSLen+RHSLen+1);
+    Buffer.resize(LHSLen+RHSLen);
     
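-    // Plop the pasted result (including the trailing newline and null) into a
-    // scratch buffer where we can lex it.
+    // Plop the pasted result into a scratch buffer where we can lex it.  The
+    // local buffer no longer carries its own null terminator.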
@@ -425,45 +420,43 @@ bool TokenLexer::PasteTokens(Token &Tok) {
       // Make a lexer object so that we lex and expand the paste result.
       Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
                PP.getLangOptions(), ScratchBufStart,
-               ResultTokStrPtr, 
-               ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
+               ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
       
       // Lex a token in raw mode.  This way it won't look up identifiers
       // automatically, lexing off the end will return an eof token, and
       // warnings are disabled.  This returns true if the result token is the
       // entire buffer.
-      bool IsComplete = TL.LexFromRawLexer(Result);
+      bool isInvalid = !TL.LexFromRawLexer(Result);
       
       // If we got an EOF token, we didn't form even ONE token.  For example, we
       // did "/ ## /" to get "//".
-      IsComplete &= Result.isNot(tok::eof);
-      isInvalid = !IsComplete;
-    }
+      isInvalid |= Result.is(tok::eof);
     
-    // If pasting the two tokens didn't form a full new token, this is an error.
-    // This occurs with "x ## +"  and other stuff.  Return with Tok unmodified
-    // and with RHS as the next token to lex.
-    if (isInvalid) {
-      // Test for the Microsoft extension of /##/ turning into // here on the
-      // error path.
-      if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && 
-          RHS.is(tok::slash)) {
-        HandleMicrosoftCommentPaste(Tok);
-        return true;
-      }
+      // If pasting the two tokens didn't form a full new token, this is an
+      // error.  This occurs with "x ## +"  and other stuff.  Return with Tok
+      // unmodified and with RHS as the next token to lex.
+      if (isInvalid) {
+        // Test for the Microsoft extension of /##/ turning into // here on the
+        // error path.
+        if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && 
+            RHS.is(tok::slash)) {
+          HandleMicrosoftCommentPaste(Tok);
+          return true;
+        }
       
-      // TODO: If not in assembler language mode.
-      PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
+        // TODO: If not in assembler language mode.
+        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
         << std::string(Buffer.begin(), Buffer.end()-1);
-      return false;
-    }
-    
-    // Turn ## into 'unknown' to avoid # ## # from looking like a paste
-    // operator.
-    if (Result.is(tok::hashhash))
-      Result.setKind(tok::unknown);
-    // FIXME: Turn __VA_ARGS__ into "not a token"?
+        return false;
+      }
     
+      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
+      // operator.
+      if (Result.is(tok::hashhash))
+        Result.setKind(tok::unknown);
+      // FIXME: Turn __VA_ARGS__ into "not a token"?
+    }
+      
     // Transfer properties of the LHS over the the Result.
     Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
     Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());