Diffstat (limited to 'lib')
-rw-r--r--   lib/Frontend/PrintPreprocessedOutput.cpp    4
-rw-r--r--   lib/Lex/Lexer.cpp                          45
-rw-r--r--   lib/Lex/PPDirectives.cpp                   14
3 files changed, 35 insertions, 28 deletions
diff --git a/lib/Frontend/PrintPreprocessedOutput.cpp b/lib/Frontend/PrintPreprocessedOutput.cpp
index c85945b894..3d55adceff 100644
--- a/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -548,7 +548,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
-      if (Tok.getKind() == tok::comment)
+      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
         Callbacks->HandleNewlinesInToken(TokPtr, Len);
     } else {
       std::string S = PP.getSpelling(Tok);
@@ -556,7 +556,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
 
       // Tokens that can contain embedded newlines need to adjust our current
       // line number.
-      if (Tok.getKind() == tok::comment)
+      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
         Callbacks->HandleNewlinesInToken(&S[0], S.size());
     }
     Callbacks->setEmittedTokensOnThisLine();
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 0590d9e114..65ea5e3996 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
   InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
             InputFile->getBufferEnd());
 
-  // Default to keeping comments if the preprocessor wants them.
-  SetCommentRetentionState(PP.getCommentRetentionState());
+  resetExtendedTokenMode();
+}
+
+void Lexer::resetExtendedTokenMode() {
+  assert(PP && "Cannot reset token mode without a preprocessor");
+  if (LangOpts.TraditionalCPP)
+    SetKeepWhitespaceMode(true);
+  else
+    SetCommentRetentionState(PP->getCommentRetentionState());
 }
 
 /// Lexer constructor - Create a new raw lexer object.  This object is only
@@ -1844,6 +1851,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 ///
 bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   // Whitespace - Skip it, then return the token after the whitespace.
+  bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
   unsigned char Char = *CurPtr;  // Skip consequtive spaces efficiently.
   while (1) {
     // Skip horizontal whitespace very aggressively.
@@ -1851,7 +1860,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
       Char = *++CurPtr;
 
     // Otherwise if we have something other than whitespace, we're done.
-    if (Char != '\n' && Char != '\r')
+    if (!isVerticalWhitespace(Char))
       break;
 
     if (ParsingPreprocessorDirective) {
@@ -1861,24 +1870,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
     }
 
     // ok, but handle newline.
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
-    // No leading whitespace seen so far.
-    Result.clearFlag(Token::LeadingSpace);
+    SawNewline = true;
     Char = *++CurPtr;
   }
 
-  // If this isn't immediately after a newline, there is leading space.
-  char PrevChar = CurPtr[-1];
-  if (PrevChar != '\n' && PrevChar != '\r')
-    Result.setFlag(Token::LeadingSpace);
-
   // If the client wants us to return whitespace, return it now.
   if (isKeepWhitespaceMode()) {
     FormTokenWithChars(Result, CurPtr, tok::unknown);
+    if (SawNewline)
+      IsAtStartOfLine = true;
+    // FIXME: The next token will not have LeadingSpace set.
     return true;
   }
 
+  // If this isn't immediately after a newline, there is leading space.
+  char PrevChar = CurPtr[-1];
+  bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+  Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+  if (SawNewline)
+    Result.setFlag(Token::StartOfLine);
+
   BufferPtr = CurPtr;
   return false;
 }
@@ -2269,7 +2281,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
   // have already returned above with the comment as a token.
   if (isHorizontalWhitespace(*CurPtr)) {
-    Result.setFlag(Token::LeadingSpace);
     SkipWhitespace(Result, CurPtr+1);
     return false;
   }
@@ -2351,7 +2362,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
     FormTokenWithChars(Result, CurPtr, tok::eod);
 
     // Restore comment saving mode, in case it was disabled for directive.
-    SetCommentRetentionState(PP->getCommentRetentionState());
+    resetExtendedTokenMode();
     return true;  // Have a token.
   }
 
@@ -2718,6 +2729,7 @@ LexNextToken:
     // whitespace.
     if (isKeepWhitespaceMode()) {
       FormTokenWithChars(Result, CurPtr, tok::unknown);
+      // FIXME: The next token will not have LeadingSpace set.
       return;
     }
 
@@ -2785,7 +2797,7 @@ LexNextToken:
 
       // Restore comment saving mode, in case it was disabled for directive.
      if (PP)
-        SetCommentRetentionState(PP->getCommentRetentionState());
+        resetExtendedTokenMode();
 
       // Since we consumed a newline, we are back at the start of a line.
       IsAtStartOfLine = true;
@@ -2793,8 +2805,7 @@ LexNextToken:
       Kind = tok::eod;
       break;
     }
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
+
     // No leading whitespace seen so far.
     Result.clearFlag(Token::LeadingSpace);
 
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 18250281c1..54457c36cf 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -269,7 +269,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
     if (Tok.isNot(tok::raw_identifier)) {
       CurPPLexer->ParsingPreprocessorDirective = false;
       // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
       continue;
     }
 
@@ -285,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
         FirstChar != 'i' && FirstChar != 'e') {
       CurPPLexer->ParsingPreprocessorDirective = false;
       // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
       continue;
     }
 
@@ -302,7 +302,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
       if (IdLen >= 20) {
         CurPPLexer->ParsingPreprocessorDirective = false;
         // Restore comment saving mode.
-        if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+        if (CurLexer) CurLexer->resetExtendedTokenMode();
         continue;
       }
       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
@@ -408,7 +408,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
 
     CurPPLexer->ParsingPreprocessorDirective = false;
     // Restore comment saving mode.
-    if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+    if (CurLexer) CurLexer->resetExtendedTokenMode();
   }
 
   // Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -594,6 +594,7 @@ void Preprocessor::HandleDirective(Token &Result) {
   // mode.  Tell the lexer this so any newlines we see will be converted into an
   // EOD token (which terminates the directive).
   CurPPLexer->ParsingPreprocessorDirective = true;
+  if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
 
   ++NumDirectives;
 
@@ -638,14 +639,9 @@ void Preprocessor::HandleDirective(Token &Result) {
   // and reset to previous state when returning from this function.
   ResetMacroExpansionHelper helper(this);
 
-TryAgain:
   switch (Result.getKind()) {
   case tok::eod:
     return;   // null directive.
-  case tok::comment:
-    // Handle stuff like "# /*foo*/ define X" in -E -C mode.
-    LexUnexpandedToken(Result);
-    goto TryAgain;
   case tok::code_completion:
     if (CodeComplete)
       CodeComplete->CodeCompleteDirective(
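Background for the PrintPreprocessedOutput.cpp hunks: in keep-whitespace mode a run of whitespace comes back from the lexer as a single tok::unknown token, and its spelling can span several source lines just like a block comment, so the printer has to count the embedded newlines for those tokens too or its line tracking drifts. The following is a minimal standalone sketch of that counting; countEmbeddedNewlines is a hypothetical helper written for illustration, not Clang's HandleNewlinesInToken implementation.

#include <cstring>
#include <iostream>

// Count the line breaks embedded in a token's spelling, treating "\r\n" and
// "\n\r" pairs as a single break -- the job the printer callback performs for
// tok::comment and, after this change, tok::unknown whitespace tokens.
static unsigned countEmbeddedNewlines(const char *TokStr, unsigned Len) {
  unsigned NumNewlines = 0;
  for (unsigned i = 0; i != Len; ++i) {
    if (TokStr[i] != '\n' && TokStr[i] != '\r')
      continue;
    ++NumNewlines;
    // Skip the second half of a two-character line terminator.
    if (i + 1 != Len && (TokStr[i + 1] == '\n' || TokStr[i + 1] == '\r') &&
        TokStr[i + 1] != TokStr[i])
      ++i;
  }
  return NumNewlines;
}

int main() {
  const char *Whitespace = "  \r\n\t\n  "; // spelling of one tok::unknown run
  std::cout << countEmbeddedNewlines(
                   Whitespace, static_cast<unsigned>(std::strlen(Whitespace)))
            << " newline(s)\n"; // prints "2 newline(s)"
}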