15 files changed, 153 insertions, 112 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp
index 1d33105668..fcb4d2f12d 100644
--- a/Driver/CacheTokens.cpp
+++ b/Driver/CacheTokens.cpp
@@ -140,32 +140,19 @@ void PTHWriter::EmitToken(const Token& T) {
   Emit32(ResolveID(T.getIdentifierInfo()));
   Emit32(fpos);
   
-  // For specific tokens we cache their spelling.
-  if (T.getIdentifierInfo())
-    return;
-
-  switch (T.getKind()) {
-    default:
-      break;
-    case tok::string_literal:     
-    case tok::wide_string_literal:
-    case tok::angle_string_literal:
-    case tok::numeric_constant:
-    case tok::char_constant: {
-      // FIXME: This uses the slow getSpelling().  Perhaps we do better
-      // in the future?  This only slows down PTH generation.
-      const std::string& spelling = PP.getSpelling(T);
-      const char* s = spelling.c_str();
-      
-      // Get the string entry.
-      llvm::StringMapEntry<Offset> *E =
-        &CachedStrs.GetOrCreateValue(s, s+spelling.size());
-
-      // Store the address of the string entry in our spelling map.
-      (*CurSpellMap).push_back(std::make_pair(fpos, E));
+  // Literals (strings, numbers, characters) get cached spellings.
+  if (T.isLiteral()) {
+    // FIXME: This uses the slow getSpelling().  Perhaps we do better
+    // in the future?  This only slows down PTH generation.
+    const std::string &spelling = PP.getSpelling(T);
+    const char* s = spelling.c_str();
+    
+    // Get the string entry.
+    llvm::StringMapEntry<Offset> *E =
+      &CachedStrs.GetOrCreateValue(s, s+spelling.size());
 
-      break;
-    }
+    // Store the address of the string entry in our spelling map.
+    CurSpellMap->push_back(std::make_pair(fpos, E));
   }
 }
 
diff --git a/Driver/PrintPreprocessedOutput.cpp b/Driver/PrintPreprocessedOutput.cpp
index 64cc4c8be6..deecacb1f4 100644
--- a/Driver/PrintPreprocessedOutput.cpp
+++ b/Driver/PrintPreprocessedOutput.cpp
@@ -430,6 +430,7 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
     // Avoid spelling identifiers, the most common form of token.
     FirstChar = II->getName()[0];
   } else if (!Tok.needsCleaning()) {
+    // FIXME: Speed up literals; getLiteralData() could avoid this lookup.
     SourceManager &SrcMgr = PP.getSourceManager();
     FirstChar =
       *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
@@ -556,6 +557,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP,
       const char *Str = II->getName();
       unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
       OS.write(Str, Len);
+      // FIXME: Add a fast path for literals (see Token::getLiteralData).
     } else if (Tok.getLength() < 256) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index bf47bab295..f6391d9781 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -415,7 +415,7 @@ public:
   /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
   /// invoked.
   void AnnotateCachedTokens(const Token &Tok) {
-    assert(Tok.isAnnotationToken() && "Expected annotation token");
+    assert(Tok.isAnnotation() && "Expected annotation token");
     if (CachedLexPos != 0 && isBacktrackEnabled())
       AnnotatePreviousCachedTokens(Tok);
   }
@@ -463,11 +463,11 @@ public:
     return *SourceMgr.getCharacterData(SL);
   }
   
-  /// CreateString - Plop the specified string into a scratch buffer and return
-  /// a location for it.  If specified, the source location provides a source
-  /// location for the token.
-  SourceLocation CreateString(const char *Buf, unsigned Len,
-                              SourceLocation SourceLoc = SourceLocation());
+  /// CreateString - Plop the specified string into a scratch buffer and set the
+  /// specified token's location and length to it.  If specified, the source
+  /// location provides a location of the instantiation point of the token.
+  void CreateString(const char *Buf, unsigned Len,
+                    Token &Tok, SourceLocation SourceLoc = SourceLocation());
   
   /// DumpToken - Print the token to stderr, used for debugging.
   ///
diff --git a/include/clang/Lex/ScratchBuffer.h b/include/clang/Lex/ScratchBuffer.h
index eec6a251a5..6506f92629 100644
--- a/include/clang/Lex/ScratchBuffer.h
+++ b/include/clang/Lex/ScratchBuffer.h
@@ -31,16 +31,10 @@ public:
   ScratchBuffer(SourceManager &SM);
   
   /// getToken - Splat the specified text into a temporary MemoryBuffer and
-  /// return a SourceLocation that refers to the token.  The SourceLoc value
-  /// gives a virtual location that the token will appear to be from.
-  SourceLocation getToken(const char *Buf, unsigned Len,
-                          SourceLocation SourceLoc);
-  
-  /// getToken - Splat the specified text into a temporary MemoryBuffer and
   /// return a SourceLocation that refers to the token.  This is just like the
   /// previous method, but returns a location that indicates the physloc of the
   /// token.
-  SourceLocation getToken(const char *Buf, unsigned Len);
+  SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr);
                           
 private:
   void AllocScratchBuffer(unsigned RequestLen);
diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h
index c0dd2b7837..73e087ecb6 100644
--- a/include/clang/Lex/Token.h
+++ b/include/clang/Lex/Token.h
@@ -34,21 +34,28 @@ class Token {
   /// The location of the token.
   SourceLocation Loc;
 
-  // Conceptually these next two fields could be in a union with
-  // access depending on isAnnotationToken(). However, this causes gcc
-  // 4.2 to pessimize LexTokenInternal, a very performance critical
-  // routine. Keeping as separate members with casts until a more
-  // beautiful fix presents itself.
+  // Conceptually these next two fields could be in a union.  However, this
+  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
+  // routine. Keeping as separate members with casts until a more beautiful fix
+  // presents itself.
 
   /// UintData - This holds either the length of the token text, when
   /// a normal token, or the end of the SourceRange when an annotation
   /// token.
   unsigned UintData;
 
-  /// PtrData - For normal tokens, this points to the uniqued
-  /// information for the identifier (if an identifier token) or
-  /// null. For annotation tokens, this points to information specific
-  /// to the annotation token.
+  /// PtrData - This acts as a union of four cases; which one applies depends
+  /// on what kind of token this is:
+  ///  Identifiers, keywords, etc:
+  ///    This is an IdentifierInfo*, which contains the uniqued identifier
+  ///    spelling.
+  ///  Literals:  isLiteral() returns true.
+  ///    This is a pointer to the start of the token in a text buffer, which
+  ///    may be dirty (have trigraphs / escaped newlines).
+  ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
+  ///    This is a pointer to sema-specific data for the annotation token.
+  ///  Other:
+  ///    This is null.
   void *PtrData;
 
   /// Kind - The actual flavor of token this is.
@@ -77,32 +84,40 @@ public:
   bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
   bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
 
-  bool isAnnotationToken() const { 
+  /// isLiteral - Return true if this is a "literal", like a numeric
+  /// constant, string, etc.
+  bool isLiteral() const {
+    return is(tok::numeric_constant) || is(tok::char_constant) ||
+           is(tok::string_literal) || is(tok::wide_string_literal) ||
+           is(tok::angle_string_literal);
+  }
+
+  bool isAnnotation() const { 
     return is(tok::annot_typename) || 
            is(tok::annot_cxxscope) ||
            is(tok::annot_template_id);
   }
-
+  
   /// getLocation - Return a source location identifier for the specified
   /// offset in the current file.
   SourceLocation getLocation() const { return Loc; }
   unsigned getLength() const {
-    assert(!isAnnotationToken() && "Annotation tokens have no length field");
+    assert(!isAnnotation() && "Annotation tokens have no length field");
     return UintData;
   }
 
   void setLocation(SourceLocation L) { Loc = L; }
   void setLength(unsigned Len) {
-    assert(!isAnnotationToken() && "Annotation tokens have no length field");
+    assert(!isAnnotation() && "Annotation tokens have no length field");
     UintData = Len;
   }
 
   SourceLocation getAnnotationEndLoc() const {
-    assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
     return SourceLocation::getFromRawEncoding(UintData);
   }
   void setAnnotationEndLoc(SourceLocation L) {
-    assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
     UintData = L.getRawEncoding();
   }
 
@@ -130,19 +145,32 @@ public:
   }
   
   IdentifierInfo *getIdentifierInfo() const {
-    assert(!isAnnotationToken() && "Used IdentInfo on annotation token");
+    assert(!isAnnotation() && "Used IdentInfo on annotation token!");
+    if (isLiteral()) return 0;
     return (IdentifierInfo*) PtrData;
   }
   void setIdentifierInfo(IdentifierInfo *II) {
     PtrData = (void*) II;
   }
-
+  
+  /// getLiteralData - For a literal token (numeric constant, string, etc), this
+  /// returns a pointer to the start of it in the text buffer if known, null
+  /// otherwise.
+  const char *getLiteralData() const {
+    assert(isLiteral() && "Cannot get literal data of non-literal");
+    return reinterpret_cast<const char*>(PtrData);
+  }
+  void setLiteralData(const char *Ptr) {
+    assert(isLiteral() && "Cannot set literal data of non-literal");
+    PtrData = (void*)Ptr;
+  }
+  
   void *getAnnotationValue() const {
-    assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     return PtrData;
   }
   void setAnnotationValue(void *val) {
-    assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     PtrData = val;
   }
   
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 191613bbcb..df8646df79 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -690,10 +690,11 @@ private:
     if (!Tok.is(tok::identifier))
       return false;
     
-    if (Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope))
+    IdentifierInfo *II = Tok.getIdentifierInfo();
+    if (Actions.isTypeName(*II, CurScope))
       return true;
     
-    return Tok.getIdentifierInfo() == Ident_super;
+    return II == Ident_super;
   }
 
   OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9e8d1aa740..03d81b3b9a 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -624,7 +624,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
     return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
   
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+  Result.setLiteralData(TokStart);
 }
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
@@ -655,8 +657,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
     Diag(NulCharacter, diag::null_in_string);
 
   // Update the location of the token as well as the BufferPtr instance var.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr,
                      Wide ? tok::wide_string_literal : tok::string_literal);
+  Result.setLiteralData(TokStart);
 }
 
 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@@ -687,7 +691,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
     Diag(NulCharacter, diag::null_in_string);
   
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
+  Result.setLiteralData(TokStart);
 }
 
 
@@ -735,7 +741,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
     Diag(NulCharacter, diag::null_in_char);
 
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::char_constant);
+  Result.setLiteralData(TokStart);
 }
 
 /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -901,9 +909,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
   Spelling += "*/";    // add suffix.
   
   Result.setKind(tok::comment);
-  Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
-                                      Result.getLocation()));
-  Result.setLength(Spelling.size());
+  PP->CreateString(&Spelling[0], Spelling.size(), Result,
+                   Result.getLocation());
   return true;
 }
 
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 77c96e0ab3..c3d7738afe 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -225,8 +225,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
     }
   }
   
-  Tok.setLength(Result.size());
-  Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
+  PP.CreateString(&Result[0], Result.size(), Tok);
   return Tok;
 }
 
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index ed67754e6e..53aa09c130 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -89,7 +89,7 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
 }
 
 void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
-  assert(Tok.isAnnotationToken() && "Expected annotation token");
+  assert(Tok.isAnnotation() && "Expected annotation token");
   assert(CachedLexPos != 0 && "Expected to have some cached tokens");
   assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc()
          && "The annotation should be until the most recent cached token");
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index b14df735ad..b3e921c368 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -429,10 +429,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
   char TmpBuffer[100];
   sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, 
           TM->tm_year+1900);
-  DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  
+  Token TmpTok;
+  TmpTok.startToken();
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  DATELoc = TmpTok.getLocation();
 
   sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
-  TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  TIMELoc = TmpTok.getLocation();
 }
 
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
@@ -463,8 +468,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
             SourceMgr.getInstantiationLineNumber(Tok.getLocation()));
     unsigned Length = strlen(TmpBuffer)-1;
     Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Trim off space.
   } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
     SourceLocation Loc = Tok.getLocation();
     if (II == Ident__BASE_FILE__) {
@@ -480,8 +485,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc));
     FN = '"' + Lexer::Stringify(FN) + '"';
     Tok.setKind(tok::string_literal);
-    Tok.setLength(FN.size());
-    Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+    CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
   } else if (II == Ident__DATE__) {
     if (!DATELoc.isValid())
       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
@@ -511,8 +515,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     sprintf(TmpBuffer, "%u ", Depth);
     unsigned Length = strlen(TmpBuffer)-1;
     Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+    CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Redundant: the space was never copied.
   } else if (II == Ident__TIMESTAMP__) {
     // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
     // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
@@ -540,8 +544,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     unsigned Len = strlen(TmpBuffer);
     TmpBuffer[Len-1] = '"';  // Replace the newline with a quote.
     Tok.setKind(tok::string_literal);
-    Tok.setLength(Len);
-    Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
+    Tok.setLength(Len);  // Trim off the trailing null.
   } else {
     assert(0 && "Unknown identifier!");
   }
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 922af09e50..a80ba6bd1f 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -156,7 +156,10 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   
   // Plop the string (including the newline and trailing null) into a buffer
   // where we can lex it.
-  SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size());
+  Token TmpTok;
+  TmpTok.startToken();
+  CreateString(&StrVal[0], StrVal.size(), TmpTok);
+  SourceLocation TokLoc = TmpTok.getLocation();
 
   // Make and enter a lexer object so that we lex and expand the tokens just
   // like any others.
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index d0a15e45c4..cb0c850e7e 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -266,13 +266,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   }
 
   // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  const char *TokStart = 0;
+  
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+  
+  if (TokStart == 0)
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
     Buffer = TokStart;
     return Tok.getLength();
   }
+  
   // Otherwise, hard case, relex the characters into the string.
   char *OutBuf = const_cast<char*>(Buffer);
   for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@@ -291,11 +298,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 /// CreateString - Plop the specified string into a scratch buffer and return a
 /// location for it.  If specified, the source location provides a source
 /// location for the token.
-SourceLocation Preprocessor::
-CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
-  if (SLoc.isValid())
-    return ScratchBuf->getToken(Buf, Len, SLoc);
-  return ScratchBuf->getToken(Buf, Len);
+void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
+                                SourceLocation InstantiationLoc) {
+  Tok.setLength(Len);
+  
+  const char *DestPtr;
+  SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
+  
+  if (InstantiationLoc.isValid())
+    Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len);
+  Tok.setLocation(Loc);
+  
+  // If this is a literal token, set the pointer data.
+  if (Tok.isLiteral())
+    Tok.setLiteralData(DestPtr);
 }
 
 
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index 695a5365fa..9253bc0944 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -30,10 +30,14 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
 /// return a SourceLocation that refers to the token.  This is just like the
 /// method below, but returns a location that indicates the physloc of the
 /// token.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       const char *&DestPtr) {
   if (BytesUsed+Len > ScratchBufSize)
     AllocScratchBuffer(Len);
   
+  // Return a pointer to the character data.
+  DestPtr = CurBuffer+BytesUsed;
+  
   // Copy the token data into the buffer.
   memcpy(CurBuffer+BytesUsed, Buf, Len);
 
@@ -43,16 +47,6 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
   return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
 }
 
-
-/// getToken - Splat the specified text into a temporary MemoryBuffer and
-/// return a SourceLocation that refers to the token.  The SourceLoc value
-/// gives a virtual location that the token will appear to be from.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
-                                       SourceLocation SourceLoc) {
-  // Map the physloc to the specified sourceloc.
-  return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
-}
-
 void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
   // Only pay attention to the requested length if it is larger than our default
   // page size.  If it is, we allocate an entire chunk for it.  This is to
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 3707ef1ed6..7ae61beb88 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -352,6 +352,7 @@ void TokenLexer::Lex(Token &Tok) {
 /// If this returns true, the caller should immediately return the token.
 bool TokenLexer::PasteTokens(Token &Tok) {
   llvm::SmallVector<char, 128> Buffer;
+  const char *ResultTokStrPtr = 0;
   do {
     // Consume the ## operator.
     SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
@@ -386,8 +387,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
     
     // Plop the pasted result (including the trailing newline and null) into a
     // scratch buffer where we can lex it.
-    SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+    Token ResultTokTmp;
+    ResultTokTmp.startToken();
     
+    // Claim that the tmp token is a string_literal so that we can get the
+    // character pointer back from CreateString.
+    ResultTokTmp.setKind(tok::string_literal);
+    PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
+    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
+    ResultTokStrPtr = ResultTokTmp.getLiteralData();
+
     // Lex the resultant pasted token into Result.
     Token Result;
     
@@ -405,20 +414,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
       assert(ResultTokLoc.isFileID() &&
              "Should be a raw location into scratch buffer");
       SourceManager &SourceMgr = PP.getSourceManager();
-      std::pair<FileID, unsigned> LocInfo =
-        SourceMgr.getDecomposedLoc(ResultTokLoc);
+      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
       
-      const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first;
+      const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
       
       // Make a lexer to lex this string from.  Lex just this one token.
-      const char *ResultStrData = ScratchBufStart+LocInfo.second;
-      
       // Make a lexer object so that we lex and expand the paste result.
-      Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first),
-               PP.getLangOptions(), 
-               ScratchBufStart,
-               ResultStrData, 
-               ResultStrData+LHSLen+RHSLen /*don't include null*/);
+      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
+               PP.getLangOptions(), ScratchBufStart,
+               ResultTokStrPtr, 
+               ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
       
       // Lex a token in raw mode.  This way it won't look up identifiers
       // automatically, lexing off the end will return an eof token, and
@@ -442,12 +447,12 @@ bool TokenLexer::PasteTokens(Token &Tok) {
           RHS.is(tok::slash)) {
         HandleMicrosoftCommentPaste(Tok);
         return true;
-      } else {
-        // TODO: If not in assembler language mode.
-        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
-          << std::string(Buffer.begin(), Buffer.end()-1);
-        return false;
       }
+      
+      // TODO: If not in assembler language mode.
+      PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
+        << std::string(Buffer.begin(), Buffer.end()-1);
+      return false;
     }
     
     // Turn ## into 'unknown' to avoid # ## # from looking like a paste
@@ -471,7 +476,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
   if (Tok.is(tok::identifier)) {
     // Look up the identifier info for the token.  We disabled identifier lookup
     // by saying we're skipping contents, so we need to do this manually.
-    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr));
   }
   return false;
 }
diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp
index aab6fb0cce..e17e80133b 100644
--- a/lib/Rewrite/TokenRewriter.cpp
+++ b/lib/Rewrite/TokenRewriter.cpp
@@ -78,14 +78,15 @@ TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
   
 
 TokenRewriter::token_iterator
-TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){
+TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
   unsigned Len = strlen(Val);
   
   // Plop the string into the scratch buffer, then create a token for this
   // string.
   Token Tok;
   Tok.startToken();
-  Tok.setLocation(ScratchBuf->getToken(Val, Len));
+  const char *Spelling;
+  Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
   Tok.setLength(Len);
   
   // TODO: Form a whole lexer around this and relex the token!  For now, just