3 files changed, 49 insertions, 13 deletions
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 004e6755e5..1cfa0e3745 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -806,7 +806,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    bool StringInvalid = false;
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, 
+                                         &StringInvalid);
+    if (StringInvalid) {
+      hadError = 1;
+      continue;
+    }
+
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
 
     // TODO: Input character set mapping support.
@@ -904,8 +911,12 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
   llvm::SmallString<16> SpellingBuffer;
   SpellingBuffer.resize(Tok.getLength());
 
+  bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr, &StringInvalid);
+  if (StringInvalid) {
+    return 0;
+  }
 
   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
 
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index ede129edcb..756ce27a93 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -170,7 +170,12 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     return true;
   case tok::numeric_constant: {
     llvm::SmallString<64> IntegerBuffer;
-    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer);
+    bool NumberInvalid = false;
+    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, 
+                                              &NumberInvalid);
+    if (NumberInvalid)
+      return true; // a diagnostic was already reported
+
     NumericLiteralParser Literal(Spelling.begin(), Spelling.end(),
                                  PeekTok.getLocation(), PP);
     if (Literal.hadError)
@@ -216,7 +221,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   }
   case tok::char_constant: {   // 'x'
     llvm::SmallString<32> CharBuffer;
-    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer);
+    bool CharInvalid = false;
+    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+    if (CharInvalid)
+      return true;
 
     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                               PeekTok.getLocation(), PP);
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index a6efe7f5bc..5584b18da1 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -282,11 +282,19 @@ bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok,
                                       const SourceManager &SourceMgr,
-                                      const LangOptions &Features) {
+                                      const LangOptions &Features, 
+                                      bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token contains nothing interesting, return it directly.
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
 
@@ -310,8 +318,8 @@ std::string Preprocessor::getSpelling(const Token &Tok,
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok) const {
-  return getSpelling(Tok, SourceMgr, Features);
+std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
+  return getSpelling(Tok, SourceMgr, Features, Invalid);
 }
 
 /// getSpelling - This method is used to get the spelling of a token into a
@@ -325,7 +333,7 @@ std::string Preprocessor::getSpelling(const Token &Tok) const {
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
 unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer) const {
+                                   const char *&Buffer, bool *Invalid) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token is an identifier, just return the string from the identifier
@@ -341,8 +349,16 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
 
-  if (TokStart == 0)
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
@@ -368,7 +384,8 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
-                                    llvm::SmallVectorImpl<char> &Buffer) const {
+                                          llvm::SmallVectorImpl<char> &Buffer,
+                                          bool *Invalid) const {
   // Try the fast path.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo())
     return II->getName();
@@ -378,7 +395,7 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
     Buffer.resize(Tok.getLength());
 
   const char *Ptr = Buffer.data();
-  unsigned Len = getSpelling(Tok, Ptr);
+  unsigned Len = getSpelling(Tok, Ptr, Invalid);
   return llvm::StringRef(Ptr, Len);
 }