Audit all Preprocessor::getSpelling() callers, improving failure
recovery for those that need it.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98689 91177308-0d34-0410-b5e6-96231b3b80d8
author: Douglas Gregor <dgregor@apple.com> 2010-03-16 22:30:13 +0000
committer: Douglas Gregor <dgregor@apple.com> 2010-03-16 22:30:13 +0000
commit: 453091cc2082e207ea2c2dda645a9bc01b37fb0c (patch)
tree: 96026d218bae429d6706d3783cd94e86b578dc47
parent: a98c27ba83d25d878473ed8c6a34b40b27d323fd (diff)
9 files changed, 77 insertions, 27 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 0b8b6242db..60a1707fb3 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1037,7 +1037,11 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
 
   // If this BCPL-style comment is in a macro definition, transmogrify it into
   // a C-style block comment.
-  std::string Spelling = PP->getSpelling(Result);
+  bool Invalid = false;
+  std::string Spelling = PP->getSpelling(Result, &Invalid);
+  if (Invalid)
+    return true;
+  
   assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
   Spelling[1] = '*';   // Change prefix to "/*".
   Spelling += "*/";    // add suffix.
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 2f1a34c832..89f6368a27 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -208,24 +208,31 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
     if (Tok.is(tok::string_literal) ||       // "foo"
         Tok.is(tok::wide_string_literal) ||  // L"foo"
         Tok.is(tok::char_constant)) {        // 'x' and L'x'.
-      std::string Str = Lexer::Stringify(PP.getSpelling(Tok));
-      Result.append(Str.begin(), Str.end());
+      bool Invalid = false;
+      std::string TokStr = PP.getSpelling(Tok, &Invalid);
+      if (!Invalid) {
+        std::string Str = Lexer::Stringify(TokStr);
+        Result.append(Str.begin(), Str.end());
+      }
     } else {
       // Otherwise, just append the token.  Do some gymnastics to get the token
       // in place and avoid copies where possible.
       unsigned CurStrLen = Result.size();
       Result.resize(CurStrLen+Tok.getLength());
       const char *BufPtr = &Result[CurStrLen];
-      unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr);
-
-      // If getSpelling returned a pointer to an already uniqued version of the
-      // string instead of filling in BufPtr, memcpy it onto our string.
-      if (BufPtr != &Result[CurStrLen])
-        memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
-
-      // If the token was dirty, the spelling may be shorter than the token.
-      if (ActualTokLen != Tok.getLength())
-        Result.resize(CurStrLen+ActualTokLen);
+      bool Invalid = false;
+      unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
+
+      if (!Invalid) {
+        // If getSpelling returned a pointer to an already uniqued version of
+        // the string instead of filling in BufPtr, memcpy it onto our string.
+        if (BufPtr != &Result[CurStrLen])
+          memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
+
+        // If the token was dirty, the spelling may be shorter than the token.
+        if (ActualTokLen != Tok.getLength())
+          Result.resize(CurStrLen+ActualTokLen);
+      }
     }
   }
 
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 3bf3fc4af9..7b601010b2 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -71,7 +71,11 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
 
   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
   if (II == 0) {
-    std::string Spelling = getSpelling(MacroNameTok);
+    bool Invalid = false;
+    std::string Spelling = getSpelling(MacroNameTok, &Invalid);
+    if (Invalid)
+      return;
+    
     const IdentifierInfo &Info = Identifiers.get(Spelling);
     if (Info.isCPlusPlusOperatorKeyword())
       // C++ 2.5p2: Alternative tokens behave the same as its primary token
@@ -619,8 +623,11 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val,
   llvm::SmallString<64> IntegerBuffer;
   IntegerBuffer.resize(DigitTok.getLength());
   const char *DigitTokBegin = &IntegerBuffer[0];
-  unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin);
-
+  bool Invalid = false;
+  unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
+  if (Invalid)
+    return true;
+  
   // Verify that we have a simple digit-sequence, and compute the value.  This
   // is always a simple digit string computed in decimal, so we do this manually
   // here.
@@ -905,8 +912,12 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
   // Verify that there is nothing after the string, other than EOM.
   CheckEndOfDirective("ident");
 
-  if (Callbacks)
-    Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok));
+  if (Callbacks) {
+    bool Invalid = false;
+    std::string Str = getSpelling(StrTok, &Invalid);
+    if (!Invalid)
+      Callbacks->Ident(Tok.getLocation(), Str);
+  }
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 5fe2ef172e..ffae8ab6af 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -542,9 +542,13 @@ static bool EvaluateHasIncludeCommon(bool &Result, Token &Tok,
     return false;
 
   case tok::angle_string_literal:
-  case tok::string_literal:
-    Filename = PP.getSpelling(Tok, FilenameBuffer);
+  case tok::string_literal: {
+    bool Invalid = false;
+    Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
+    if (Invalid)
+      return false;
     break;
+  }
 
   case tok::less:
     // This could be a <foo/bar.h> file coming from a macro expansion.  In this
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 654d4606a9..92332a0068 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -287,7 +287,10 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
 
   // Reserve a buffer to get the spelling.
   llvm::SmallString<128> FilenameBuffer;
-  llvm::StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  bool Invalid = false;
+  llvm::StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
+  if (Invalid)
+    return;
 
   bool isAngled =
     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index dbd1b8400d..56bb073e59 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -396,12 +396,17 @@ bool TokenLexer::PasteTokens(Token &Tok) {
 
     // Get the spelling of the LHS token in Buffer.
     const char *BufPtr = &Buffer[0];
-    unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
+    bool Invalid = false;
+    unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
     if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
       memcpy(&Buffer[0], BufPtr, LHSLen);
-
+    if (Invalid)
+      return true;
+    
     BufPtr = &Buffer[LHSLen];
-    unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
+    unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
+    if (Invalid)
+      return true;
     if (BufPtr != &Buffer[LHSLen])   // Really, we want the chars in Buffer!
       memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
 
diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp
index b92e7539de..11f84edcf1 100644
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@@ -167,7 +167,10 @@ Parser::DeclPtrTy Parser::ParseLinkage(ParsingDeclSpec &DS,
   assert(Tok.is(tok::string_literal) && "Not a string literal!");
   llvm::SmallString<8> LangBuffer;
   // LangBuffer is guaranteed to be big enough.
-  llvm::StringRef Lang = PP.getSpelling(Tok, LangBuffer);
+  bool Invalid = false;
+  llvm::StringRef Lang = PP.getSpelling(Tok, LangBuffer, &Invalid);
+  if (Invalid)
+    return DeclPtrTy();
 
   SourceLocation Loc = ConsumeStringToken();
 
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index a39ba2f7d0..32ed3e0006 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -1751,7 +1751,10 @@ Sema::OwningExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc,
 
 Sema::OwningExprResult Sema::ActOnCharacterConstant(const Token &Tok) {
   llvm::SmallString<16> CharBuffer;
-  llvm::StringRef ThisTok = PP.getSpelling(Tok, CharBuffer);
+  bool Invalid = false;
+  llvm::StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid);
+  if (Invalid)
+    return ExprError();
 
   CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(),
                             PP);
@@ -1789,7 +1792,10 @@ Action::OwningExprResult Sema::ActOnNumericConstant(const Token &Tok) {
   const char *ThisTokBegin = &IntegerBuffer[0];
 
   // Get the spelling of the token, which eliminates trigraphs, etc.
-  unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin);
+  bool Invalid = false;
+  unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin, &Invalid);
+  if (Invalid)
+    return ExprError();
 
   NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength,
                                Tok.getLocation(), PP);
diff --git a/test/PCH/changed-files.c b/test/PCH/changed-files.c
index 36453c48e7..4ef80ffb04 100644
--- a/test/PCH/changed-files.c
+++ b/test/PCH/changed-files.c
@@ -1,4 +1,5 @@
 const char *s0 = m0;
+int s1 = m1;
 
 // RUN: echo '#define m0 ""' > %t.h
 // RUN: %clang_cc1 -emit-pch -o %t.h.pch %t.h
@@ -9,3 +10,9 @@ const char *s0 = m0;
 // RUN: %clang_cc1 -emit-pch -o %t.h.pch %t.h
 // RUN: echo '' > %t.h
 // RUN: not %clang_cc1 -include-pch %t.h.pch %s 2>&1 | grep "size of file"
+
+// RUN: echo '#define m0 000' > %t.h
+// RUN: echo "#define m1 'abcd'" >> %t.h
+// RUN: %clang_cc1 -emit-pch -o %t.h.pch %t.h
+// RUN: echo '' > %t.h
+// RUN: not %clang_cc1 -include-pch %t.h.pch %s 2>&1 | grep "size of file"
author	Douglas Gregor <dgregor@apple.com>	2010-03-16 22:30:13 +0000
committer	Douglas Gregor <dgregor@apple.com>	2010-03-16 22:30:13 +0000
commit	453091cc2082e207ea2c2dda645a9bc01b37fb0c (patch)
tree	96026d218bae429d6706d3783cd94e86b578dc47
parent	a98c27ba83d25d878473ed8c6a34b40b27d323fd (diff)