Introduce optional "Invalid" parameters to routines that invoke the SourceManager's getBuffer() and, therefore, could fail, along with Preprocessor::getSpelling().

Use the Invalid parameters in the literal parsers (string, floating point, integral, character) to make them robust against errors that stem from, e.g., PCH files that are not consistent with the underlying file system. I still need to audit every caller of these routines to determine which ones need specific handling of error conditions. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@98608 91177308-0d34-0410-b5e6-96231b3b80d8
author: Douglas Gregor <dgregor@apple.com> 2010-03-16 05:20:39 +0000
committer: Douglas Gregor <dgregor@apple.com> 2010-03-16 05:20:39 +0000
commit: 50f6af7a6d6951a63f3da7d4c5a7d3965bf73b63 (patch)
tree: 28f78b0fe61c0b7a80cf3ccf0d1c39a884986120 /lib
parent: 36c35ba0aca641e60e5dbee8efbc620c08b9bd61 (diff)
4 files changed, 106 insertions, 38 deletions
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp
index f0a0d4c1bb..6335504a7d 100644
--- a/lib/Basic/SourceManager.cpp
+++ b/lib/Basic/SourceManager.cpp
@@ -457,10 +457,11 @@ SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
 }
 
 const llvm::MemoryBuffer *
-SourceManager::getMemoryBufferForFile(const FileEntry *File) {
+SourceManager::getMemoryBufferForFile(const FileEntry *File,
+                                      bool *Invalid) {
   const SrcMgr::ContentCache *IR = getOrCreateContentCache(File);
   assert(IR && "getOrCreateContentCache() cannot return NULL");
-  return IR->getBuffer(Diag);
+  return IR->getBuffer(Diag, Invalid);
 }
 
 bool SourceManager::overrideFileContents(const FileEntry *SourceFile,
@@ -701,21 +702,34 @@ SourceManager::getInstantiationRange(SourceLocation Loc) const {
 
 /// getCharacterData - Return a pointer to the start of the specified location
 /// in the appropriate MemoryBuffer.
-const char *SourceManager::getCharacterData(SourceLocation SL) const {
+const char *SourceManager::getCharacterData(SourceLocation SL,
+                                            bool *Invalid) const {
   // Note that this is a hot function in the getSpelling() path, which is
   // heavily used by -E mode.
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
 
   // Note that calling 'getBuffer()' may lazily page in a source file.
-  return getSLocEntry(LocInfo.first).getFile().getContentCache()
-              ->getBuffer(Diag)->getBufferStart() + LocInfo.second;
+  bool CharDataInvalid = false;
+  const llvm::MemoryBuffer *Buffer
+    = getSLocEntry(LocInfo.first).getFile().getContentCache()->getBuffer(Diag, 
+                                                              &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
 }
 
 
 /// getColumnNumber - Return the column # for the specified file position.
 /// this is significantly cheaper to compute than the line number.
-unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
-  const char *Buf = getBuffer(FID)->getBufferStart();
+unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
+                                        bool *Invalid) const {
+  bool MyInvalid = false;
+  const char *Buf = getBuffer(FID, &MyInvalid)->getBufferStart();
+  if (Invalid)
+    *Invalid = MyInvalid;
+
+  if (MyInvalid)
+    return 1;
 
   unsigned LineStart = FilePos;
   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
@@ -723,27 +737,30 @@ unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
   return FilePos-LineStart+1;
 }
 
-unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
+unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc,
+                                                bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
-  return getColumnNumber(LocInfo.first, LocInfo.second);
+  return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
 }
 
-unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
+unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc,
+                                                     bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
-  return getColumnNumber(LocInfo.first, LocInfo.second);
+  return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
 }
 
-
-
 static DISABLE_INLINE void ComputeLineNumbers(Diagnostic &Diag,
                                               ContentCache* FI,
-                                              llvm::BumpPtrAllocator &Alloc);
+                                              llvm::BumpPtrAllocator &Alloc,
+                                              bool &Invalid);
 static void ComputeLineNumbers(Diagnostic &Diag, ContentCache* FI, 
-                               llvm::BumpPtrAllocator &Alloc){
+                               llvm::BumpPtrAllocator &Alloc, bool &Invalid) {
   // Note that calling 'getBuffer()' may lazily page in the file.
-  const MemoryBuffer *Buffer = FI->getBuffer(Diag);
+  const MemoryBuffer *Buffer = FI->getBuffer(Diag, &Invalid);
+  if (Invalid)
+    return;
 
   // Find the file offsets of all of the *physical* source lines.  This does
   // not look at trigraphs, escaped newlines, or anything else tricky.
@@ -789,7 +806,8 @@ static void ComputeLineNumbers(Diagnostic &Diag, ContentCache* FI,
 /// for the position indicated.  This requires building and caching a table of
 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
 /// about to emit a diagnostic.
-unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
+unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos, 
+                                      bool *Invalid) const {
   ContentCache *Content;
   if (LastLineNoFileIDQuery == FID)
     Content = LastLineNoContentCache;
@@ -799,8 +817,15 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
 
   // If this is the first use of line information for this buffer, compute the
   /// SourceLineCache for it on demand.
-  if (Content->SourceLineCache == 0)
-    ComputeLineNumbers(Diag, Content, ContentCacheAlloc);
+  if (Content->SourceLineCache == 0) {
+    bool MyInvalid = false;
+    ComputeLineNumbers(Diag, Content, ContentCacheAlloc, MyInvalid);
+    if (Invalid)
+      *Invalid = MyInvalid;
+    if (MyInvalid)
+      return 1;
+  } else if (Invalid)
+    *Invalid = false;
 
   // Okay, we know we have a line number table.  Do a binary search to find the
   // line number that this character position lands on.
@@ -886,12 +911,14 @@ unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
   return LineNo;
 }
 
-unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
+unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc, 
+                                                   bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
   return getLineNumber(LocInfo.first, LocInfo.second);
 }
-unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
+unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc, 
+                                              bool *Invalid) const {
   if (Loc.isInvalid()) return 0;
   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
   return getLineNumber(LocInfo.first, LocInfo.second);
@@ -931,10 +958,11 @@ SourceManager::getFileCharacteristic(SourceLocation Loc) const {
 /// Return the filename or buffer identifier of the buffer the location is in.
 /// Note that this name does not respect #line directives.  Use getPresumedLoc
 /// for normal clients.
-const char *SourceManager::getBufferName(SourceLocation Loc) const {
+const char *SourceManager::getBufferName(SourceLocation Loc, 
+                                         bool *Invalid) const {
   if (Loc.isInvalid()) return "<invalid loc>";
 
-  return getBuffer(getFileID(Loc))->getBufferIdentifier();
+  return getBuffer(getFileID(Loc), Invalid)->getBufferIdentifier();
 }
 
 
@@ -1014,8 +1042,12 @@ SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
 
   // If this is the first use of line information for this buffer, compute the
   /// SourceLineCache for it on demand.
-  if (Content->SourceLineCache == 0)
-    ComputeLineNumbers(Diag, Content, ContentCacheAlloc);
+  if (Content->SourceLineCache == 0) {
+    bool MyInvalid = false;
+    ComputeLineNumbers(Diag, Content, ContentCacheAlloc, MyInvalid);
+    if (MyInvalid)
+      return SourceLocation();
+  }
 
   // Find the first file ID that corresponds to the given file.
   FileID FirstFID;
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 004e6755e5..1cfa0e3745 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -806,7 +806,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     // Get the spelling of the token, which eliminates trigraphs, etc.  We know
     // that ThisTokBuf points to a buffer that is big enough for the whole token
     // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    bool StringInvalid = false;
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf, 
+                                         &StringInvalid);
+    if (StringInvalid) {
+      hadError = 1;
+      continue;
+    }
+
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
 
     // TODO: Input character set mapping support.
@@ -904,8 +911,12 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
   llvm::SmallString<16> SpellingBuffer;
   SpellingBuffer.resize(Tok.getLength());
 
+  bool StringInvalid = false;
   const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr, &StringInvalid);
+  if (StringInvalid) {
+    return 0;
+  }
 
   assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
 
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index ede129edcb..756ce27a93 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -170,7 +170,12 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     return true;
   case tok::numeric_constant: {
     llvm::SmallString<64> IntegerBuffer;
-    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer);
+    bool NumberInvalid = false;
+    llvm::StringRef Spelling = PP.getSpelling(PeekTok, IntegerBuffer, 
+                                              &NumberInvalid);
+    if (NumberInvalid)
+      return true; // a diagnostic was already reported
+
     NumericLiteralParser Literal(Spelling.begin(), Spelling.end(),
                                  PeekTok.getLocation(), PP);
     if (Literal.hadError)
@@ -216,7 +221,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   }
   case tok::char_constant: {   // 'x'
     llvm::SmallString<32> CharBuffer;
-    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer);
+    bool CharInvalid = false;
+    llvm::StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
+    if (CharInvalid)
+      return true;
 
     CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
                               PeekTok.getLocation(), PP);
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index a6efe7f5bc..5584b18da1 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -282,11 +282,19 @@ bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok,
                                       const SourceManager &SourceMgr,
-                                      const LangOptions &Features) {
+                                      const LangOptions &Features, 
+                                      bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token contains nothing interesting, return it directly.
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
 
@@ -310,8 +318,8 @@ std::string Preprocessor::getSpelling(const Token &Tok,
 /// after trigraph expansion and escaped-newline folding.  In particular, this
 /// wants to get the true, uncanonicalized, spelling of things like digraphs
 /// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok) const {
-  return getSpelling(Tok, SourceMgr, Features);
+std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
+  return getSpelling(Tok, SourceMgr, Features, Invalid);
 }
 
 /// getSpelling - This method is used to get the spelling of a token into a
@@ -325,7 +333,7 @@ std::string Preprocessor::getSpelling(const Token &Tok) const {
 /// copy).  The caller is not allowed to modify the returned buffer pointer
 /// if an internal buffer is returned.
 unsigned Preprocessor::getSpelling(const Token &Tok,
-                                   const char *&Buffer) const {
+                                   const char *&Buffer, bool *Invalid) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
 
   // If this token is an identifier, just return the string from the identifier
@@ -341,8 +349,16 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
 
-  if (TokStart == 0)
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
@@ -368,7 +384,8 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
 /// SmallVector. Note that the returned StringRef may not point to the
 /// supplied buffer if a copy can be avoided.
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
-                                    llvm::SmallVectorImpl<char> &Buffer) const {
+                                          llvm::SmallVectorImpl<char> &Buffer,
+                                          bool *Invalid) const {
   // Try the fast path.
   if (const IdentifierInfo *II = Tok.getIdentifierInfo())
     return II->getName();
@@ -378,7 +395,7 @@ llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
     Buffer.resize(Tok.getLength());
 
   const char *Ptr = Buffer.data();
-  unsigned Len = getSpelling(Tok, Ptr);
+  unsigned Len = getSpelling(Tok, Ptr, Invalid);
   return llvm::StringRef(Ptr, Len);
 }
author	Douglas Gregor <dgregor@apple.com>	2010-03-16 05:20:39 +0000
committer	Douglas Gregor <dgregor@apple.com>	2010-03-16 05:20:39 +0000
commit	50f6af7a6d6951a63f3da7d4c5a7d3965bf73b63 (patch)
tree	28f78b0fe61c0b7a80cf3ccf0d1c39a884986120 /lib
parent	36c35ba0aca641e60e5dbee8efbc620c08b9bd61 (diff)