diff options
-rw-r--r-- | Basic/SourceManager.cpp | 147 | ||||
-rw-r--r-- | clang.xcodeproj/project.pbxproj | 2 | ||||
-rw-r--r-- | include/clang/Basic/SourceManager.h | 10 |
3 files changed, 106 insertions, 53 deletions
diff --git a/Basic/SourceManager.cpp b/Basic/SourceManager.cpp index be8eeee408..05ead2d786 100644 --- a/Basic/SourceManager.cpp +++ b/Basic/SourceManager.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/System/Path.h" #include <algorithm> @@ -236,6 +237,50 @@ std::string SourceManager::getSourceName(SourceLocation Loc) { return getFileInfo(FileID)->Buffer->getBufferIdentifier(); } +static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE; +static void ComputeLineNumbers(FileInfo *FI) { + const MemoryBuffer *Buffer = FI->Buffer; + + // Find the file offsets of all of the *physical* source lines. This does + // not look at trigraphs, escaped newlines, or anything else tricky. + std::vector<unsigned> LineOffsets; + + // Line #1 starts at char 0. + LineOffsets.push_back(0); + + const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); + unsigned Offs = 0; + while (1) { + // Skip over the contents of the line. + // TODO: Vectorize this? This is very performance sensitive for programs + // with lots of diagnostics and in -E mode. + const unsigned char *NextBuf = (const unsigned char *)Buf; + while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') + ++NextBuf; + Offs += NextBuf-Buf; + Buf = NextBuf; + + if (Buf[0] == '\n' || Buf[0] == '\r') { + // If this is \n\r or \r\n, skip both characters. + if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) + ++Offs, ++Buf; + ++Offs, ++Buf; + LineOffsets.push_back(Offs); + } else { + // Otherwise, this is a null. If end of file, exit. + if (Buf == End) break; + // Otherwise, skip the null. + ++Offs, ++Buf; + } + } + LineOffsets.push_back(Offs); + + // Copy the offsets into the FileInfo structure. + FI->NumLines = LineOffsets.size(); + FI->SourceLineCache = new unsigned[LineOffsets.size()]; + std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); +} /// getLineNumber - Given a SourceLocation, return the physical line number /// for the position indicated. This requires building and caching a table of @@ -244,66 +289,66 @@ std::string SourceManager::getSourceName(SourceLocation Loc) { unsigned SourceManager::getLineNumber(SourceLocation Loc) { unsigned FileID = Loc.getFileID(); if (FileID == 0) return 0; - FileInfo *FileInfo = getFileInfo(FileID); + FileInfo *FileInfo; + + if (LastLineNoFileIDQuery == FileID) + FileInfo = LastLineNoFileInfo; + else + FileInfo = getFileInfo(FileID); // If this is the first use of line information for this buffer, compute the - /// SourceLineCache for it on demand. - if (FileInfo->SourceLineCache == 0) { - const MemoryBuffer *Buffer = FileInfo->Buffer; - - // Find the file offsets of all of the *physical* source lines. This does - // not look at trigraphs, escaped newlines, or anything else tricky. - std::vector<unsigned> LineOffsets; - - // Line #1 starts at char 0. - LineOffsets.push_back(0); - - const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); - const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); - unsigned Offs = 0; - while (1) { - // Skip over the contents of the line. - // TODO: Vectorize this? This is very performance sensitive for programs - // with lots of diagnostics and in -E mode. - const unsigned char *NextBuf = (const unsigned char *)Buf; - while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') - ++NextBuf; - Offs += NextBuf-Buf; - Buf = NextBuf; - - if (Buf[0] == '\n' || Buf[0] == '\r') { - // If this is \n\r or \r\n, skip both characters. - if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) - ++Offs, ++Buf; - ++Offs, ++Buf; - LineOffsets.push_back(Offs); - } else { - // Otherwise, this is a null. If end of file, exit. - if (Buf == End) break; - // Otherwise, skip the null. - ++Offs, ++Buf; - } - } - LineOffsets.push_back(Offs); - - // Copy the offsets into the FileInfo structure. - FileInfo->NumLines = LineOffsets.size(); - FileInfo->SourceLineCache = new unsigned[LineOffsets.size()]; - std::copy(LineOffsets.begin(), LineOffsets.end(), - FileInfo->SourceLineCache); - } + /// SourceLineCache for it on demand. + if (FileInfo->SourceLineCache == 0) + ComputeLineNumbers(FileInfo); // Okay, we know we have a line number table. Do a binary search to find the // line number that this character position lands on. - unsigned NumLines = FileInfo->NumLines; unsigned *SourceLineCache = FileInfo->SourceLineCache; - + unsigned *SourceLineCacheStart = SourceLineCache; + unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines; + + unsigned QueriedFilePos = getFullFilePos(Loc)+1; + + // If the previous query was to the same file, we know both the file pos from + // that query and the line number returned. This allows us to narrow the + // search space from the entire file to something near the match. + if (LastLineNoFileIDQuery == FileID) { + if (QueriedFilePos >= LastLineNoFilePos) { + SourceLineCache = SourceLineCache+LastLineNoResult-1; + + // The query is likely to be nearby the previous one. Here we check to + // see if it is within 5, 10 or 20 lines. It can be far away in cases + // where big comment blocks and vertical whitespace eat up lines but + // contribute no tokens. + if (SourceLineCache+5 < SourceLineCacheEnd) { + if (SourceLineCache[5] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+5; + else if (SourceLineCache+10 < SourceLineCacheEnd) { + if (SourceLineCache[10] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+10; + else if (SourceLineCache+20 < SourceLineCacheEnd) { + if (SourceLineCache[20] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+20; + } + } + } + } else { + SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; + } + } + + unsigned *Pos; // TODO: If this is performance sensitive, we could try doing simple radix // type approaches to make good (tight?) initial guesses based on the // assumption that all lines are the same average size. - unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines, - getFullFilePos(Loc)+1); - return Pos-SourceLineCache; + Pos = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); + unsigned LineNo = Pos-SourceLineCacheStart; + + LastLineNoFileIDQuery = FileID; + LastLineNoFileInfo = FileInfo; + LastLineNoFilePos = QueriedFilePos; + LastLineNoResult = LineNo; + return LineNo; } /// PrintStats - Print statistics to stderr. diff --git a/clang.xcodeproj/project.pbxproj b/clang.xcodeproj/project.pbxproj index bb772fa182..ff84ac7c23 100644 --- a/clang.xcodeproj/project.pbxproj +++ b/clang.xcodeproj/project.pbxproj @@ -191,7 +191,7 @@ 1A869AA70BA21ABA008DA07A /* LiteralSupport.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LiteralSupport.cpp; sourceTree = "<group>"; }; 84D9A8870C1A57E100AC7ABC /* AttributeList.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = AttributeList.cpp; path = Parse/AttributeList.cpp; sourceTree = "<group>"; }; 84D9A88B0C1A581300AC7ABC /* AttributeList.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = AttributeList.h; path = clang/Parse/AttributeList.h; sourceTree = "<group>"; }; - 8DD76F6C0486A84900D96B5E /* clang */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = clang; sourceTree = BUILT_PRODUCTS_DIR; }; + 8DD76F6C0486A84900D96B5E /* clang */ = {isa = PBXFileReference; includeInIndex = 0; lastKnownFileType = "compiled.mach-o.executable"; path = clang; sourceTree = BUILT_PRODUCTS_DIR; }; DE01DA480B12ADA300AC22CE /* PPCallbacks.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = PPCallbacks.h; sourceTree = "<group>"; }; DE06756B0C051CFE00EBBFD8 /* ParseExprCXX.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseExprCXX.cpp; path = Parse/ParseExprCXX.cpp; sourceTree = "<group>"; }; DE06B73D0A8307640050E87E /* LangOptions.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LangOptions.h; sourceTree = "<group>"; }; diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h index f06a6fbf54..dd8a86cc7e 100644 --- a/include/clang/Basic/SourceManager.h +++ b/include/clang/Basic/SourceManager.h @@ -154,13 +154,21 @@ class SourceManager { /// MacroIDs - Information about each MacroID. std::vector<SrcMgr::MacroIDInfo> MacroIDs; + /// LastLineNo - These ivars serve as a cache used in the getLineNumber + /// method which is used to speedup getLineNumber calls to nearby locations. + unsigned LastLineNoFileIDQuery; + SrcMgr::FileInfo *LastLineNoFileInfo; + unsigned LastLineNoFilePos; + unsigned LastLineNoResult; public: - SourceManager() {} + SourceManager() : LastLineNoFileIDQuery(~0U) {} ~SourceManager(); void clearIDTables() { FileIDs.clear(); MacroIDs.clear(); + LastLineNoFileIDQuery = ~0U; + LastLineNoFileInfo = 0; } /// createFileID - Create a new FileID that represents the specified file |