diff options
author | Chris Lattner <sabre@nondot.org> | 2007-07-24 05:57:19 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2007-07-24 05:57:19 +0000 |
commit | 5e36a7a89da5d69ece23fc8228624a053f75c645 (patch) | |
tree | 094c1ab2abd4c1ddf3974d2cfca2e8d3fc7d473c /Basic/SourceManager.cpp | |
parent | b638a309b61ccb80cd4c437b578d25d92f948fbf (diff) |
Add a cache to SourceManager to accellerate line # lookup. This is a
bottleneck for -E computation, because every token that starts a line needs
to determine *which* line it is on (so -E mode can insert the appropriate
vertical whitespace). This optimization improves this common case where
it is striding through the line # table.
This speeds up -E on xalancbmk by 3.2%
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@40459 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'Basic/SourceManager.cpp')
-rw-r--r-- | Basic/SourceManager.cpp | 147 |
1 files changed, 96 insertions, 51 deletions
diff --git a/Basic/SourceManager.cpp b/Basic/SourceManager.cpp index be8eeee408..05ead2d786 100644 --- a/Basic/SourceManager.cpp +++ b/Basic/SourceManager.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/FileManager.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/System/Path.h" #include <algorithm> @@ -236,6 +237,50 @@ std::string SourceManager::getSourceName(SourceLocation Loc) { return getFileInfo(FileID)->Buffer->getBufferIdentifier(); } +static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE; +static void ComputeLineNumbers(FileInfo *FI) { + const MemoryBuffer *Buffer = FI->Buffer; + + // Find the file offsets of all of the *physical* source lines. This does + // not look at trigraphs, escaped newlines, or anything else tricky. + std::vector<unsigned> LineOffsets; + + // Line #1 starts at char 0. + LineOffsets.push_back(0); + + const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); + unsigned Offs = 0; + while (1) { + // Skip over the contents of the line. + // TODO: Vectorize this? This is very performance sensitive for programs + // with lots of diagnostics and in -E mode. + const unsigned char *NextBuf = (const unsigned char *)Buf; + while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') + ++NextBuf; + Offs += NextBuf-Buf; + Buf = NextBuf; + + if (Buf[0] == '\n' || Buf[0] == '\r') { + // If this is \n\r or \r\n, skip both characters. + if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) + ++Offs, ++Buf; + ++Offs, ++Buf; + LineOffsets.push_back(Offs); + } else { + // Otherwise, this is a null. If end of file, exit. + if (Buf == End) break; + // Otherwise, skip the null. + ++Offs, ++Buf; + } + } + LineOffsets.push_back(Offs); + + // Copy the offsets into the FileInfo structure. + FI->NumLines = LineOffsets.size(); + FI->SourceLineCache = new unsigned[LineOffsets.size()]; + std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); +} /// getLineNumber - Given a SourceLocation, return the physical line number /// for the position indicated. This requires building and caching a table of @@ -244,66 +289,66 @@ std::string SourceManager::getSourceName(SourceLocation Loc) { unsigned SourceManager::getLineNumber(SourceLocation Loc) { unsigned FileID = Loc.getFileID(); if (FileID == 0) return 0; - FileInfo *FileInfo = getFileInfo(FileID); + FileInfo *FileInfo; + + if (LastLineNoFileIDQuery == FileID) + FileInfo = LastLineNoFileInfo; + else + FileInfo = getFileInfo(FileID); // If this is the first use of line information for this buffer, compute the - /// SourceLineCache for it on demand. - if (FileInfo->SourceLineCache == 0) { - const MemoryBuffer *Buffer = FileInfo->Buffer; - - // Find the file offsets of all of the *physical* source lines. This does - // not look at trigraphs, escaped newlines, or anything else tricky. - std::vector<unsigned> LineOffsets; - - // Line #1 starts at char 0. - LineOffsets.push_back(0); - - const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); - const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); - unsigned Offs = 0; - while (1) { - // Skip over the contents of the line. - // TODO: Vectorize this? This is very performance sensitive for programs - // with lots of diagnostics and in -E mode. - const unsigned char *NextBuf = (const unsigned char *)Buf; - while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') - ++NextBuf; - Offs += NextBuf-Buf; - Buf = NextBuf; - - if (Buf[0] == '\n' || Buf[0] == '\r') { - // If this is \n\r or \r\n, skip both characters. - if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) - ++Offs, ++Buf; - ++Offs, ++Buf; - LineOffsets.push_back(Offs); - } else { - // Otherwise, this is a null. If end of file, exit. - if (Buf == End) break; - // Otherwise, skip the null. - ++Offs, ++Buf; - } - } - LineOffsets.push_back(Offs); - - // Copy the offsets into the FileInfo structure. - FileInfo->NumLines = LineOffsets.size(); - FileInfo->SourceLineCache = new unsigned[LineOffsets.size()]; - std::copy(LineOffsets.begin(), LineOffsets.end(), - FileInfo->SourceLineCache); - } + /// SourceLineCache for it on demand. + if (FileInfo->SourceLineCache == 0) + ComputeLineNumbers(FileInfo); // Okay, we know we have a line number table. Do a binary search to find the // line number that this character position lands on. - unsigned NumLines = FileInfo->NumLines; unsigned *SourceLineCache = FileInfo->SourceLineCache; - + unsigned *SourceLineCacheStart = SourceLineCache; + unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines; + + unsigned QueriedFilePos = getFullFilePos(Loc)+1; + + // If the previous query was to the same file, we know both the file pos from + // that query and the line number returned. This allows us to narrow the + // search space from the entire file to something near the match. + if (LastLineNoFileIDQuery == FileID) { + if (QueriedFilePos >= LastLineNoFilePos) { + SourceLineCache = SourceLineCache+LastLineNoResult-1; + + // The query is likely to be nearby the previous one. Here we check to + // see if it is within 5, 10 or 20 lines. It can be far away in cases + // where big comment blocks and vertical whitespace eat up lines but + // contribute no tokens. + if (SourceLineCache+5 < SourceLineCacheEnd) { + if (SourceLineCache[5] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+5; + else if (SourceLineCache+10 < SourceLineCacheEnd) { + if (SourceLineCache[10] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+10; + else if (SourceLineCache+20 < SourceLineCacheEnd) { + if (SourceLineCache[20] > QueriedFilePos) + SourceLineCacheEnd = SourceLineCache+20; + } + } + } + } else { + SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; + } + } + + unsigned *Pos; // TODO: If this is performance sensitive, we could try doing simple radix // type approaches to make good (tight?) initial guesses based on the // assumption that all lines are the same average size. - unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines, - getFullFilePos(Loc)+1); - return Pos-SourceLineCache; + Pos = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); + unsigned LineNo = Pos-SourceLineCacheStart; + + LastLineNoFileIDQuery = FileID; + LastLineNoFileInfo = FileInfo; + LastLineNoFilePos = QueriedFilePos; + LastLineNoResult = LineNo; + return LineNo; } /// PrintStats - Print statistics to stderr. |