diff options
Diffstat (limited to 'lib/Lex')
-rw-r--r-- | lib/Lex/HeaderMap.cpp | 242 | ||||
-rw-r--r-- | lib/Lex/HeaderSearch.cpp | 425 | ||||
-rw-r--r-- | lib/Lex/Lexer.cpp | 1661 | ||||
-rw-r--r-- | lib/Lex/LiteralSupport.cpp | 691 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.cpp | 225 | ||||
-rw-r--r-- | lib/Lex/MacroArgs.h | 109 | ||||
-rw-r--r-- | lib/Lex/MacroInfo.cpp | 70 | ||||
-rw-r--r-- | lib/Lex/Makefile | 28 | ||||
-rw-r--r-- | lib/Lex/PPDirectives.cpp | 1153 | ||||
-rw-r--r-- | lib/Lex/PPExpressions.cpp | 639 | ||||
-rw-r--r-- | lib/Lex/PPLexerChange.cpp | 401 | ||||
-rw-r--r-- | lib/Lex/PPMacroExpansion.cpp | 523 | ||||
-rw-r--r-- | lib/Lex/Pragma.cpp | 386 | ||||
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 560 | ||||
-rw-r--r-- | lib/Lex/ScratchBuffer.cpp | 72 | ||||
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 488 |
16 files changed, 7673 insertions, 0 deletions
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp new file mode 100644 index 0000000000..282e742b4c --- /dev/null +++ b/lib/Lex/HeaderMap.cpp @@ -0,0 +1,242 @@ +//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HeaderMap interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/HeaderMap.h" +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Data Structures and Manifest Constants +//===----------------------------------------------------------------------===// + +enum { + HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', + HMAP_HeaderVersion = 1, + + HMAP_EmptyBucketKey = 0 +}; + +namespace clang { +struct HMapBucket { + uint32_t Key; // Offset (into strings) of key. + + uint32_t Prefix; // Offset (into strings) of value prefix. + uint32_t Suffix; // Offset (into strings) of value suffix. +}; + +struct HMapHeader { + uint32_t Magic; // Magic word, also indicates byte order. + uint16_t Version; // Version number -- currently 1. + uint16_t Reserved; // Reserved for future use - zero for now. + uint32_t StringsOffset; // Offset to start of string pool. + uint32_t NumEntries; // Number of entries in the string table. + uint32_t NumBuckets; // Number of buckets (always a power of 2). + uint32_t MaxValueLength; // Length of longest result path (excluding nul). + // An array of 'NumBuckets' HMapBucket objects follows this header. + // Strings follow the buckets, at StringsOffset. +}; +} // end namespace clang. + +/// HashHMapKey - This is the 'well known' hash function required by the file +/// format, used to look up keys in the hash table. The hash table uses simple +/// linear probing based on this function. +static inline unsigned HashHMapKey(const char *S, const char *End) { + unsigned Result = 0; + + for (; S != End; S++) + Result += tolower(*S) * 13; + return Result; +} + + + +//===----------------------------------------------------------------------===// +// Verification and Construction +//===----------------------------------------------------------------------===// + +/// HeaderMap::Create - This attempts to load the specified file as a header +/// map. If it doesn't look like a HeaderMap, it gives up and returns null. +/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason +/// into the string error argument and returns null. +const HeaderMap *HeaderMap::Create(const FileEntry *FE) { + // If the file is too small to be a header map, ignore it. + unsigned FileSize = FE->getSize(); + if (FileSize <= sizeof(HMapHeader)) return 0; + + llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer( + llvm::MemoryBuffer::getFile(FE->getName(), strlen(FE->getName()), 0, + FE->getSize())); + if (FileBuffer == 0) return 0; // Unreadable file? + const char *FileStart = FileBuffer->getBufferStart(); + + // We know the file is at least as big as the header, check it now. + const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart); + + // Sniff it to see if it's a headermap by checking the magic number and + // version. + bool NeedsByteSwap; + if (Header->Magic == HMAP_HeaderMagicNumber && + Header->Version == HMAP_HeaderVersion) + NeedsByteSwap = false; + else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) && + Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion)) + NeedsByteSwap = true; // Mixed endianness headermap. + else + return 0; // Not a header map. + + if (Header->Reserved != 0) return 0; + + // Okay, everything looks good, create the header map. + return new HeaderMap(FileBuffer.take(), NeedsByteSwap); +} + +HeaderMap::~HeaderMap() { + delete FileBuffer; +} + +//===----------------------------------------------------------------------===// +// Utility Methods +//===----------------------------------------------------------------------===// + + +/// getFileName - Return the filename of the headermap. +const char *HeaderMap::getFileName() const { + return FileBuffer->getBufferIdentifier(); +} + +unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const { + if (!NeedsBSwap) return X; + return llvm::ByteSwap_32(X); +} + +/// getHeader - Return a reference to the file header, in unbyte-swapped form. +/// This method cannot fail. +const HMapHeader &HeaderMap::getHeader() const { + // We know the file is at least as big as the header. Return it. + return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart()); +} + +/// getBucket - Return the specified hash table bucket from the header map, +/// bswap'ing its fields as appropriate. If the bucket number is not valid, +/// this return a bucket with an empty key (0). +HMapBucket HeaderMap::getBucket(unsigned BucketNo) const { + HMapBucket Result; + Result.Key = HMAP_EmptyBucketKey; + + const HMapBucket *BucketArray = + reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() + + sizeof(HMapHeader)); + + const HMapBucket *BucketPtr = BucketArray+BucketNo; + if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) + return Result; // Invalid buffer, corrupt hmap. + + // Otherwise, the bucket is valid. Load the values, bswapping as needed. + Result.Key = getEndianAdjustedWord(BucketPtr->Key); + Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix); + Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix); + return Result; +} + +/// getString - Look up the specified string in the string table. If the string +/// index is not valid, it returns an empty string. +const char *HeaderMap::getString(unsigned StrTabIdx) const { + // Add the start of the string table to the idx. + StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); + + // Check for invalid index. + if (StrTabIdx >= FileBuffer->getBufferSize()) + return 0; + + // Otherwise, we have a valid pointer into the file. Just return it. We know + // that the "string" can not overrun the end of the file, because the buffer + // is nul terminated by virtue of being a MemoryBuffer. + return FileBuffer->getBufferStart()+StrTabIdx; +} + +/// StringsEqualWithoutCase - Compare the specified two strings for case- +/// insensitive equality, returning true if they are equal. Both strings are +/// known to have the same length. +static bool StringsEqualWithoutCase(const char *S1, const char *S2, + unsigned Len) { + for (; Len; ++S1, ++S2, --Len) + if (tolower(*S1) != tolower(*S2)) + return false; + return true; +} + +//===----------------------------------------------------------------------===// +// The Main Drivers +//===----------------------------------------------------------------------===// + +/// dump - Print the contents of this headermap to stderr. +void HeaderMap::dump() const { + const HMapHeader &Hdr = getHeader(); + unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); + + fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n", + getFileName(), NumBuckets, + getEndianAdjustedWord(Hdr.NumEntries)); + + for (unsigned i = 0; i != NumBuckets; ++i) { + HMapBucket B = getBucket(i); + if (B.Key == HMAP_EmptyBucketKey) continue; + + const char *Key = getString(B.Key); + const char *Prefix = getString(B.Prefix); + const char *Suffix = getString(B.Suffix); + fprintf(stderr, " %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix); + } +} + +/// LookupFile - Check to see if the specified relative filename is located in +/// this HeaderMap. If so, open it and return its FileEntry. +const FileEntry *HeaderMap::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + FileManager &FM) const { + const HMapHeader &Hdr = getHeader(); + unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); + + // If the number of buckets is not a power of two, the headermap is corrupt. + // Don't probe infinitely. + if (NumBuckets & (NumBuckets-1)) + return 0; + + // Linearly probe the hash table. + for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) { + HMapBucket B = getBucket(Bucket & (NumBuckets-1)); + if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss. + + // See if the key matches. If not, probe on. + const char *Key = getString(B.Key); + unsigned BucketKeyLen = strlen(Key); + if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart)) + continue; + + // See if the actual strings equal. + if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen)) + continue; + + // If so, we have a match in the hash table. Construct the destination + // path. + llvm::SmallString<1024> DestPath; + DestPath += getString(B.Prefix); + DestPath += getString(B.Suffix); + return FM.getFile(DestPath.begin(), DestPath.end()); + } +} diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp new file mode 100644 index 0000000000..44ae35c8b7 --- /dev/null +++ b/lib/Lex/HeaderSearch.cpp @@ -0,0 +1,425 @@ +//===--- HeaderSearch.cpp - Resolve Header File Locations ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DirectoryLookup and HeaderSearch interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderMap.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/IdentifierTable.h" +#include "llvm/System/Path.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) { + SystemDirIdx = 0; + NoCurDirSearch = false; + + NumIncluded = 0; + NumMultiIncludeFileOptzn = 0; + NumFrameworkLookups = NumSubFrameworkLookups = 0; +} + +HeaderSearch::~HeaderSearch() { + // Delete headermaps. + for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) + delete HeaderMaps[i].second; +} + +void HeaderSearch::PrintStats() { + fprintf(stderr, "\n*** HeaderSearch Stats:\n"); + fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size()); + unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0; + for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) { + NumOnceOnlyFiles += FileInfo[i].isImport; + if (MaxNumIncludes < FileInfo[i].NumIncludes) + MaxNumIncludes = FileInfo[i].NumIncludes; + NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1; + } + fprintf(stderr, " %d #import/#pragma once files.\n", NumOnceOnlyFiles); + fprintf(stderr, " %d included exactly once.\n", NumSingleIncludedFiles); + fprintf(stderr, " %d max times a file is included.\n", MaxNumIncludes); + + fprintf(stderr, " %d #include/#include_next/#import.\n", NumIncluded); + fprintf(stderr, " %d #includes skipped due to" + " the multi-include optimization.\n", NumMultiIncludeFileOptzn); + + fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups); + fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups); +} + +/// CreateHeaderMap - This method returns a HeaderMap for the specified +/// FileEntry, uniquing them through the the 'HeaderMaps' datastructure. +const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { + // We expect the number of headermaps to be small, and almost always empty. + // If it ever grows, use of a linear search should be re-evaluated. + if (!HeaderMaps.empty()) { + for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i) + // Pointer equality comparison of FileEntries works because they are + // already uniqued by inode. + if (HeaderMaps[i].first == FE) + return HeaderMaps[i].second; + } + + if (const HeaderMap *HM = HeaderMap::Create(FE)) { + HeaderMaps.push_back(std::make_pair(FE, HM)); + return HM; + } + + return 0; +} + +//===----------------------------------------------------------------------===// +// File lookup within a DirectoryLookup scope +//===----------------------------------------------------------------------===// + +/// getName - Return the directory or filename corresponding to this lookup +/// object. +const char *DirectoryLookup::getName() const { + if (isNormalDir()) + return getDir()->getName(); + if (isFramework()) + return getFrameworkDir()->getName(); + assert(isHeaderMap() && "Unknown DirectoryLookup"); + return getHeaderMap()->getFileName(); +} + + +/// LookupFile - Lookup the specified file in this search path, returning it +/// if it exists or returning null if not. +const FileEntry *DirectoryLookup::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + HeaderSearch &HS) const { + llvm::SmallString<1024> TmpDir; + if (isNormalDir()) { + // Concatenate the requested file onto the directory. + // FIXME: Portability. Filename concatenation should be in sys::Path. + TmpDir += getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end()); + } + + if (isFramework()) + return DoFrameworkLookup(FilenameStart, FilenameEnd, HS); + + assert(isHeaderMap() && "Unknown directory lookup"); + return getHeaderMap()->LookupFile(FilenameStart, FilenameEnd,HS.getFileMgr()); +} + + +/// DoFrameworkLookup - Do a lookup of the specified file in the current +/// DirectoryLookup, which is a framework directory. +const FileEntry *DirectoryLookup::DoFrameworkLookup(const char *FilenameStart, + const char *FilenameEnd, + HeaderSearch &HS) const { + FileManager &FileMgr = HS.getFileMgr(); + + // Framework names must have a '/' in the filename. + const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + // Find out if this is the home for the specified framework, by checking + // HeaderSearch. Possible answer are yes/no and unknown. + const DirectoryEntry *&FrameworkDirCache = + HS.LookupFrameworkCache(FilenameStart, SlashPos); + + // If it is known and in some other directory, fail. + if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir()) + return 0; + + // Otherwise, construct the path to this framework dir. + + // FrameworkName = "/System/Library/Frameworks/" + llvm::SmallString<1024> FrameworkName; + FrameworkName += getFrameworkDir()->getName(); + if (FrameworkName.empty() || FrameworkName.back() != '/') + FrameworkName.push_back('/'); + + // FrameworkName = "/System/Library/Frameworks/Cocoa" + FrameworkName.append(FilenameStart, SlashPos); + + // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/" + FrameworkName += ".framework/"; + + // If the cache entry is still unresolved, query to see if the cache entry is + // still unresolved. If so, check its existence now. + if (FrameworkDirCache == 0) { + HS.IncrementFrameworkLookupCount(); + + // If the framework dir doesn't exist, we fail. + // FIXME: It's probably more efficient to query this with FileMgr.getDir. + if (!llvm::sys::Path(std::string(FrameworkName.begin(), + FrameworkName.end())).exists()) + return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. + FrameworkDirCache = getFrameworkDir(); + } + + // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" + unsigned OrigSize = FrameworkName.size(); + + FrameworkName += "Headers/"; + FrameworkName.append(SlashPos+1, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(), + FrameworkName.end())) { + return FE; + } + + // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" + const char *Private = "Private"; + FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, + Private+strlen(Private)); + return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end()); +} + + +//===----------------------------------------------------------------------===// +// Header File Location. +//===----------------------------------------------------------------------===// + + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if +/// non-null, indicates where the #including file is, in case a relative search +/// is needed. +const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir, + const FileEntry *CurFileEnt) { + // If 'Filename' is absolute, check to see if it exists and no searching. + // FIXME: Portability. This should be a sys::Path interface, this doesn't + // handle things like C:\foo.txt right, nor win32 \\network\device\blah. + if (FilenameStart[0] == '/') { + CurDir = 0; + + // If this was an #include_next "/absolute/file", fail. + if (FromDir) return 0; + + // Otherwise, just return the file. + return FileMgr.getFile(FilenameStart, FilenameEnd); + } + + // Step #0, unless disabled, check to see if the file is in the #includer's + // directory. This has to be based on CurFileEnt, not CurDir, because + // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and + // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h". + // This search is not done for <> headers. + if (CurFileEnt && !isAngled && !NoCurDirSearch) { + llvm::SmallString<1024> TmpDir; + // Concatenate the requested file onto the directory. + // FIXME: Portability. Filename concatenation should be in sys::Path. + TmpDir += CurFileEnt->getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end())) { + // Leave CurDir unset. + // This file is a system header or C++ unfriendly if the old file is. + // + // Note that the temporary 'DirInfo' is required here, as either call to + // getFileInfo could resize the vector and we don't want to rely on order + // of evaluation. + unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo; + getFileInfo(FE).DirInfo = DirInfo; + return FE; + } + } + + CurDir = 0; + + // If this is a system #include, ignore the user #include locs. + unsigned i = isAngled ? SystemDirIdx : 0; + + // If this is a #include_next request, start searching after the directory the + // file was found in. + if (FromDir) + i = FromDir-&SearchDirs[0]; + + // Cache all of the lookups performed by this method. Many headers are + // multiply included, and the "pragma once" optimization prevents them from + // being relex/pp'd, but they would still have to search through a + // (potentially huge) series of SearchDirs to find it. + std::pair<unsigned, unsigned> &CacheLookup = + LookupFileCache.GetOrCreateValue(FilenameStart, FilenameEnd).getValue(); + + // If the entry has been previously looked up, the first value will be + // non-zero. If the value is equal to i (the start point of our search), then + // this is a matching hit. + if (CacheLookup.first == i+1) { + // Skip querying potentially lots of directories for this lookup. + i = CacheLookup.second; + } else { + // Otherwise, this is the first query, or the previous query didn't match + // our search start. We will fill in our found location below, so prime the + // start point value. + CacheLookup.first = i+1; + } + + // Check each directory in sequence to see if it contains this file. + for (; i != SearchDirs.size(); ++i) { + const FileEntry *FE = + SearchDirs[i].LookupFile(FilenameStart, FilenameEnd, *this); + if (!FE) continue; + + CurDir = &SearchDirs[i]; + + // This file is a system header or C++ unfriendly if the dir is. + getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic(); + + // Remember this location for the next lookup we do. + CacheLookup.second = i; + return FE; + } + + // Otherwise, didn't find it. Remember we didn't find this. + CacheLookup.second = SearchDirs.size(); + return 0; +} + +/// LookupSubframeworkHeader - Look up a subframework for the specified +/// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from +/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox +/// is a subframework within Carbon.framework. If so, return the FileEntry +/// for the designated file, otherwise return null. +const FileEntry *HeaderSearch:: +LookupSubframeworkHeader(const char *FilenameStart, + const char *FilenameEnd, + const FileEntry *ContextFileEnt) { + assert(ContextFileEnt && "No context file?"); + + // Framework names must have a '/' in the filename. Find it. + const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + // Look up the base framework name of the ContextFileEnt. + const char *ContextName = ContextFileEnt->getName(); + + // If the context info wasn't a framework, couldn't be a subframework. + const char *FrameworkPos = strstr(ContextName, ".framework/"); + if (FrameworkPos == 0) + return 0; + + llvm::SmallString<1024> FrameworkName(ContextName, + FrameworkPos+strlen(".framework/")); + + // Append Frameworks/HIToolbox.framework/ + FrameworkName += "Frameworks/"; + FrameworkName.append(FilenameStart, SlashPos); + FrameworkName += ".framework/"; + + llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = + FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos); + + // Some other location? + if (CacheLookup.getValue() && + CacheLookup.getKeyLength() == FrameworkName.size() && + memcmp(CacheLookup.getKeyData(), &FrameworkName[0], + CacheLookup.getKeyLength()) != 0) + return 0; + + // Cache subframework. + if (CacheLookup.getValue() == 0) { + ++NumSubFrameworkLookups; + + // If the framework dir doesn't exist, we fail. + const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(), + FrameworkName.end()); + if (Dir == 0) return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. + CacheLookup.setValue(Dir); + } + + const FileEntry *FE = 0; + + // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" + llvm::SmallString<1024> HeadersFilename(FrameworkName); + HeadersFilename += "Headers/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), + HeadersFilename.end()))) { + + // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" + HeadersFilename = FrameworkName; + HeadersFilename += "PrivateHeaders/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end()))) + return 0; + } + + // This file is a system header or C++ unfriendly if the old file is. + // + // Note that the temporary 'DirInfo' is required here, as either call to + // getFileInfo could resize the vector and we don't want to rely on order + // of evaluation. + unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo; + getFileInfo(FE).DirInfo = DirInfo; + return FE; +} + +//===----------------------------------------------------------------------===// +// File Info Management. +//===----------------------------------------------------------------------===// + + +/// getFileInfo - Return the PerFileInfo structure for the specified +/// FileEntry. +HeaderSearch::PerFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { + if (FE->getUID() >= FileInfo.size()) + FileInfo.resize(FE->getUID()+1); + return FileInfo[FE->getUID()]; +} + +/// ShouldEnterIncludeFile - Mark the specified file as a target of of a +/// #include, #include_next, or #import directive. Return false if #including +/// the file will have no effect or true if we should include it. +bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ + ++NumIncluded; // Count # of attempted #includes. + + // Get information about this file. + PerFileInfo &FileInfo = getFileInfo(File); + + // If this is a #import directive, check that we have not already imported + // this header. + if (isImport) { + // If this has already been imported, don't import it again. + FileInfo.isImport = true; + + // Has this already been #import'ed or #include'd? + if (FileInfo.NumIncludes) return false; + } else { + // Otherwise, if this is a #include of a file that was previously #import'd + // or if this is the second #include of a #pragma once file, ignore it. + if (FileInfo.isImport) + return false; + } + + // Next, check to see if the file is wrapped with #ifndef guards. If so, and + // if the macro that guards it is defined, we know the #include has no effect. + if (FileInfo.ControllingMacro && + FileInfo.ControllingMacro->hasMacroDefinition()) { + ++NumMultiIncludeFileOptzn; + return false; + } + + // Increment the number of times this file has been included. + ++FileInfo.NumIncludes; + + return true; +} + + diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp new file mode 100644 index 0000000000..98bbb38630 --- /dev/null +++ b/lib/Lex/Lexer.cpp @@ -0,0 +1,1661 @@ +//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Lexer and Token interfaces. +// +//===----------------------------------------------------------------------===// +// +// TODO: GCC Diagnostics emitted by the lexer: +// PEDWARN: (form feed|vertical tab) in preprocessing directive +// +// Universal characters, unicode, char mapping: +// WARNING: `%.*s' is not in NFKC +// WARNING: `%.*s' is not in NFC +// +// Other: +// TODO: Options to support: +// -fexec-charset,-fwide-exec-charset +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cctype> +using namespace clang; + +static void InitCharacterInfo(); + +//===----------------------------------------------------------------------===// +// Token Class Implementation +//===----------------------------------------------------------------------===// + +/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier. +bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const { + return is(tok::identifier) && + getIdentifierInfo()->getObjCKeywordID() == objcKey; +} + +/// getObjCKeywordID - Return the ObjC keyword kind. +tok::ObjCKeywordKind Token::getObjCKeywordID() const { + IdentifierInfo *specId = getIdentifierInfo(); + return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; +} + +/// isNamedIdentifier - Return true if this token is a ppidentifier with the +/// specified name. For example, tok.isNamedIdentifier("this"). +bool Token::isNamedIdentifier(const char *Name) const { + return IdentInfo && !strcmp(IdentInfo->getName(), Name); +} + + +//===----------------------------------------------------------------------===// +// Lexer Class Implementation +//===----------------------------------------------------------------------===// + + +/// Lexer constructor - Create a new lexer object for the specified buffer +/// with the specified preprocessor managing the lexing process. This lexer +/// assumes that the associated file buffer and Preprocessor objects will +/// outlive it, so it doesn't take ownership of either of them. +Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp, + const char *BufStart, const char *BufEnd) + : FileLoc(fileloc), PP(&pp), Features(pp.getLangOptions()) { + + SourceManager &SourceMgr = PP->getSourceManager(); + unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID(); + const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID); + + Is_PragmaLexer = false; + InitCharacterInfo(); + + // BufferStart must always be InputFile->getBufferStart(). + BufferStart = InputFile->getBufferStart(); + + // BufferPtr and BufferEnd can start out somewhere inside the current buffer. + // If unspecified, they starts at the start/end of the buffer. + BufferPtr = BufStart ? BufStart : BufferStart; + BufferEnd = BufEnd ? BufEnd : InputFile->getBufferEnd(); + + assert(BufferEnd[0] == 0 && + "We assume that the input buffer has a null character at the end" + " to simplify lexing!"); + + // Start of the file is a start of line. + IsAtStartOfLine = true; + + // We are not after parsing a #. + ParsingPreprocessorDirective = false; + + // We are not after parsing #include. + ParsingFilename = false; + + // We are not in raw mode. Raw mode disables diagnostics and interpretation + // of tokens (e.g. identifiers, thus disabling macro expansion). It is used + // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block + // or otherwise skipping over tokens. + LexingRawMode = false; + + // Default to keeping comments if requested. + KeepCommentMode = PP->getCommentRetentionState(); +} + +/// Lexer constructor - Create a new raw lexer object. This object is only +/// suitable for calls to 'LexRawToken'. This lexer assumes that the +/// associated file buffer will outlive it, so it doesn't take ownership of +/// either of them. +Lexer::Lexer(SourceLocation fileloc, const LangOptions &features, + const char *BufStart, const char *BufEnd) + : FileLoc(fileloc), PP(0), Features(features) { + Is_PragmaLexer = false; + InitCharacterInfo(); + + BufferStart = BufStart; + BufferPtr = BufStart; + BufferEnd = BufEnd; + + assert(BufferEnd[0] == 0 && + "We assume that the input buffer has a null character at the end" + " to simplify lexing!"); + + // Start of the file is a start of line. + IsAtStartOfLine = true; + + // We are not after parsing a #. + ParsingPreprocessorDirective = false; + + // We are not after parsing #include. + ParsingFilename = false; + + // We *are* in raw mode. + LexingRawMode = true; + + // Never keep comments in raw mode. + KeepCommentMode = false; +} + + +/// Stringify - Convert the specified string into a C string, with surrounding +/// ""'s, and with escaped \ and " characters. +std::string Lexer::Stringify(const std::string &Str, bool Charify) { + std::string Result = Str; + char Quote = Charify ? '\'' : '"'; + for (unsigned i = 0, e = Result.size(); i != e; ++i) { + if (Result[i] == '\\' || Result[i] == Quote) { + Result.insert(Result.begin()+i, '\\'); + ++i; ++e; + } + } + return Result; +} + +/// Stringify - Convert the specified string into a C string by escaping '\' +/// and " characters. This does not add surrounding ""'s to the string. +void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] == '\\' || Str[i] == '"') { + Str.insert(Str.begin()+i, '\\'); + ++i; ++e; + } + } +} + + +/// MeasureTokenLength - Relex the token at the specified location and return +/// its length in bytes in the input file. If the token needs cleaning (e.g. +/// includes a trigraph or an escaped newline) then this count includes bytes +/// that are part of that. +unsigned Lexer::MeasureTokenLength(SourceLocation Loc, + const SourceManager &SM) { + // If this comes from a macro expansion, we really do want the macro name, not + // the token this macro expanded to. + Loc = SM.getLogicalLoc(Loc); + + const char *StrData = SM.getCharacterData(Loc); + + // TODO: thi |