diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
commit | 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e (patch) | |
tree | 8b6bfcb8783d16827f896d5facbd4549300e8a1e /include/clang/Basic/SourceManager.h | |
parent | a5f182095bf2065ca94f1c86957ee91f9068964b (diff) |
Stage two of getting CFE top correct.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39734 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/SourceManager.h')
-rw-r--r-- | include/clang/Basic/SourceManager.h | 341 |
1 files changed, 341 insertions, 0 deletions
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h new file mode 100644 index 0000000000..ef0ac0ba98 --- /dev/null +++ b/include/clang/Basic/SourceManager.h @@ -0,0 +1,341 @@ +//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SourceManager interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SOURCEMANAGER_H +#define LLVM_CLANG_SOURCEMANAGER_H + +#include "clang/Basic/SourceLocation.h" +#include <vector> +#include <map> +#include <list> + +namespace llvm { +class MemoryBuffer; +} + +namespace clang { + +class SourceManager; +class FileEntry; +class IdentifierTokenInfo; + +/// SrcMgr - Private classes that are part of the SourceManager implementation. +/// +namespace SrcMgr { + /// FileInfo - Once instance of this struct is kept for every file loaded or + /// used. This object owns the MemoryBuffer object. + struct FileInfo { + /// Buffer - The actual buffer containing the characters from the input + /// file. + const llvm::MemoryBuffer *Buffer; + + /// SourceLineCache - A new[]'d array of offsets for each source line. This + /// is lazily computed. + /// + unsigned *SourceLineCache; + + /// NumLines - The number of lines in this FileInfo. This is only valid if + /// SourceLineCache is non-null. + unsigned NumLines; + }; + + typedef std::pair<const FileEntry * const, FileInfo> InfoRec; + + /// FileIDInfo - Information about a FileID, basically just the logical file + /// that it represents and include stack information. A SourceLocation is a + /// byte offset from the start of this. + /// + /// FileID's are used to compute the location of a character in memory as well + /// as the logical source location, which can be differ from the physical + /// location. It is different when #line's are active or when macros have + /// been expanded. + /// + /// Each FileID has include stack information, indicating where it came from. + /// For the primary translation unit, it comes from SourceLocation() aka 0. + /// + /// There are three types of FileID's: + /// 1. Normal MemoryBuffer (file). These are represented by a "InfoRec *", + /// describing the source file, and a Chunk number, which factors into + /// the SourceLocation's offset from the start of the buffer. + /// 2. Macro Expansions. These indicate that the logical location is + /// totally different than the physical location. The logical source + /// location is specified by the IncludeLoc. The physical location is + /// the FilePos of the token's SourceLocation combined with the FileID + /// from MacroTokenFileID. + /// + struct FileIDInfo { + enum FileIDType { + NormalBuffer, + MacroExpansion + }; + + /// The type of this FileID. + FileIDType IDType; + + /// IncludeLoc - The location of the #include that brought in this file. + /// This SourceLocation object has a FileId of 0 for the main file. + SourceLocation IncludeLoc; + + /// This union is discriminated by IDType. + /// + union { + struct NormalBufferInfo { + /// ChunkNo - Really large buffers are broken up into chunks that are + /// each (1 << SourceLocation::FilePosBits) in size. This specifies the + /// chunk number of this FileID. + unsigned ChunkNo; + + /// FileInfo - Information about the source buffer itself. + /// + const InfoRec *Info; + } NormalBuffer; + + /// MacroTokenFileID - This is the File ID that contains the characters + /// that make up the expanded token. + unsigned MacroTokenFileID; + } u; + + /// getNormalBuffer - Return a FileIDInfo object for a normal buffer + /// reference. + static FileIDInfo getNormalBuffer(SourceLocation IL, unsigned CN, + const InfoRec *Inf) { + FileIDInfo X; + X.IDType = NormalBuffer; + X.IncludeLoc = IL; + X.u.NormalBuffer.ChunkNo = CN; + X.u.NormalBuffer.Info = Inf; + return X; + } + + /// getMacroExpansion - Return a FileID for a macro expansion. IL specifies + /// the instantiation location, and MacroFID specifies the FileID that the + /// token's characters come from. + static FileIDInfo getMacroExpansion(SourceLocation IL, + unsigned MacroFID) { + FileIDInfo X; + X.IDType = MacroExpansion; + X.IncludeLoc = IL; + X.u.MacroTokenFileID = MacroFID; + return X; + } + + unsigned getNormalBufferChunkNo() const { + assert(IDType == NormalBuffer && "Not a normal buffer!"); + return u.NormalBuffer.ChunkNo; + } + + const InfoRec *getNormalBufferInfo() const { + assert(IDType == NormalBuffer && "Not a normal buffer!"); + return u.NormalBuffer.Info; + } + }; +} // end SrcMgr namespace. + + +/// SourceManager - This file handles loading and caching of source files into +/// memory. This object owns the MemoryBuffer objects for all of the loaded +/// files and assigns unique FileID's for each unique #include chain. +/// +/// The SourceManager can be queried for information about SourceLocation +/// objects, turning them into either physical or logical locations. Physical +/// locations represent where the bytes corresponding to a token came from and +/// logical locations represent where the location is in the user's view. In +/// the case of a macro expansion, for example, the physical location indicates +/// where the expanded token came from and the logical location specifies where +/// it was expanded. Logical locations are also influenced by #line directives, +/// etc. +class SourceManager { + /// FileInfos - Memoized information about all of the files tracked by this + /// SourceManager. + std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos; + + /// MemBufferInfos - Information about various memory buffers that we have + /// read in. This is a list, instead of a vector, because we need pointers to + /// the FileInfo objects to be stable. + std::list<SrcMgr::InfoRec> MemBufferInfos; + + /// FileIDs - Information about each FileID. FileID #0 is not valid, so all + /// entries are off by one. + std::vector<SrcMgr::FileIDInfo> FileIDs; + + /// LastInstantiationLoc_* - Cache the last instantiation request for fast + /// lookup. Macros often want many tokens instantated at the same location. + SourceLocation LastInstantiationLoc_InstantLoc; + unsigned LastInstantiationLoc_MacroFID; + unsigned LastInstantiationLoc_Result; +public: + SourceManager() { LastInstantiationLoc_MacroFID = ~0U; } + ~SourceManager(); + + /// createFileID - Create a new FileID that represents the specified file + /// being #included from the specified IncludePosition. This returns 0 on + /// error and translates NULL into standard input. + unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ + const SrcMgr::InfoRec *IR = getInfoRec(SourceFile); + if (IR == 0) return 0; // Error opening file? + return createFileID(IR, IncludePos); + } + + /// createFileIDForMemBuffer - Create a new FileID that represents the + /// specified memory buffer. This does no caching of the buffer and takes + /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. + unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { + return createFileID(createMemBufferInfoRec(Buffer), SourceLocation()); + } + + /// getInstantiationLoc - Return a new SourceLocation that encodes the fact + /// that a token from physloc PhysLoc should actually be referenced from + /// InstantiationLoc. + SourceLocation getInstantiationLoc(SourceLocation PhysLoc, + SourceLocation InstantiationLoc); + + /// getBuffer - Return the buffer for the specified FileID. + /// + const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { + return getFileInfo(FileID)->Buffer; + } + + /// getIncludeLoc - Return the location of the #include for the specified + /// FileID. + SourceLocation getIncludeLoc(unsigned FileID) const; + + /// getFilePos - This (efficient) method returns the offset from the start of + /// the file that the specified SourceLocation represents. This returns the + /// location of the physical character data, not the logical file position. + unsigned getFilePos(SourceLocation Loc) const { + const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); + + // For Macros, the physical loc is specified by the MacroTokenFileID. + if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) + FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1]; + + // If this file has been split up into chunks, factor in the chunk number + // that the FileID references. + unsigned ChunkNo = FIDInfo->getNormalBufferChunkNo(); + return Loc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits); + } + + /// getCharacterData - Return a pointer to the start of the specified location + /// in the appropriate MemoryBuffer. + const char *getCharacterData(SourceLocation SL) const; + + /// getColumnNumber - Return the column # for the specified include position. + /// this is significantly cheaper to compute than the line number. This + /// returns zero if the column number isn't known. + unsigned getColumnNumber(SourceLocation Loc) const; + + /// getLineNumber - Given a SourceLocation, return the physical line number + /// for the position indicated. This requires building and caching a table of + /// line offsets for the MemoryBuffer, so this is not cheap: use only when + /// about to emit a diagnostic. + unsigned getLineNumber(SourceLocation Loc); + + /// getSourceFilePos - This method returns the *logical* offset from the start + /// of the file that the specified SourceLocation represents. This returns + /// the location of the *logical* character data, not the physical file + /// position. In the case of macros, for example, this returns where the + /// macro was instantiated, not where the characters for the macro can be + /// found. + unsigned getSourceFilePos(SourceLocation Loc) const; + + /// getSourceName - This method returns the name of the file or buffer that + /// the SourceLocation specifies. This can be modified with #line directives, + /// etc. + std::string getSourceName(SourceLocation Loc); + + /// getFileEntryForFileID - Return the FileEntry record for the specified + /// FileID if one exists. + const FileEntry *getFileEntryForFileID(unsigned FileID) const { + assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); + return FileIDs[FileID-1].getNormalBufferInfo()->first; + } + + /// Given a SourceLocation object, return the logical location referenced by + /// the ID. This logical location is subject to #line directives, etc. + SourceLocation getLogicalLoc(SourceLocation Loc) const { + if (Loc.getFileID() == 0) return Loc; + + const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); + if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) + return FIDInfo->IncludeLoc; + return Loc; + } + + /// getPhysicalLoc - Given a SourceLocation object, return the physical + /// location referenced by the ID. + SourceLocation getPhysicalLoc(SourceLocation Loc) const { + if (Loc.getFileID() == 0) return Loc; + + // For Macros, the physical loc is specified by the MacroTokenFileID. + const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID()); + if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) + return SourceLocation(FIDInfo->u.MacroTokenFileID, + Loc.getRawFilePos()); + return Loc; + } + + /// PrintStats - Print statistics to stderr. + /// + void PrintStats() const; +private: + /// createFileID - Create a new fileID for the specified InfoRec and include + /// position. This works regardless of whether the InfoRec corresponds to a + /// file or some other input source. + unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos); + + /// getInfoRec - Create or return a cached FileInfo for the specified file. + /// This returns null on failure. + const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile); + + /// createMemBufferInfoRec - Create a new info record for the specified memory + /// buffer. This does no caching. + const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf); + + const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const { + assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); + return &FileIDs[FileID-1]; + } + + /// Return the InfoRec structure for the specified FileID. This is always the + /// physical reference for the ID. + const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const { + // For Macros, the physical loc is specified by the MacroTokenFileID. + if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) + FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1]; + return FIDInfo->getNormalBufferInfo(); + } + const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const { + return getInfoRec(getFIDInfo(FileID)); + } + + SrcMgr::FileInfo *getFileInfo(const SrcMgr::FileIDInfo *FIDInfo) const { + if (const SrcMgr::InfoRec *IR = getInfoRec(FIDInfo)) + return const_cast<SrcMgr::FileInfo *>(&IR->second); + return 0; + } + SrcMgr::FileInfo *getFileInfo(unsigned FileID) const { + if (const SrcMgr::InfoRec *IR = getInfoRec(FileID)) + return const_cast<SrcMgr::FileInfo *>(&IR->second); + return 0; + } + SrcMgr::FileInfo *getFileInfo(const FileEntry *SourceFile) { + if (const SrcMgr::InfoRec *IR = getInfoRec(SourceFile)) + return const_cast<SrcMgr::FileInfo *>(&IR->second); + return 0; + } +}; + + +} // end namespace clang + +#endif |