diff options
author | Ted Kremenek <kremenek@apple.com> | 2008-10-21 00:54:44 +0000 |
---|---|---|
committer | Ted Kremenek <kremenek@apple.com> | 2008-10-21 00:54:44 +0000 |
commit | 8588896b4779a617e5d257423ef6178431c8bda3 (patch) | |
tree | 056d04f30b01d6cd820f2603af3523049f7ec0b5 | |
parent | c13b6e251afb9530bbcc8c6f26dc4266f4f0c93b (diff) |
Added the start of a prototype implementation of PCH based on token caching.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@57863 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | Driver/CacheTokens.cpp | 177 | ||||
-rw-r--r-- | Driver/clang.cpp | 61 | ||||
-rw-r--r-- | Driver/clang.h | 2 |
3 files changed, 229 insertions, 11 deletions
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp new file mode 100644 index 0000000000..5aeb1a6da3 --- /dev/null +++ b/Driver/CacheTokens.cpp @@ -0,0 +1,177 @@ +//===--- CacheTokens.cpp - Caching of lexer tokens for PCH support --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a possible implementation of PCH support for Clang that is +// based on caching lexed tokens and identifiers. +// +//===----------------------------------------------------------------------===// + +#include "clang.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; + +typedef llvm::DenseMap<const FileEntry*,uint64_t> PCHMap; +typedef llvm::DenseMap<const IdentifierInfo*,uint64_t> IDMap; + +static void Emit32(llvm::raw_ostream& Out, uint32_t V) { + Out << (unsigned char)(V); + Out << (unsigned char)(V >> 8); + Out << (unsigned char)(V >> 16); + Out << (unsigned char)(V >> 24); +} + +static void Emit8(llvm::raw_ostream& Out, uint32_t V) { + Out << (unsigned char)(V); +} + +static void EmitBuf(llvm::raw_ostream& Out, const char* I, const char* E) { + for ( ; I != E ; ++I) Out << *I; +} + +static uint32_t ResolveID(IDMap& IM, uint32_t& idx, const IdentifierInfo* II) { + IDMap::iterator I = IM.find(II); + + if (I == IM.end()) { + IM[II] = idx; + return idx++; + } + + return I->second; +} + +static void EmitToken(llvm::raw_ostream& Out, const Token& T, + uint32_t& idcount, IDMap& IM) { + Emit8(Out, T.getKind()); + Emit8(Out, T.getFlags()); + Emit32(Out, ResolveID(IM, idcount, T.getIdentifierInfo())); + Emit32(Out, T.getLocation().getRawEncoding()); + Emit32(Out, T.getLength()); +} + + +static void EmitIdentifier(llvm::raw_ostream& Out, const IdentifierInfo& II) { + uint32_t X = (uint32_t) II.getTokenID() << 19; + X |= (uint32_t) II.getBuiltinID() << 9; + X |= (uint32_t) II.getObjCKeywordID() << 4; + if (II.hasMacroDefinition()) X |= 0x8; + if (II.isExtensionToken()) X |= 0x4; + if (II.isPoisoned()) X |= 0x2; + if (II.isCPlusPlusOperatorKeyword()) X |= 0x1; + + Emit32(Out, X); +} + +static void EmitIdentifierTable(llvm::raw_ostream& Out, + const IdentifierTable& T, const IDMap& IM) { + + for (IdentifierTable::const_iterator I=T.begin(), E=T.end(); I!=E; ++I) { + const IdentifierInfo& II = I->getValue(); + + // Write out the persistent identifier. + IDMap::const_iterator IItr = IM.find(&II); + if (IItr == IM.end()) continue; + Emit32(Out, IItr->second); + EmitIdentifier(Out, II); + + // Write out the keyword. + unsigned len = I->getKeyLength(); + Emit32(Out, len); + const char* buf = I->getKeyData(); + EmitBuf(Out, buf, buf+len); + } +} + + +void clang::CacheTokens(Preprocessor& PP, const std::string& OutFile) { + // Lex through the entire file. This will populate SourceManager with + // all of the header information. + Token Tok; + PP.EnterMainSourceFile(); + do { PP.Lex(Tok); } while (Tok.isNot(tok::eof)); + + // Iterate over all the files in SourceManager. Create a lexer + // for each file and cache the tokens. + SourceManager& SM = PP.getSourceManager(); + const LangOptions& LOpts = PP.getLangOptions(); + llvm::raw_ostream& os = llvm::errs(); + + PCHMap PM; + IDMap IM; + uint64_t tokIdx = 0; + uint32_t idcount = 0; + + std::string ErrMsg; + llvm::raw_fd_ostream Out(OutFile.c_str(), ErrMsg); + + if (!ErrMsg.empty()) { + os << "PCH error: " << ErrMsg << "\n"; + return; + } + + for (SourceManager::fileid_iterator I=SM.fileid_begin(), E=SM.fileid_end(); + I!=E; ++I) { + + const SrcMgr::ContentCache* C = I.getFileIDInfo().getContentCache(); + + if (!C) + continue; + + const FileEntry* FE = C->Entry; + + if (!FE) + continue; + + PCHMap::iterator PI = PM.find(FE); + if (PI != PM.end()) continue; + PM[FE] = tokIdx; + + // os << "Processing: " << FE->getName() << " : " << tokIdx << "\n"; + + const llvm::MemoryBuffer* B = C->Buffer; + + if (!B) + continue; + + // Create a raw lexer. + Lexer L(SourceLocation::getFileLoc(I.getFileID(), 0), LOpts, + B->getBufferStart(), B->getBufferEnd(), B); + + // Ignore whitespace. + L.SetKeepWhitespaceMode(false); + L.SetCommentRetentionState(false); + + // Lex the file, populating our data structures. + Token Tok; + L.LexFromRawLexer(Tok); + + while (Tok.isNot(tok::eof)) { + ++tokIdx; + + if (Tok.is(tok::identifier)) + Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); + + // Write the token to disk. + EmitToken(Out, Tok, idcount, IM); + + // Lex the next token. + L.LexFromRawLexer(Tok); + } + } + + // Now, write out the identifier table. + EmitIdentifierTable(Out, PP.getIdentifierTable(), IM); +} diff --git a/Driver/clang.cpp b/Driver/clang.cpp index 060fbf3f69..fcd4d5ab72 100644 --- a/Driver/clang.cpp +++ b/Driver/clang.cpp @@ -87,7 +87,8 @@ enum ProgActions { PrintPreprocessedInput, // -E mode. DumpTokens, // Dump out preprocessed tokens. DumpRawTokens, // Dump out raw tokens. - RunAnalysis // Run one or more source code analyses. + RunAnalysis, // Run one or more source code analyses. + GeneratePCH // Generate precompiled header. }; static llvm::cl::opt<ProgActions> @@ -203,10 +204,12 @@ enum LangKind { langkind_unspecified, langkind_c, langkind_c_cpp, + langkind_c_pch, langkind_cxx, langkind_cxx_cpp, langkind_objc, langkind_objc_cpp, + langkind_objc_pch, langkind_objcxx, langkind_objcxx_cpp }; @@ -226,11 +229,15 @@ BaseLang("x", llvm::cl::desc("Base language to compile"), clEnumValN(langkind_c_cpp, "c-cpp-output", "Preprocessed C"), clEnumValN(langkind_cxx_cpp, "c++-cpp-output", - "Preprocessed C++"), + "Preprocessed C++"), clEnumValN(langkind_objc_cpp, "objective-c-cpp-output", "Preprocessed Objective C"), clEnumValN(langkind_objcxx_cpp,"objective-c++-cpp-output", "Preprocessed Objective C++"), + clEnumValN(langkind_c_pch,"c-header", + "Precompiled C header"), + clEnumValN(langkind_objc_pch, "objective-c-header", + "Precompiled Objective C header"), clEnumValEnd)); static llvm::cl::opt<bool> @@ -286,16 +293,31 @@ static LangKind GetLanguage(const std::string &Filename) { } -static void InitializeLangOptions(LangOptions &Options, LangKind LK) { +static void InitializeCOptions(LangOptions &Options) { + // Do nothing. +} + +static void InitializeObjCOptions(LangOptions &Options) { + Options.ObjC1 = Options.ObjC2 = 1; +} + + +static bool InitializeLangOptions(LangOptions &Options, LangKind LK){ // FIXME: implement -fpreprocessed mode. bool NoPreprocess = false; + bool PCH = false; switch (LK) { default: assert(0 && "Unknown language kind!"); + case langkind_c_pch: + InitializeCOptions(Options); + PCH = true; + break; case langkind_c_cpp: NoPreprocess = true; // FALLTHROUGH case langkind_c: + InitializeCOptions(Options); break; case langkind_cxx_cpp: NoPreprocess = true; @@ -303,11 +325,15 @@ static void InitializeLangOptions(LangOptions &Options, LangKind LK) { case langkind_cxx: Options.CPlusPlus = 1; break; + case langkind_objc_pch: + InitializeObjCOptions(Options); + PCH = true; + break; case langkind_objc_cpp: NoPreprocess = true; // FALLTHROUGH case langkind_objc: - Options.ObjC1 = Options.ObjC2 = 1; + InitializeObjCOptions(Options); break; case langkind_objcxx_cpp: NoPreprocess = true; @@ -317,6 +343,8 @@ static void InitializeLangOptions(LangOptions &Options, LangKind LK) { Options.CPlusPlus = 1; break; } + + return PCH; } /// LangStds - Language standards we support. @@ -397,11 +425,13 @@ Exceptions("fexceptions", static llvm::cl::opt<bool> GNURuntime("fgnu-runtime", - llvm::cl::desc("Generate output compatible with the standard GNU Objective-C runtime.")); + llvm::cl::desc("Generate output compatible with the standard GNU " + "Objective-C runtime.")); static llvm::cl::opt<bool> NeXTRuntime("fnext-runtime", - llvm::cl::desc("Generate output compatible with the NeXT runtime.")); + llvm::cl::desc("Generate output compatible with the NeXT " + "runtime.")); @@ -426,8 +456,10 @@ static void InitializeLanguageStandard(LangOptions &Options, LangKind LK, default: assert(0 && "Unknown base language"); case langkind_c: case langkind_c_cpp: + case langkind_c_pch: case langkind_objc: case langkind_objc_cpp: + case langkind_objc_pch: LangStd = lang_gnu99; break; case langkind_cxx: @@ -1095,12 +1127,12 @@ static ASTConsumer* CreateASTConsumer(const std::string& InFile, /// ProcessInputFile - Process a single input file with the specified state. /// static void ProcessInputFile(Preprocessor &PP, PreprocessorFactory &PPF, - const std::string &InFile) { + const std::string &InFile, ProgActions PA) { llvm::OwningPtr<ASTConsumer> Consumer; bool ClearSourceMgr = false; - switch (ProgAction) { + switch (PA) { default: Consumer.reset(CreateASTConsumer(InFile, PP.getDiagnostics(), PP.getFileManager(), PP.getLangOptions(), @@ -1156,6 +1188,12 @@ static void ProcessInputFile(Preprocessor &PP, PreprocessorFactory &PPF, ClearSourceMgr = true; break; } + + case GeneratePCH: { + CacheTokens(PP, OutputFile); + ClearSourceMgr = true; + break; + } case PrintPreprocessedInput: // -E mode. DoPrintPreprocessedInput(PP, OutputFile); @@ -1352,10 +1390,10 @@ int main(int argc, char **argv) { LangOptions LangInfo; InitializeBaseLanguage(); LangKind LK = GetLanguage(InFile); - InitializeLangOptions(LangInfo, LK); + bool PCH = InitializeLangOptions(LangInfo, LK); InitializeLanguageStandard(LangInfo, LK, Target.get()); InitializeGCMode(LangInfo); - + // Process the -I options and set them in the HeaderInfo. HeaderSearch HeaderInfo(FileMgr); @@ -1383,7 +1421,8 @@ int main(int argc, char **argv) { Diags.setClient(TextDiagClient); // Process the source file. - ProcessInputFile(*PP, PPFactory, InFile); + ProcessInputFile(*PP, PPFactory, InFile, PCH ? GeneratePCH : ProgAction); + HeaderInfo.ClearFileInfo(); if (Stats) diff --git a/Driver/clang.h b/Driver/clang.h index 829467546d..a74a02d408 100644 --- a/Driver/clang.h +++ b/Driver/clang.h @@ -50,6 +50,8 @@ bool CheckASTConsumer(Preprocessor &PP, ASTConsumer* C); /// CheckDiagnostics - Gather the expected diagnostics and check them. bool CheckDiagnostics(Preprocessor &PP); +/// CacheTokens - Cache tokens for use with PCH. +void CacheTokens(Preprocessor& PP, const std::string& OutFile); } // end namespace clang |